blob: 3e3198d6b9d6caf78a68bc8e0f0e5698999aaa96 [file] [log] [blame]
Jan Karaf9a61eb2016-02-22 11:49:09 -05001#include <linux/spinlock.h>
2#include <linux/slab.h>
3#include <linux/list.h>
4#include <linux/list_bl.h>
5#include <linux/module.h>
6#include <linux/sched.h>
Jan Karac2f31402016-02-22 12:33:03 -05007#include <linux/workqueue.h>
Jan Karaf9a61eb2016-02-22 11:49:09 -05008#include <linux/mbcache2.h>
9
10/*
11 * Mbcache is a simple key-value store. Keys need not be unique, however
12 * key-value pairs are expected to be unique (we use this fact in
13 * mb2_cache_entry_delete_block()).
14 *
15 * Ext2 and ext4 use this cache for deduplication of extended attribute blocks.
16 * They use hash of a block contents as a key and block number as a value.
17 * That's why keys need not be unique (different xattr blocks may end up having
18 * the same hash). However block number always uniquely identifies a cache
19 * entry.
20 *
21 * We provide functions for creation and removal of entries, search by key,
22 * and a special "delete entry with given key-value pair" operation. Fixed
23 * size hash table is used for fast key lookups.
24 */
25
26struct mb2_cache {
27 /* Hash table of entries */
28 struct hlist_bl_head *c_hash;
29 /* log2 of hash table size */
30 int c_bucket_bits;
Jan Karac2f31402016-02-22 12:33:03 -050031 /* Maximum entries in cache to avoid degrading hash too much */
32 int c_max_entries;
Jan Karaf9a61eb2016-02-22 11:49:09 -050033 /* Protects c_lru_list, c_entry_count */
34 spinlock_t c_lru_list_lock;
35 struct list_head c_lru_list;
36 /* Number of entries in cache */
37 unsigned long c_entry_count;
38 struct shrinker c_shrink;
Jan Karac2f31402016-02-22 12:33:03 -050039 /* Work for shrinking when the cache has too many entries */
40 struct work_struct c_shrink_work;
Jan Karaf9a61eb2016-02-22 11:49:09 -050041};
42
43static struct kmem_cache *mb2_entry_cache;
44
Jan Karac2f31402016-02-22 12:33:03 -050045static unsigned long mb2_cache_shrink(struct mb2_cache *cache,
46 unsigned int nr_to_scan);
47
48/*
49 * Number of entries to reclaim synchronously when there are too many entries
50 * in cache
51 */
52#define SYNC_SHRINK_BATCH 64
53
Jan Karaf9a61eb2016-02-22 11:49:09 -050054/*
55 * mb2_cache_entry_create - create entry in cache
56 * @cache - cache where the entry should be created
57 * @mask - gfp mask with which the entry should be allocated
58 * @key - key of the entry
59 * @block - block that contains data
60 *
61 * Creates entry in @cache with key @key and records that data is stored in
62 * block @block. The function returns -EBUSY if entry with the same key
63 * and for the same block already exists in cache. Otherwise 0 is returned.
64 */
65int mb2_cache_entry_create(struct mb2_cache *cache, gfp_t mask, u32 key,
66 sector_t block)
67{
68 struct mb2_cache_entry *entry, *dup;
69 struct hlist_bl_node *dup_node;
70 struct hlist_bl_head *head;
71
Jan Karac2f31402016-02-22 12:33:03 -050072 /* Schedule background reclaim if there are too many entries */
73 if (cache->c_entry_count >= cache->c_max_entries)
74 schedule_work(&cache->c_shrink_work);
75 /* Do some sync reclaim if background reclaim cannot keep up */
76 if (cache->c_entry_count >= 2*cache->c_max_entries)
77 mb2_cache_shrink(cache, SYNC_SHRINK_BATCH);
78
Jan Karaf9a61eb2016-02-22 11:49:09 -050079 entry = kmem_cache_alloc(mb2_entry_cache, mask);
80 if (!entry)
81 return -ENOMEM;
82
83 INIT_LIST_HEAD(&entry->e_lru_list);
84 /* One ref for hash, one ref returned */
85 atomic_set(&entry->e_refcnt, 1);
86 entry->e_key = key;
87 entry->e_block = block;
88 head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)];
89 entry->e_hash_list_head = head;
90 hlist_bl_lock(head);
91 hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) {
92 if (dup->e_key == key && dup->e_block == block) {
93 hlist_bl_unlock(head);
94 kmem_cache_free(mb2_entry_cache, entry);
95 return -EBUSY;
96 }
97 }
98 hlist_bl_add_head(&entry->e_hash_list, head);
99 hlist_bl_unlock(head);
100
101 spin_lock(&cache->c_lru_list_lock);
102 list_add_tail(&entry->e_lru_list, &cache->c_lru_list);
103 /* Grab ref for LRU list */
104 atomic_inc(&entry->e_refcnt);
105 cache->c_entry_count++;
106 spin_unlock(&cache->c_lru_list_lock);
107
108 return 0;
109}
110EXPORT_SYMBOL(mb2_cache_entry_create);
111
112void __mb2_cache_entry_free(struct mb2_cache_entry *entry)
113{
114 kmem_cache_free(mb2_entry_cache, entry);
115}
116EXPORT_SYMBOL(__mb2_cache_entry_free);
117
118static struct mb2_cache_entry *__entry_find(struct mb2_cache *cache,
119 struct mb2_cache_entry *entry,
120 u32 key)
121{
122 struct mb2_cache_entry *old_entry = entry;
123 struct hlist_bl_node *node;
124 struct hlist_bl_head *head;
125
126 if (entry)
127 head = entry->e_hash_list_head;
128 else
129 head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)];
130 hlist_bl_lock(head);
131 if (entry && !hlist_bl_unhashed(&entry->e_hash_list))
132 node = entry->e_hash_list.next;
133 else
134 node = hlist_bl_first(head);
135 while (node) {
136 entry = hlist_bl_entry(node, struct mb2_cache_entry,
137 e_hash_list);
138 if (entry->e_key == key) {
139 atomic_inc(&entry->e_refcnt);
140 goto out;
141 }
142 node = node->next;
143 }
144 entry = NULL;
145out:
146 hlist_bl_unlock(head);
147 if (old_entry)
148 mb2_cache_entry_put(cache, old_entry);
149
150 return entry;
151}
152
153/*
154 * mb2_cache_entry_find_first - find the first entry in cache with given key
155 * @cache: cache where we should search
156 * @key: key to look for
157 *
158 * Search in @cache for entry with key @key. Grabs reference to the first
159 * entry found and returns the entry.
160 */
161struct mb2_cache_entry *mb2_cache_entry_find_first(struct mb2_cache *cache,
162 u32 key)
163{
164 return __entry_find(cache, NULL, key);
165}
166EXPORT_SYMBOL(mb2_cache_entry_find_first);
167
168/*
169 * mb2_cache_entry_find_next - find next entry in cache with the same
170 * @cache: cache where we should search
171 * @entry: entry to start search from
172 *
173 * Finds next entry in the hash chain which has the same key as @entry.
174 * If @entry is unhashed (which can happen when deletion of entry races
175 * with the search), finds the first entry in the hash chain. The function
176 * drops reference to @entry and returns with a reference to the found entry.
177 */
178struct mb2_cache_entry *mb2_cache_entry_find_next(struct mb2_cache *cache,
179 struct mb2_cache_entry *entry)
180{
181 return __entry_find(cache, entry, entry->e_key);
182}
183EXPORT_SYMBOL(mb2_cache_entry_find_next);
184
185/* mb2_cache_entry_delete_block - remove information about block from cache
186 * @cache - cache we work with
187 * @key - key of the entry to remove
188 * @block - block containing data for @key
189 *
190 * Remove entry from cache @cache with key @key with data stored in @block.
191 */
192void mb2_cache_entry_delete_block(struct mb2_cache *cache, u32 key,
193 sector_t block)
194{
195 struct hlist_bl_node *node;
196 struct hlist_bl_head *head;
197 struct mb2_cache_entry *entry;
198
199 head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)];
200 hlist_bl_lock(head);
201 hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
202 if (entry->e_key == key && entry->e_block == block) {
203 /* We keep hash list reference to keep entry alive */
204 hlist_bl_del_init(&entry->e_hash_list);
205 hlist_bl_unlock(head);
206 spin_lock(&cache->c_lru_list_lock);
207 if (!list_empty(&entry->e_lru_list)) {
208 list_del_init(&entry->e_lru_list);
209 cache->c_entry_count--;
210 atomic_dec(&entry->e_refcnt);
211 }
212 spin_unlock(&cache->c_lru_list_lock);
213 mb2_cache_entry_put(cache, entry);
214 return;
215 }
216 }
217 hlist_bl_unlock(head);
218}
219EXPORT_SYMBOL(mb2_cache_entry_delete_block);
220
221/* mb2_cache_entry_touch - cache entry got used
222 * @cache - cache the entry belongs to
223 * @entry - entry that got used
224 *
225 * Move entry in lru list to reflect the fact that it was used.
226 */
227void mb2_cache_entry_touch(struct mb2_cache *cache,
228 struct mb2_cache_entry *entry)
229{
230 spin_lock(&cache->c_lru_list_lock);
231 if (!list_empty(&entry->e_lru_list))
232 list_move_tail(&cache->c_lru_list, &entry->e_lru_list);
233 spin_unlock(&cache->c_lru_list_lock);
234}
235EXPORT_SYMBOL(mb2_cache_entry_touch);
236
237static unsigned long mb2_cache_count(struct shrinker *shrink,
238 struct shrink_control *sc)
239{
240 struct mb2_cache *cache = container_of(shrink, struct mb2_cache,
241 c_shrink);
242
243 return cache->c_entry_count;
244}
245
246/* Shrink number of entries in cache */
Jan Karac2f31402016-02-22 12:33:03 -0500247static unsigned long mb2_cache_shrink(struct mb2_cache *cache,
248 unsigned int nr_to_scan)
Jan Karaf9a61eb2016-02-22 11:49:09 -0500249{
Jan Karaf9a61eb2016-02-22 11:49:09 -0500250 struct mb2_cache_entry *entry;
251 struct hlist_bl_head *head;
252 unsigned int shrunk = 0;
253
254 spin_lock(&cache->c_lru_list_lock);
255 while (nr_to_scan-- && !list_empty(&cache->c_lru_list)) {
256 entry = list_first_entry(&cache->c_lru_list,
257 struct mb2_cache_entry, e_lru_list);
258 list_del_init(&entry->e_lru_list);
259 cache->c_entry_count--;
260 /*
261 * We keep LRU list reference so that entry doesn't go away
262 * from under us.
263 */
264 spin_unlock(&cache->c_lru_list_lock);
265 head = entry->e_hash_list_head;
266 hlist_bl_lock(head);
267 if (!hlist_bl_unhashed(&entry->e_hash_list)) {
268 hlist_bl_del_init(&entry->e_hash_list);
269 atomic_dec(&entry->e_refcnt);
270 }
271 hlist_bl_unlock(head);
272 if (mb2_cache_entry_put(cache, entry))
273 shrunk++;
274 cond_resched();
275 spin_lock(&cache->c_lru_list_lock);
276 }
277 spin_unlock(&cache->c_lru_list_lock);
278
279 return shrunk;
280}
281
Jan Karac2f31402016-02-22 12:33:03 -0500282static unsigned long mb2_cache_scan(struct shrinker *shrink,
283 struct shrink_control *sc)
284{
285 int nr_to_scan = sc->nr_to_scan;
286 struct mb2_cache *cache = container_of(shrink, struct mb2_cache,
287 c_shrink);
288 return mb2_cache_shrink(cache, nr_to_scan);
289}
290
291/* We shrink 1/X of the cache when we have too many entries in it */
292#define SHRINK_DIVISOR 16
293
294static void mb2_cache_shrink_worker(struct work_struct *work)
295{
296 struct mb2_cache *cache = container_of(work, struct mb2_cache,
297 c_shrink_work);
298 mb2_cache_shrink(cache, cache->c_max_entries / SHRINK_DIVISOR);
299}
300
Jan Karaf9a61eb2016-02-22 11:49:09 -0500301/*
302 * mb2_cache_create - create cache
303 * @bucket_bits: log2 of the hash table size
304 *
305 * Create cache for keys with 2^bucket_bits hash entries.
306 */
307struct mb2_cache *mb2_cache_create(int bucket_bits)
308{
309 struct mb2_cache *cache;
310 int bucket_count = 1 << bucket_bits;
311 int i;
312
313 if (!try_module_get(THIS_MODULE))
314 return NULL;
315
316 cache = kzalloc(sizeof(struct mb2_cache), GFP_KERNEL);
317 if (!cache)
318 goto err_out;
319 cache->c_bucket_bits = bucket_bits;
Jan Karac2f31402016-02-22 12:33:03 -0500320 cache->c_max_entries = bucket_count << 4;
Jan Karaf9a61eb2016-02-22 11:49:09 -0500321 INIT_LIST_HEAD(&cache->c_lru_list);
322 spin_lock_init(&cache->c_lru_list_lock);
323 cache->c_hash = kmalloc(bucket_count * sizeof(struct hlist_bl_head),
324 GFP_KERNEL);
325 if (!cache->c_hash) {
326 kfree(cache);
327 goto err_out;
328 }
329 for (i = 0; i < bucket_count; i++)
330 INIT_HLIST_BL_HEAD(&cache->c_hash[i]);
331
332 cache->c_shrink.count_objects = mb2_cache_count;
333 cache->c_shrink.scan_objects = mb2_cache_scan;
334 cache->c_shrink.seeks = DEFAULT_SEEKS;
335 register_shrinker(&cache->c_shrink);
336
Jan Karac2f31402016-02-22 12:33:03 -0500337 INIT_WORK(&cache->c_shrink_work, mb2_cache_shrink_worker);
338
Jan Karaf9a61eb2016-02-22 11:49:09 -0500339 return cache;
340
341err_out:
342 module_put(THIS_MODULE);
343 return NULL;
344}
345EXPORT_SYMBOL(mb2_cache_create);
346
347/*
348 * mb2_cache_destroy - destroy cache
349 * @cache: the cache to destroy
350 *
351 * Free all entries in cache and cache itself. Caller must make sure nobody
352 * (except shrinker) can reach @cache when calling this.
353 */
354void mb2_cache_destroy(struct mb2_cache *cache)
355{
356 struct mb2_cache_entry *entry, *next;
357
358 unregister_shrinker(&cache->c_shrink);
359
360 /*
361 * We don't bother with any locking. Cache must not be used at this
362 * point.
363 */
364 list_for_each_entry_safe(entry, next, &cache->c_lru_list, e_lru_list) {
365 if (!hlist_bl_unhashed(&entry->e_hash_list)) {
366 hlist_bl_del_init(&entry->e_hash_list);
367 atomic_dec(&entry->e_refcnt);
368 } else
369 WARN_ON(1);
370 list_del(&entry->e_lru_list);
371 WARN_ON(atomic_read(&entry->e_refcnt) != 1);
372 mb2_cache_entry_put(cache, entry);
373 }
374 kfree(cache->c_hash);
375 kfree(cache);
376 module_put(THIS_MODULE);
377}
378EXPORT_SYMBOL(mb2_cache_destroy);
379
380static int __init mb2cache_init(void)
381{
382 mb2_entry_cache = kmem_cache_create("mbcache",
383 sizeof(struct mb2_cache_entry), 0,
384 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL);
385 BUG_ON(!mb2_entry_cache);
386 return 0;
387}
388
389static void __exit mb2cache_exit(void)
390{
391 kmem_cache_destroy(mb2_entry_cache);
392}
393
394module_init(mb2cache_init)
395module_exit(mb2cache_exit)
396
397MODULE_AUTHOR("Jan Kara <jack@suse.cz>");
398MODULE_DESCRIPTION("Meta block cache (for extended attributes)");
399MODULE_LICENSE("GPL");