blob: 55db0daaca74c2f8ee66ca04f7e5900b0594ddc1 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * linux/fs/mbcache.c
3 * (C) 2001-2002 Andreas Gruenbacher, <a.gruenbacher@computer.org>
4 */
5
6/*
7 * Filesystem Meta Information Block Cache (mbcache)
8 *
9 * The mbcache caches blocks of block devices that need to be located
10 * by their device/block number, as well as by other criteria (such
11 * as the block's contents).
12 *
13 * There can only be one cache entry in a cache per device and block number.
14 * Additional indexes need not be unique in this sense. The number of
15 * additional indexes (=other criteria) can be hardwired at compile time
16 * or specified at cache create time.
17 *
18 * Each cache entry is of fixed size. An entry may be `valid' or `invalid'
19 * in the cache. A valid entry is in the main hash tables of the cache,
20 * and may also be in the lru list. An invalid entry is not in any hashes
21 * or lists.
22 *
23 * A valid cache entry is only in the lru list if no handles refer to it.
24 * Invalid cache entries will be freed when the last handle to the cache
25 * entry is released. Entries that cannot be freed immediately are put
26 * back on the lru list.
27 */
28
29#include <linux/kernel.h>
30#include <linux/module.h>
31
32#include <linux/hash.h>
33#include <linux/fs.h>
34#include <linux/mm.h>
35#include <linux/slab.h>
36#include <linux/sched.h>
T Makphaibulchoke3e037e52014-03-18 19:19:41 -040037#include <linux/list_bl.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070038#include <linux/mbcache.h>
T Makphaibulchoke3e037e52014-03-18 19:19:41 -040039#include <linux/init.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070040
/*
 * Diagnostic helpers.
 *
 * mb_debug() and mb_assert() only produce output when MB_CACHE_DEBUG is
 * defined; otherwise they expand to empty statements.  A failing mb_assert()
 * merely logs the stringified condition at KERN_ERR, it does not stop
 * execution.  mb_error() always logs at KERN_ERR.
 */
#ifndef MB_CACHE_DEBUG
# define mb_debug(fmt...) do { } while(0)
# define mb_assert(cond) do { } while(0)
#else
# define mb_debug(fmt...) do { \
		printk(KERN_DEBUG fmt); \
		printk("\n"); \
	} while (0)
# define mb_assert(cond) do { \
		if (!(cond)) \
			printk(KERN_ERR "assertion " #cond " failed\n"); \
	} while(0)
#endif
#define mb_error(fmt...) do { \
		printk(KERN_ERR fmt); \
		printk("\n"); \
	} while(0)
57
/*
 * Amount added to an entry's e_used count (on top of the usual 1) while the
 * entry is held for exclusive access: (unsigned short)~0U shifted right by
 * one bit.
 */
#define MB_CACHE_WRITER ((unsigned short)~0U >> 1)

/* Wait queue on which tasks sleep until a busy cache entry is released. */
static DECLARE_WAIT_QUEUE_HEAD(mb_cache_queue);
Linus Torvalds1da177e2005-04-16 15:20:36 -070061
62MODULE_AUTHOR("Andreas Gruenbacher <a.gruenbacher@computer.org>");
63MODULE_DESCRIPTION("Meta block cache (for extended attributes)");
64MODULE_LICENSE("GPL");
65
66EXPORT_SYMBOL(mb_cache_create);
67EXPORT_SYMBOL(mb_cache_shrink);
68EXPORT_SYMBOL(mb_cache_destroy);
69EXPORT_SYMBOL(mb_cache_entry_alloc);
70EXPORT_SYMBOL(mb_cache_entry_insert);
71EXPORT_SYMBOL(mb_cache_entry_release);
72EXPORT_SYMBOL(mb_cache_entry_free);
73EXPORT_SYMBOL(mb_cache_entry_get);
74#if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0)
75EXPORT_SYMBOL(mb_cache_entry_find_first);
76EXPORT_SYMBOL(mb_cache_entry_find_next);
77#endif
78
/*
 * Global state shared by every mbcache:
 *
 *   mb_cache_spinlock  - protects the cache data structures on SMP machines
 *   mb_cache_list      - list of all mbcaches
 *   mb_cache_lru_list  - lru list of entries; it is global across all
 *                        mbcaches, so it can hold entries of different caches
 */

static DEFINE_SPINLOCK(mb_cache_spinlock);
static LIST_HEAD(mb_cache_list);
static LIST_HEAD(mb_cache_lru_list);
Linus Torvalds1da177e2005-04-16 15:20:36 -070088
Linus Torvalds1da177e2005-04-16 15:20:36 -070089static inline int
T Makphaibulchoke3e037e52014-03-18 19:19:41 -040090__mb_cache_entry_is_block_hashed(struct mb_cache_entry *ce)
Linus Torvalds1da177e2005-04-16 15:20:36 -070091{
T Makphaibulchoke3e037e52014-03-18 19:19:41 -040092 return !hlist_bl_unhashed(&ce->e_block_list);
Linus Torvalds1da177e2005-04-16 15:20:36 -070093}
94
95
T Makphaibulchoke3e037e52014-03-18 19:19:41 -040096static inline void
97__mb_cache_entry_unhash_block(struct mb_cache_entry *ce)
98{
99 if (__mb_cache_entry_is_block_hashed(ce))
100 hlist_bl_del_init(&ce->e_block_list);
101}
102
103static inline int
104__mb_cache_entry_is_index_hashed(struct mb_cache_entry *ce)
105{
106 return !hlist_bl_unhashed(&ce->e_index.o_list);
107}
108
109static inline void
110__mb_cache_entry_unhash_index(struct mb_cache_entry *ce)
111{
112 if (__mb_cache_entry_is_index_hashed(ce))
113 hlist_bl_del_init(&ce->e_index.o_list);
114}
115
/*
 * Removes @ce from both hash tables (index hash first, then block hash).
 * Either removal is skipped when the entry is not on that chain.
 */
static inline void
__mb_cache_entry_unhash(struct mb_cache_entry *ce)
{
	__mb_cache_entry_unhash_index(ce);
	__mb_cache_entry_unhash_block(ce);
}
122
Arjan van de Ven858119e2006-01-14 13:20:43 -0800123static void
Al Viro27496a82005-10-21 03:20:48 -0400124__mb_cache_entry_forget(struct mb_cache_entry *ce, gfp_t gfp_mask)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700125{
126 struct mb_cache *cache = ce->e_cache;
127
128 mb_assert(!(ce->e_used || ce->e_queued));
Andreas Gruenbacher2aec7c52010-07-19 18:19:41 +0200129 kmem_cache_free(cache->c_entry_cache, ce);
130 atomic_dec(&cache->c_entry_count);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700131}
132
133
Arjan van de Ven858119e2006-01-14 13:20:43 -0800134static void
Linus Torvalds1da177e2005-04-16 15:20:36 -0700135__mb_cache_entry_release_unlock(struct mb_cache_entry *ce)
Josh Triplett58f555e2006-09-29 01:59:24 -0700136 __releases(mb_cache_spinlock)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700137{
138 /* Wake up all processes queuing for this cache entry. */
139 if (ce->e_queued)
140 wake_up_all(&mb_cache_queue);
141 if (ce->e_used >= MB_CACHE_WRITER)
142 ce->e_used -= MB_CACHE_WRITER;
143 ce->e_used--;
144 if (!(ce->e_used || ce->e_queued)) {
T Makphaibulchoke3e037e52014-03-18 19:19:41 -0400145 if (!__mb_cache_entry_is_block_hashed(ce))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700146 goto forget;
147 mb_assert(list_empty(&ce->e_lru_list));
148 list_add_tail(&ce->e_lru_list, &mb_cache_lru_list);
149 }
150 spin_unlock(&mb_cache_spinlock);
151 return;
152forget:
153 spin_unlock(&mb_cache_spinlock);
154 __mb_cache_entry_forget(ce, GFP_KERNEL);
155}
156
157
158/*
Dave Chinner1ab6c492013-08-28 10:18:09 +1000159 * mb_cache_shrink_scan() memory pressure callback
Linus Torvalds1da177e2005-04-16 15:20:36 -0700160 *
161 * This function is called by the kernel memory management when memory
162 * gets low.
163 *
Dave Chinner7f8275d2010-07-19 14:56:17 +1000164 * @shrink: (ignored)
Ying Han1495f232011-05-24 17:12:27 -0700165 * @sc: shrink_control passed from reclaim
Linus Torvalds1da177e2005-04-16 15:20:36 -0700166 *
Dave Chinner1ab6c492013-08-28 10:18:09 +1000167 * Returns the number of objects freed.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700168 */
Dave Chinner1ab6c492013-08-28 10:18:09 +1000169static unsigned long
170mb_cache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700171{
172 LIST_HEAD(free_list);
Andreas Gruenbachere566d482010-07-21 19:44:45 +0200173 struct mb_cache_entry *entry, *tmp;
Ying Han1495f232011-05-24 17:12:27 -0700174 int nr_to_scan = sc->nr_to_scan;
175 gfp_t gfp_mask = sc->gfp_mask;
Dave Chinner1ab6c492013-08-28 10:18:09 +1000176 unsigned long freed = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700177
Linus Torvalds1da177e2005-04-16 15:20:36 -0700178 mb_debug("trying to free %d entries", nr_to_scan);
Andreas Gruenbachere566d482010-07-21 19:44:45 +0200179 spin_lock(&mb_cache_spinlock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700180 while (nr_to_scan-- && !list_empty(&mb_cache_lru_list)) {
181 struct mb_cache_entry *ce =
182 list_entry(mb_cache_lru_list.next,
183 struct mb_cache_entry, e_lru_list);
184 list_move_tail(&ce->e_lru_list, &free_list);
185 __mb_cache_entry_unhash(ce);
Dave Chinner1ab6c492013-08-28 10:18:09 +1000186 freed++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700187 }
Dave Chinner1ab6c492013-08-28 10:18:09 +1000188 spin_unlock(&mb_cache_spinlock);
189 list_for_each_entry_safe(entry, tmp, &free_list, e_lru_list) {
190 __mb_cache_entry_forget(entry, gfp_mask);
191 }
192 return freed;
193}
194
195static unsigned long
196mb_cache_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
197{
198 struct mb_cache *cache;
199 unsigned long count = 0;
200
201 spin_lock(&mb_cache_spinlock);
Andreas Gruenbachere566d482010-07-21 19:44:45 +0200202 list_for_each_entry(cache, &mb_cache_list, c_cache_list) {
203 mb_debug("cache %s (%d)", cache->c_name,
204 atomic_read(&cache->c_entry_count));
205 count += atomic_read(&cache->c_entry_count);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700206 }
Andreas Gruenbachere566d482010-07-21 19:44:45 +0200207 spin_unlock(&mb_cache_spinlock);
Dave Chinner1ab6c492013-08-28 10:18:09 +1000208
Glauber Costa55f841c2013-08-28 10:17:53 +1000209 return vfs_pressure_ratio(count);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700210}
211
Dave Chinner1ab6c492013-08-28 10:18:09 +1000212static struct shrinker mb_cache_shrinker = {
213 .count_objects = mb_cache_shrink_count,
214 .scan_objects = mb_cache_shrink_scan,
215 .seeks = DEFAULT_SEEKS,
216};
Linus Torvalds1da177e2005-04-16 15:20:36 -0700217
218/*
219 * mb_cache_create() create a new cache
220 *
221 * All entries in one cache are equal size. Cache entries may be from
222 * multiple devices. If this is the first mbcache created, registers
223 * the cache with kernel memory management. Returns NULL if no more
224 * memory was available.
225 *
226 * @name: name of the cache (informal)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700227 * @bucket_bits: log2(number of hash buckets)
228 */
229struct mb_cache *
Andreas Gruenbacher2aec7c52010-07-19 18:19:41 +0200230mb_cache_create(const char *name, int bucket_bits)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700231{
Andreas Gruenbacher2aec7c52010-07-19 18:19:41 +0200232 int n, bucket_count = 1 << bucket_bits;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700233 struct mb_cache *cache = NULL;
234
Andreas Gruenbacher2aec7c52010-07-19 18:19:41 +0200235 cache = kmalloc(sizeof(struct mb_cache), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700236 if (!cache)
Andreas Gruenbacher2aec7c52010-07-19 18:19:41 +0200237 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700238 cache->c_name = name;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700239 atomic_set(&cache->c_entry_count, 0);
240 cache->c_bucket_bits = bucket_bits;
T Makphaibulchoke3e037e52014-03-18 19:19:41 -0400241 cache->c_block_hash = kmalloc(bucket_count *
242 sizeof(struct hlist_bl_head), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700243 if (!cache->c_block_hash)
244 goto fail;
245 for (n=0; n<bucket_count; n++)
T Makphaibulchoke3e037e52014-03-18 19:19:41 -0400246 INIT_HLIST_BL_HEAD(&cache->c_block_hash[n]);
247 cache->c_index_hash = kmalloc(bucket_count *
248 sizeof(struct hlist_bl_head), GFP_KERNEL);
Andreas Gruenbacher2aec7c52010-07-19 18:19:41 +0200249 if (!cache->c_index_hash)
250 goto fail;
251 for (n=0; n<bucket_count; n++)
T Makphaibulchoke3e037e52014-03-18 19:19:41 -0400252 INIT_HLIST_BL_HEAD(&cache->c_index_hash[n]);
Andreas Gruenbacher2aec7c52010-07-19 18:19:41 +0200253 cache->c_entry_cache = kmem_cache_create(name,
254 sizeof(struct mb_cache_entry), 0,
Paul Mundt20c2df82007-07-20 10:11:58 +0900255 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700256 if (!cache->c_entry_cache)
Andreas Gruenbacher2aec7c52010-07-19 18:19:41 +0200257 goto fail2;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700258
Andreas Gruenbacher3a48ee82010-08-16 19:05:23 +0200259 /*
260 * Set an upper limit on the number of cache entries so that the hash
261 * chains won't grow too long.
262 */
263 cache->c_max_entries = bucket_count << 4;
264
Linus Torvalds1da177e2005-04-16 15:20:36 -0700265 spin_lock(&mb_cache_spinlock);
266 list_add(&cache->c_cache_list, &mb_cache_list);
267 spin_unlock(&mb_cache_spinlock);
268 return cache;
269
Andreas Gruenbacher2aec7c52010-07-19 18:19:41 +0200270fail2:
271 kfree(cache->c_index_hash);
272
Linus Torvalds1da177e2005-04-16 15:20:36 -0700273fail:
Andreas Gruenbacher2aec7c52010-07-19 18:19:41 +0200274 kfree(cache->c_block_hash);
275 kfree(cache);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700276 return NULL;
277}
278
279
280/*
281 * mb_cache_shrink()
282 *
Alexey Dobriyan7f927fc2006-03-28 01:56:53 -0800283 * Removes all cache entries of a device from the cache. All cache entries
Linus Torvalds1da177e2005-04-16 15:20:36 -0700284 * currently in use cannot be freed, and thus remain in the cache. All others
285 * are freed.
286 *
Linus Torvalds1da177e2005-04-16 15:20:36 -0700287 * @bdev: which device's cache entries to shrink
288 */
289void
Andreas Gruenbacher8c52ab42005-07-27 11:45:15 -0700290mb_cache_shrink(struct block_device *bdev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700291{
292 LIST_HEAD(free_list);
293 struct list_head *l, *ltmp;
294
295 spin_lock(&mb_cache_spinlock);
296 list_for_each_safe(l, ltmp, &mb_cache_lru_list) {
297 struct mb_cache_entry *ce =
298 list_entry(l, struct mb_cache_entry, e_lru_list);
299 if (ce->e_bdev == bdev) {
300 list_move_tail(&ce->e_lru_list, &free_list);
301 __mb_cache_entry_unhash(ce);
302 }
303 }
304 spin_unlock(&mb_cache_spinlock);
305 list_for_each_safe(l, ltmp, &free_list) {
306 __mb_cache_entry_forget(list_entry(l, struct mb_cache_entry,
307 e_lru_list), GFP_KERNEL);
308 }
309}
310
311
312/*
313 * mb_cache_destroy()
314 *
315 * Shrinks the cache to its minimum possible size (hopefully 0 entries),
316 * and then destroys it. If this was the last mbcache, un-registers the
317 * mbcache from kernel memory management.
318 */
319void
320mb_cache_destroy(struct mb_cache *cache)
321{
322 LIST_HEAD(free_list);
323 struct list_head *l, *ltmp;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700324
325 spin_lock(&mb_cache_spinlock);
326 list_for_each_safe(l, ltmp, &mb_cache_lru_list) {
327 struct mb_cache_entry *ce =
328 list_entry(l, struct mb_cache_entry, e_lru_list);
329 if (ce->e_cache == cache) {
330 list_move_tail(&ce->e_lru_list, &free_list);
331 __mb_cache_entry_unhash(ce);
332 }
333 }
334 list_del(&cache->c_cache_list);
335 spin_unlock(&mb_cache_spinlock);
336
337 list_for_each_safe(l, ltmp, &free_list) {
338 __mb_cache_entry_forget(list_entry(l, struct mb_cache_entry,
339 e_lru_list), GFP_KERNEL);
340 }
341
342 if (atomic_read(&cache->c_entry_count) > 0) {
343 mb_error("cache %s: %d orphaned entries",
344 cache->c_name,
345 atomic_read(&cache->c_entry_count));
346 }
347
348 kmem_cache_destroy(cache->c_entry_cache);
349
Andreas Gruenbacher2aec7c52010-07-19 18:19:41 +0200350 kfree(cache->c_index_hash);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700351 kfree(cache->c_block_hash);
352 kfree(cache);
353}
354
Linus Torvalds1da177e2005-04-16 15:20:36 -0700355/*
356 * mb_cache_entry_alloc()
357 *
358 * Allocates a new cache entry. The new entry will not be valid initially,
359 * and thus cannot be looked up yet. It should be filled with data, and
360 * then inserted into the cache using mb_cache_entry_insert(). Returns NULL
361 * if no more memory was available.
362 */
363struct mb_cache_entry *
Jan Kara335e92e2008-04-15 14:34:43 -0700364mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700365{
Andreas Gruenbacher3a48ee82010-08-16 19:05:23 +0200366 struct mb_cache_entry *ce = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700367
Andreas Gruenbacher3a48ee82010-08-16 19:05:23 +0200368 if (atomic_read(&cache->c_entry_count) >= cache->c_max_entries) {
369 spin_lock(&mb_cache_spinlock);
370 if (!list_empty(&mb_cache_lru_list)) {
371 ce = list_entry(mb_cache_lru_list.next,
372 struct mb_cache_entry, e_lru_list);
373 list_del_init(&ce->e_lru_list);
374 __mb_cache_entry_unhash(ce);
375 }
376 spin_unlock(&mb_cache_spinlock);
377 }
378 if (!ce) {
379 ce = kmem_cache_alloc(cache->c_entry_cache, gfp_flags);
380 if (!ce)
381 return NULL;
Ram Guptaf9e834892007-10-25 10:03:28 -0500382 atomic_inc(&cache->c_entry_count);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700383 INIT_LIST_HEAD(&ce->e_lru_list);
T Makphaibulchoke3e037e52014-03-18 19:19:41 -0400384 INIT_HLIST_BL_NODE(&ce->e_block_list);
385 INIT_HLIST_BL_NODE(&ce->e_index.o_list);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700386 ce->e_cache = cache;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700387 ce->e_queued = 0;
388 }
T Makphaibulchoke3e037e52014-03-18 19:19:41 -0400389 ce->e_block_hash_p = &cache->c_block_hash[0];
390 ce->e_index_hash_p = &cache->c_index_hash[0];
Andreas Gruenbacher3a48ee82010-08-16 19:05:23 +0200391 ce->e_used = 1 + MB_CACHE_WRITER;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700392 return ce;
393}
394
395
396/*
397 * mb_cache_entry_insert()
398 *
399 * Inserts an entry that was allocated using mb_cache_entry_alloc() into
400 * the cache. After this, the cache entry can be looked up, but is not yet
401 * in the lru list as the caller still holds a handle to it. Returns 0 on
402 * success, or -EBUSY if a cache entry for that device + inode exists
403 * already (this may happen after a failed lookup, but when another process
404 * has inserted the same cache entry in the meantime).
405 *
406 * @bdev: device the cache entry belongs to
407 * @block: block number
Andreas Gruenbacher2aec7c52010-07-19 18:19:41 +0200408 * @key: lookup key
Linus Torvalds1da177e2005-04-16 15:20:36 -0700409 */
410int
411mb_cache_entry_insert(struct mb_cache_entry *ce, struct block_device *bdev,
Andreas Gruenbacher2aec7c52010-07-19 18:19:41 +0200412 sector_t block, unsigned int key)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700413{
414 struct mb_cache *cache = ce->e_cache;
415 unsigned int bucket;
T Makphaibulchoke3e037e52014-03-18 19:19:41 -0400416 struct hlist_bl_node *l;
Andreas Gruenbacher2aec7c52010-07-19 18:19:41 +0200417 int error = -EBUSY;
T Makphaibulchoke3e037e52014-03-18 19:19:41 -0400418 struct hlist_bl_head *block_hash_p;
419 struct hlist_bl_head *index_hash_p;
420 struct mb_cache_entry *lce;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700421
T Makphaibulchoke3e037e52014-03-18 19:19:41 -0400422 mb_assert(ce);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700423 bucket = hash_long((unsigned long)bdev + (block & 0xffffffff),
424 cache->c_bucket_bits);
T Makphaibulchoke3e037e52014-03-18 19:19:41 -0400425 block_hash_p = &cache->c_block_hash[bucket];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700426 spin_lock(&mb_cache_spinlock);
T Makphaibulchoke3e037e52014-03-18 19:19:41 -0400427 hlist_bl_for_each_entry(lce, l, block_hash_p, e_block_list) {
428 if (lce->e_bdev == bdev && lce->e_block == block)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700429 goto out;
430 }
T Makphaibulchoke3e037e52014-03-18 19:19:41 -0400431 mb_assert(!__mb_cache_entry_is_block_hashed(ce));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700432 __mb_cache_entry_unhash(ce);
433 ce->e_bdev = bdev;
434 ce->e_block = block;
T Makphaibulchoke3e037e52014-03-18 19:19:41 -0400435 ce->e_block_hash_p = block_hash_p;
Andreas Gruenbacher2aec7c52010-07-19 18:19:41 +0200436 ce->e_index.o_key = key;
437 bucket = hash_long(key, cache->c_bucket_bits);
T Makphaibulchoke3e037e52014-03-18 19:19:41 -0400438 index_hash_p = &cache->c_index_hash[bucket];
439 ce->e_index_hash_p = index_hash_p;
440 hlist_bl_add_head(&ce->e_index.o_list, index_hash_p);
441 hlist_bl_add_head(&ce->e_block_list, block_hash_p);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700442 error = 0;
443out:
444 spin_unlock(&mb_cache_spinlock);
445 return error;
446}
447
448
449/*
450 * mb_cache_entry_release()
451 *
452 * Release a handle to a cache entry. When the last handle to a cache entry
453 * is released it is either freed (if it is invalid) or otherwise inserted
454 * in to the lru list.
455 */
456void
457mb_cache_entry_release(struct mb_cache_entry *ce)
458{
459 spin_lock(&mb_cache_spinlock);
460 __mb_cache_entry_release_unlock(ce);
461}
462
463
464/*
465 * mb_cache_entry_free()
466 *
467 * This is equivalent to the sequence mb_cache_entry_takeout() --
468 * mb_cache_entry_release().
469 */
470void
471mb_cache_entry_free(struct mb_cache_entry *ce)
472{
473 spin_lock(&mb_cache_spinlock);
474 mb_assert(list_empty(&ce->e_lru_list));
475 __mb_cache_entry_unhash(ce);
476 __mb_cache_entry_release_unlock(ce);
477}
478
479
480/*
481 * mb_cache_entry_get()
482 *
483 * Get a cache entry by device / block number. (There can only be one entry
484 * in the cache per device and block.) Returns NULL if no such cache entry
485 * exists. The returned cache entry is locked for exclusive access ("single
486 * writer").
487 */
488struct mb_cache_entry *
489mb_cache_entry_get(struct mb_cache *cache, struct block_device *bdev,
490 sector_t block)
491{
492 unsigned int bucket;
T Makphaibulchoke3e037e52014-03-18 19:19:41 -0400493 struct hlist_bl_node *l;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700494 struct mb_cache_entry *ce;
T Makphaibulchoke3e037e52014-03-18 19:19:41 -0400495 struct hlist_bl_head *block_hash_p;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700496
497 bucket = hash_long((unsigned long)bdev + (block & 0xffffffff),
498 cache->c_bucket_bits);
T Makphaibulchoke3e037e52014-03-18 19:19:41 -0400499 block_hash_p = &cache->c_block_hash[bucket];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700500 spin_lock(&mb_cache_spinlock);
T Makphaibulchoke3e037e52014-03-18 19:19:41 -0400501 hlist_bl_for_each_entry(ce, l, block_hash_p, e_block_list) {
502 mb_assert(ce->e_block_hash_p == block_hash_p);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700503 if (ce->e_bdev == bdev && ce->e_block == block) {
504 DEFINE_WAIT(wait);
505
506 if (!list_empty(&ce->e_lru_list))
507 list_del_init(&ce->e_lru_list);
508
509 while (ce->e_used > 0) {
510 ce->e_queued++;
511 prepare_to_wait(&mb_cache_queue, &wait,
512 TASK_UNINTERRUPTIBLE);
513 spin_unlock(&mb_cache_spinlock);
514 schedule();
515 spin_lock(&mb_cache_spinlock);
516 ce->e_queued--;
517 }
518 finish_wait(&mb_cache_queue, &wait);
519 ce->e_used += 1 + MB_CACHE_WRITER;
520
T Makphaibulchoke3e037e52014-03-18 19:19:41 -0400521 if (!__mb_cache_entry_is_block_hashed(ce)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700522 __mb_cache_entry_release_unlock(ce);
523 return NULL;
524 }
525 goto cleanup;
526 }
527 }
528 ce = NULL;
529
530cleanup:
531 spin_unlock(&mb_cache_spinlock);
532 return ce;
533}
534
535#if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0)
536
537static struct mb_cache_entry *
T Makphaibulchoke3e037e52014-03-18 19:19:41 -0400538__mb_cache_entry_find(struct hlist_bl_node *l, struct hlist_bl_head *head,
Andreas Gruenbacher2aec7c52010-07-19 18:19:41 +0200539 struct block_device *bdev, unsigned int key)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700540{
T Makphaibulchoke3e037e52014-03-18 19:19:41 -0400541 while (l != NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700542 struct mb_cache_entry *ce =
T Makphaibulchoke3e037e52014-03-18 19:19:41 -0400543 hlist_bl_entry(l, struct mb_cache_entry,
544 e_index.o_list);
545 mb_assert(ce->e_index_hash_p == head);
Andreas Gruenbacher2aec7c52010-07-19 18:19:41 +0200546 if (ce->e_bdev == bdev && ce->e_index.o_key == key) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700547 DEFINE_WAIT(wait);
548
549 if (!list_empty(&ce->e_lru_list))
550 list_del_init(&ce->e_lru_list);
551
552 /* Incrementing before holding the lock gives readers
553 priority over writers. */
554 ce->e_used++;
555 while (ce->e_used >= MB_CACHE_WRITER) {
556 ce->e_queued++;
557 prepare_to_wait(&mb_cache_queue, &wait,
558 TASK_UNINTERRUPTIBLE);
559 spin_unlock(&mb_cache_spinlock);
560 schedule();
561 spin_lock(&mb_cache_spinlock);
562 ce->e_queued--;
563 }
564 finish_wait(&mb_cache_queue, &wait);
565
T Makphaibulchoke3e037e52014-03-18 19:19:41 -0400566 if (!__mb_cache_entry_is_block_hashed(ce)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700567 __mb_cache_entry_release_unlock(ce);
568 spin_lock(&mb_cache_spinlock);
569 return ERR_PTR(-EAGAIN);
570 }
571 return ce;
572 }
573 l = l->next;
574 }
575 return NULL;
576}
577
578
579/*
580 * mb_cache_entry_find_first()
581 *
582 * Find the first cache entry on a given device with a certain key in
Lucas De Marchi25985ed2011-03-30 22:57:33 -0300583 * an additional index. Additional matches can be found with
Linus Torvalds1da177e2005-04-16 15:20:36 -0700584 * mb_cache_entry_find_next(). Returns NULL if no match was found. The
585 * returned cache entry is locked for shared access ("multiple readers").
586 *
587 * @cache: the cache to search
Linus Torvalds1da177e2005-04-16 15:20:36 -0700588 * @bdev: the device the cache entry should belong to
589 * @key: the key in the index
590 */
591struct mb_cache_entry *
Andreas Gruenbacher2aec7c52010-07-19 18:19:41 +0200592mb_cache_entry_find_first(struct mb_cache *cache, struct block_device *bdev,
593 unsigned int key)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700594{
595 unsigned int bucket = hash_long(key, cache->c_bucket_bits);
T Makphaibulchoke3e037e52014-03-18 19:19:41 -0400596 struct hlist_bl_node *l;
597 struct mb_cache_entry *ce = NULL;
598 struct hlist_bl_head *index_hash_p;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700599
T Makphaibulchoke3e037e52014-03-18 19:19:41 -0400600 index_hash_p = &cache->c_index_hash[bucket];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700601 spin_lock(&mb_cache_spinlock);
T Makphaibulchoke3e037e52014-03-18 19:19:41 -0400602 if (!hlist_bl_empty(index_hash_p)) {
603 l = hlist_bl_first(index_hash_p);
604 ce = __mb_cache_entry_find(l, index_hash_p, bdev, key);
605 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700606 spin_unlock(&mb_cache_spinlock);
607 return ce;
608}
609
610
611/*
612 * mb_cache_entry_find_next()
613 *
614 * Find the next cache entry on a given device with a certain key in an
615 * additional index. Returns NULL if no match could be found. The previous
616 * entry is atomatically released, so that mb_cache_entry_find_next() can
617 * be called like this:
618 *
619 * entry = mb_cache_entry_find_first();
620 * while (entry) {
621 * ...
622 * entry = mb_cache_entry_find_next(entry, ...);
623 * }
624 *
625 * @prev: The previous match
Linus Torvalds1da177e2005-04-16 15:20:36 -0700626 * @bdev: the device the cache entry should belong to
627 * @key: the key in the index
628 */
629struct mb_cache_entry *
Andreas Gruenbacher2aec7c52010-07-19 18:19:41 +0200630mb_cache_entry_find_next(struct mb_cache_entry *prev,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700631 struct block_device *bdev, unsigned int key)
632{
633 struct mb_cache *cache = prev->e_cache;
634 unsigned int bucket = hash_long(key, cache->c_bucket_bits);
T Makphaibulchoke3e037e52014-03-18 19:19:41 -0400635 struct hlist_bl_node *l;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700636 struct mb_cache_entry *ce;
T Makphaibulchoke3e037e52014-03-18 19:19:41 -0400637 struct hlist_bl_head *index_hash_p;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700638
T Makphaibulchoke3e037e52014-03-18 19:19:41 -0400639 index_hash_p = &cache->c_index_hash[bucket];
640 mb_assert(prev->e_index_hash_p == index_hash_p);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700641 spin_lock(&mb_cache_spinlock);
T Makphaibulchoke3e037e52014-03-18 19:19:41 -0400642 mb_assert(!hlist_bl_empty(index_hash_p));
Andreas Gruenbacher2aec7c52010-07-19 18:19:41 +0200643 l = prev->e_index.o_list.next;
T Makphaibulchoke3e037e52014-03-18 19:19:41 -0400644 ce = __mb_cache_entry_find(l, index_hash_p, bdev, key);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700645 __mb_cache_entry_release_unlock(prev);
646 return ce;
647}
648
649#endif /* !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) */
650
651static int __init init_mbcache(void)
652{
Rusty Russell8e1f9362007-07-17 04:03:17 -0700653 register_shrinker(&mb_cache_shrinker);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700654 return 0;
655}
656
657static void __exit exit_mbcache(void)
658{
Rusty Russell8e1f9362007-07-17 04:03:17 -0700659 unregister_shrinker(&mb_cache_shrinker);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700660}
661
/* Register the module entry and exit points. */
module_init(init_mbcache);
module_exit(exit_mbcache);
664