/*
 * Copyright (C) 2012 Red Hat. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include "dm-cache-policy.h"
#include "dm.h"

#include <linux/hash.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

#define DM_MSG_PREFIX "cache-policy-mq"

static struct kmem_cache *mq_entry_cache;

/*----------------------------------------------------------------*/

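/*
 * Round up to the next power of two, but no lower than min.
 */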
static unsigned next_power(unsigned n, unsigned min)
{
	return roundup_pow_of_two(max(n, min));
}

/*----------------------------------------------------------------*/

/*
 * Large, sequential ios are probably better left on the origin device since
 * spindles tend to have good bandwidth.
 *
 * The io_tracker tries to spot when the io is in one of these sequential
 * modes.
 *
 * The two thresholds used to switch between random and sequential io mode
 * default as follows and can be adjusted via the constructor and message
 * interfaces.
 */
#define RANDOM_THRESHOLD_DEFAULT 4
#define SEQUENTIAL_THRESHOLD_DEFAULT 512

enum io_pattern {
	PATTERN_SEQUENTIAL,
	PATTERN_RANDOM
};

struct io_tracker {
	enum io_pattern pattern;

	unsigned nr_seq_samples;
	unsigned nr_rand_samples;
	unsigned thresholds[2];

	dm_oblock_t last_end_oblock;
};

static void iot_init(struct io_tracker *t,
		     int sequential_threshold, int random_threshold)
{
	t->pattern = PATTERN_RANDOM;
	t->nr_seq_samples = 0;
	t->nr_rand_samples = 0;
	t->last_end_oblock = 0;
	t->thresholds[PATTERN_RANDOM] = random_threshold;
	t->thresholds[PATTERN_SEQUENTIAL] = sequential_threshold;
}

static enum io_pattern iot_pattern(struct io_tracker *t)
{
	return t->pattern;
}

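/*
 * Notes whether this bio starts where the previous one ended (a sequential
 * sample) or not (a random sample), and records where it finishes so the
 * next bio can be classified the same way.
 */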
static void iot_update_stats(struct io_tracker *t, struct bio *bio)
{
	if (bio->bi_iter.bi_sector == from_oblock(t->last_end_oblock) + 1)
		t->nr_seq_samples++;
	else {
		/*
		 * Just one non-sequential IO is enough to reset the
		 * counters.
		 */
		if (t->nr_seq_samples) {
			t->nr_seq_samples = 0;
			t->nr_rand_samples = 0;
		}

		t->nr_rand_samples++;
	}

	t->last_end_oblock = to_oblock(bio_end_sector(bio) - 1);
}

static void iot_check_for_pattern_switch(struct io_tracker *t)
{
	switch (t->pattern) {
	case PATTERN_SEQUENTIAL:
		if (t->nr_rand_samples >= t->thresholds[PATTERN_RANDOM]) {
			t->pattern = PATTERN_RANDOM;
			t->nr_seq_samples = t->nr_rand_samples = 0;
		}
		break;

	case PATTERN_RANDOM:
		if (t->nr_seq_samples >= t->thresholds[PATTERN_SEQUENTIAL]) {
			t->pattern = PATTERN_SEQUENTIAL;
			t->nr_seq_samples = t->nr_rand_samples = 0;
		}
		break;
	}
}

static void iot_examine_bio(struct io_tracker *t, struct bio *bio)
{
	iot_update_stats(t, bio);
	iot_check_for_pattern_switch(t);
}

/*----------------------------------------------------------------*/


/*
 * This queue is divided up into different levels, allowing us to push
 * entries to the back of any of the levels.  Think of it as a partially
 * sorted queue.
 */
#define NR_QUEUE_LEVELS 16u
#define NR_SENTINELS (NR_QUEUE_LEVELS * 3)

struct queue {
	unsigned nr_elts;
	struct list_head qs[NR_QUEUE_LEVELS];
	struct list_head sentinels[NR_SENTINELS];
};

static void queue_init(struct queue *q)
{
	unsigned i;

	q->nr_elts = 0;
	for (i = 0; i < NR_QUEUE_LEVELS; i++) {
		INIT_LIST_HEAD(q->qs + i);
		INIT_LIST_HEAD(q->sentinels + i);
	}
}

static bool queue_empty(struct queue *q)
{
	return q->nr_elts == 0;
}

/*
 * Insert an entry to the back of the given level.
 */
static void queue_push(struct queue *q, unsigned level, struct list_head *elt)
{
	q->nr_elts++;
	list_add_tail(elt, q->qs + level);
}

static void queue_remove(struct queue *q, struct list_head *elt)
{
	q->nr_elts--;
	list_del(elt);
}

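/*
 * The sentinel heads live in the queue structure itself, so a simple
 * address range check tells us whether a list node is a sentinel or a
 * real entry.
 */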
static bool is_sentinel(struct queue *q, struct list_head *h)
{
	return (h >= q->sentinels) && (h < (q->sentinels + NR_SENTINELS));
}

/*
 * Gives us the oldest entry of the lowest populated level.  If the first
 * level is emptied then we shift down one level.
 */
static struct list_head *queue_peek(struct queue *q)
{
	unsigned level;
	struct list_head *h;

	for (level = 0; level < NR_QUEUE_LEVELS; level++)
		list_for_each(h, q->qs + level)
			if (!is_sentinel(q, h))
				return h;

	return NULL;
}

static struct list_head *queue_pop(struct queue *q)
{
	struct list_head *r = queue_peek(q);

	if (r) {
		q->nr_elts--;
		list_del(r);
	}

	return r;
}

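/*
 * Removes and returns the first element of a list.  Callers must ensure
 * the list is not empty.
 */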
static struct list_head *list_pop(struct list_head *lh)
{
	struct list_head *r = lh->next;

	BUG_ON(!r);
	list_del_init(r);

	return r;
}

/*
 * Sometimes we want to iterate through entries that have been pushed since
 * a certain event.  We use sentinel entries on the queues to delimit these
 * 'tick' events.
 */
static void queue_tick(struct queue *q)
{
	unsigned i;

	for (i = 0; i < NR_QUEUE_LEVELS; i++) {
		list_del(q->sentinels + i);
		list_add_tail(q->sentinels + i, q->qs + i);
	}
}

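/*
 * Calls fn on every entry pushed since the current sentinels were laid
 * down, walking each level backwards from the tail until a sentinel is
 * reached.
 */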
typedef void (*iter_fn)(struct list_head *, void *);
static void queue_iterate_tick(struct queue *q, iter_fn fn, void *context)
{
	unsigned i;
	struct list_head *h;

	for (i = 0; i < NR_QUEUE_LEVELS; i++) {
		list_for_each_prev(h, q->qs + i) {
			if (is_sentinel(q, h))
				break;

			fn(h, context);
		}
	}
}

/*----------------------------------------------------------------*/

/*
 * Describes a cache entry.  Used in both the cache and the pre_cache.
 */
struct entry {
	struct hlist_node hlist;
	struct list_head list;
	dm_oblock_t oblock;

	/*
	 * FIXME: pack these better
	 */
	bool dirty:1;
	unsigned hit_count;
	unsigned generation;
};

/*
 * Rather than storing the cblock in an entry, we allocate all entries in
 * an array, and infer the cblock from the entry position.
 *
 * Free entries are linked together into a list.
 */
struct entry_pool {
	struct entry *entries, *entries_end;
	struct list_head free;
	unsigned nr_allocated;
};

static int epool_init(struct entry_pool *ep, unsigned nr_entries)
{
	unsigned i;

	ep->entries = vzalloc(sizeof(struct entry) * nr_entries);
	if (!ep->entries)
		return -ENOMEM;

	ep->entries_end = ep->entries + nr_entries;

	INIT_LIST_HEAD(&ep->free);
	for (i = 0; i < nr_entries; i++)
		list_add(&ep->entries[i].list, &ep->free);

	ep->nr_allocated = 0;

	return 0;
}

static void epool_exit(struct entry_pool *ep)
{
	vfree(ep->entries);
}

static struct entry *alloc_entry(struct entry_pool *ep)
{
	struct entry *e;

	if (list_empty(&ep->free))
		return NULL;

	e = list_entry(list_pop(&ep->free), struct entry, list);
	INIT_LIST_HEAD(&e->list);
	INIT_HLIST_NODE(&e->hlist);
	ep->nr_allocated++;

	return e;
}

/*
 * This assumes the cblock hasn't already been allocated.
 */
static struct entry *alloc_particular_entry(struct entry_pool *ep, dm_cblock_t cblock)
{
	struct entry *e = ep->entries + from_cblock(cblock);

	list_del_init(&e->list);
	INIT_HLIST_NODE(&e->hlist);
	ep->nr_allocated++;

	return e;
}

static void free_entry(struct entry_pool *ep, struct entry *e)
{
	BUG_ON(!ep->nr_allocated);
	ep->nr_allocated--;
	INIT_HLIST_NODE(&e->hlist);
	list_add(&e->list, &ep->free);
}

/*
 * Returns NULL if the entry is free.
 */
static struct entry *epool_find(struct entry_pool *ep, dm_cblock_t cblock)
{
	struct entry *e = ep->entries + from_cblock(cblock);
	return !hlist_unhashed(&e->hlist) ? e : NULL;
}

static bool epool_empty(struct entry_pool *ep)
{
	return list_empty(&ep->free);
}

static bool in_pool(struct entry_pool *ep, struct entry *e)
{
	return e >= ep->entries && e < ep->entries_end;
}

static dm_cblock_t infer_cblock(struct entry_pool *ep, struct entry *e)
{
	return to_cblock(e - ep->entries);
}

/*----------------------------------------------------------------*/

struct mq_policy {
	struct dm_cache_policy policy;

	/* protects everything */
	struct mutex lock;
	dm_cblock_t cache_size;
	struct io_tracker tracker;

	/*
	 * Entries come from two pools, one of pre-cache entries, and one
	 * for the cache proper.
	 */
	struct entry_pool pre_cache_pool;
	struct entry_pool cache_pool;

	/*
	 * We maintain three queues of entries.  The cache proper,
	 * consisting of a clean and a dirty queue, contains the currently
	 * active mappings.  The pre_cache tracks blocks that are being
	 * hit frequently and are potential candidates for promotion to
	 * the cache.
	 */
	struct queue pre_cache;
	struct queue cache_clean;
	struct queue cache_dirty;

	/*
	 * Keeps track of time, incremented by the core.  We use this to
	 * avoid attributing multiple hits within the same tick.
	 *
	 * Access to tick_protected should be done with the spin lock held.
	 * It's copied to tick at the start of the map function (within the
	 * mutex).
	 */
	spinlock_t tick_lock;
	unsigned tick_protected;
	unsigned tick;

	/*
	 * A count of the number of times the map function has been called
	 * and found an entry in the pre_cache or cache.  Currently used to
	 * calculate the generation.
	 */
	unsigned hit_count;

	/*
	 * A generation is a longish period that is used to trigger some
	 * bookkeeping effects, e.g. decrementing hit counts on entries.
	 * This is needed to allow the cache to evolve as io patterns
	 * change.
	 */
	unsigned generation;
	unsigned generation_period; /* in lookups (will probably change) */

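	/*
	 * Tunable adjustments used when deciding whether to promote a
	 * block; see adjusted_promote_threshold().
	 */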
	unsigned discard_promote_adjustment;
	unsigned read_promote_adjustment;
	unsigned write_promote_adjustment;

	/*
	 * The hash table allows us to quickly find an entry by origin
	 * block.  Both pre_cache and cache entries are in here.
	 */
	unsigned nr_buckets;
	dm_block_t hash_bits;
	struct hlist_head *table;
};

#define DEFAULT_DISCARD_PROMOTE_ADJUSTMENT 1
#define DEFAULT_READ_PROMOTE_ADJUSTMENT 4
#define DEFAULT_WRITE_PROMOTE_ADJUSTMENT 8
#define DISCOURAGE_DEMOTING_DIRTY_THRESHOLD 128

/*----------------------------------------------------------------*/

/*
 * Simple hash table implementation.  Should replace with the standard hash
 * table that's making its way upstream.
 */
static void hash_insert(struct mq_policy *mq, struct entry *e)
{
	unsigned h = hash_64(from_oblock(e->oblock), mq->hash_bits);

	hlist_add_head(&e->hlist, mq->table + h);
}

static struct entry *hash_lookup(struct mq_policy *mq, dm_oblock_t oblock)
{
	unsigned h = hash_64(from_oblock(oblock), mq->hash_bits);
	struct hlist_head *bucket = mq->table + h;
	struct entry *e;

	hlist_for_each_entry(e, bucket, hlist)
		if (e->oblock == oblock) {
			hlist_del(&e->hlist);
			hlist_add_head(&e->hlist, bucket);
			return e;
		}

	return NULL;
}

static void hash_remove(struct entry *e)
{
	hlist_del(&e->hlist);
}

/*----------------------------------------------------------------*/

static bool any_free_cblocks(struct mq_policy *mq)
{
	return !epool_empty(&mq->cache_pool);
}

static bool any_clean_cblocks(struct mq_policy *mq)
{
	return !queue_empty(&mq->cache_clean);
}

/*----------------------------------------------------------------*/

/*
 * Now we get to the meat of the policy.  This section deals with deciding
 * when to add entries to the pre_cache and cache, and when to move them
 * between the two.
 */

/*
 * The queue level is based on the log2 of the hit count.
 */
static unsigned queue_level(struct entry *e)
{
	return min((unsigned) ilog2(e->hit_count), NR_QUEUE_LEVELS - 1u);
}

static bool in_cache(struct mq_policy *mq, struct entry *e)
{
	return in_pool(&mq->cache_pool, e);
}

/*
 * Inserts the entry into the pre_cache or the cache (onto the clean or
 * dirty queue as appropriate) and into the hash table.
 */
static void push(struct mq_policy *mq, struct entry *e)
{
	hash_insert(mq, e);

	if (in_cache(mq, e))
		queue_push(e->dirty ? &mq->cache_dirty : &mq->cache_clean,
			   queue_level(e), &e->list);
	else
		queue_push(&mq->pre_cache, queue_level(e), &e->list);
}

/*
 * Removes an entry from pre_cache or cache.  Removes from the hash table.
 */
static void del(struct mq_policy *mq, struct entry *e)
{
	if (in_cache(mq, e))
		queue_remove(e->dirty ? &mq->cache_dirty : &mq->cache_clean, &e->list);
	else
		queue_remove(&mq->pre_cache, &e->list);

	hash_remove(e);
}

/*
 * Like del, except it removes the first entry in the queue (ie. the least
 * recently used).
 */
static struct entry *pop(struct mq_policy *mq, struct queue *q)
{
	struct entry *e;
	struct list_head *h = queue_pop(q);

	if (!h)
		return NULL;

	e = container_of(h, struct entry, list);
	hash_remove(e);

	return e;
}

static struct entry *peek(struct queue *q)
{
	struct list_head *h = queue_peek(q);
	return h ? container_of(h, struct entry, list) : NULL;
}

/*
 * The promotion threshold is adjusted every generation.  As are the counts
 * of the entries.
 *
 * At the moment the threshold is taken by averaging the hit counts of some
 * of the entries in the cache (the first 20 entries across all levels in
 * ascending order, giving preference to the clean entries at each level).
 *
 * We can be much cleverer than this though.  For example, each promotion
 * could bump up the threshold helping to prevent churn.  Much more to do
 * here.
 */

#define MAX_TO_AVERAGE 20

static void check_generation(struct mq_policy *mq)
{
	unsigned total = 0, nr = 0, count = 0, level;
	struct list_head *head;
	struct entry *e;

	if ((mq->hit_count >= mq->generation_period) && (epool_empty(&mq->cache_pool))) {
		mq->hit_count = 0;
		mq->generation++;

		for (level = 0; level < NR_QUEUE_LEVELS && count < MAX_TO_AVERAGE; level++) {
			head = mq->cache_clean.qs + level;
			list_for_each_entry(e, head, list) {
				nr++;
				total += e->hit_count;

				if (++count >= MAX_TO_AVERAGE)
					break;
			}

			head = mq->cache_dirty.qs + level;
			list_for_each_entry(e, head, list) {
				nr++;
				total += e->hit_count;

				if (++count >= MAX_TO_AVERAGE)
					break;
			}
		}
	}
}

/*
 * Whenever we use an entry we bump up its hit counter, and push it to the
 * back of its current level.
 */
static void requeue(struct mq_policy *mq, struct entry *e)
{
	check_generation(mq);
	del(mq, e);
	push(mq, e);
}

/*
 * Demote the least recently used entry from the cache to the pre_cache.
 * Returns the new cache entry to use, and the old origin block it was
 * mapped to.
 *
 * We drop the hit count on the demoted entry back to 1 to stop it bouncing
 * straight back into the cache if it's subsequently hit.  There are
 * various options here, and more experimentation would be good:
 *
 * - just forget about the demoted entry completely (ie. don't insert it
 *   into the pre_cache).
 * - divide the hit count rather than setting it to some hard coded value.
 * - set the hit count to a hard coded value other than 1, eg, is it better
 *   if it goes in at level 2?
 */
static int demote_cblock(struct mq_policy *mq, dm_oblock_t *oblock)
{
	struct entry *demoted = pop(mq, &mq->cache_clean);

	if (!demoted)
		/*
		 * We could get a block from mq->cache_dirty, but that
		 * would add extra latency to the triggering bio as it
		 * waits for the writeback.  Better to not promote this
		 * time and hope there's a clean block next time this block
		 * is hit.
		 */
		return -ENOSPC;

	*oblock = demoted->oblock;
	free_entry(&mq->cache_pool, demoted);

	/*
	 * We used to put the demoted block into the pre-cache, but I think
	 * it's simpler to just let it work its way up from zero again.
	 * Stops blocks flickering in and out of the cache.
	 */

	return 0;
}

/*
 * Entries in the pre_cache whose hit count passes the promotion
 * threshold move to the cache proper.  Working out the correct
 * value for the promotion_threshold is crucial to this policy.
 */
static unsigned promote_threshold(struct mq_policy *mq)
{
	struct entry *e;

	if (any_free_cblocks(mq))
		return 0;

	e = peek(&mq->cache_clean);
	if (e)
		return e->hit_count;

	e = peek(&mq->cache_dirty);
	if (e)
		return e->hit_count + DISCOURAGE_DEMOTING_DIRTY_THRESHOLD;

	/* This should never happen */
	return 0;
}

/*
 * We modify the basic promotion_threshold depending on the specific io.
 *
 * If the origin block has been discarded then there's no cost to copy it
 * to the cache.
 *
 * We bias towards reads, since they can be demoted at no cost if they
 * haven't been dirtied.
 */
static unsigned adjusted_promote_threshold(struct mq_policy *mq,
					   bool discarded_oblock, int data_dir)
{
	if (data_dir == READ)
		return promote_threshold(mq) + mq->read_promote_adjustment;

	if (discarded_oblock && (any_free_cblocks(mq) || any_clean_cblocks(mq))) {
		/*
		 * We don't need to do any copying at all, so give this a
		 * very low threshold.
		 */
		return mq->discard_promote_adjustment;
	}

	return promote_threshold(mq) + mq->write_promote_adjustment;
}

static bool should_promote(struct mq_policy *mq, struct entry *e,
			   bool discarded_oblock, int data_dir)
{
	return e->hit_count >=
		adjusted_promote_threshold(mq, discarded_oblock, data_dir);
}

static int cache_entry_found(struct mq_policy *mq,
			     struct entry *e,
			     struct policy_result *result)
{
	requeue(mq, e);

	if (in_cache(mq, e)) {
		result->op = POLICY_HIT;
		result->cblock = infer_cblock(&mq->cache_pool, e);
	}

	return 0;
}

/*
 * Moves an entry from the pre_cache to the cache.  The main work is
 * finding which cache block to use.
 */
static int pre_cache_to_cache(struct mq_policy *mq, struct entry *e,
			      struct policy_result *result)
{
	int r;
	struct entry *new_e;

	/* Ensure there's a free cblock in the cache */
	if (epool_empty(&mq->cache_pool)) {
		result->op = POLICY_REPLACE;
		r = demote_cblock(mq, &result->old_oblock);
		if (r) {
			result->op = POLICY_MISS;
			return 0;
		}
	} else
		result->op = POLICY_NEW;

	new_e = alloc_entry(&mq->cache_pool);
	BUG_ON(!new_e);

	new_e->oblock = e->oblock;
	new_e->dirty = false;
	new_e->hit_count = e->hit_count;
	new_e->generation = e->generation;

	del(mq, e);
	free_entry(&mq->pre_cache_pool, e);
	push(mq, new_e);

	result->cblock = infer_cblock(&mq->cache_pool, new_e);

	return 0;
}

static int pre_cache_entry_found(struct mq_policy *mq, struct entry *e,
				 bool can_migrate, bool discarded_oblock,
				 int data_dir, struct policy_result *result)
{
	int r = 0;

	if (!should_promote(mq, e, discarded_oblock, data_dir)) {
		requeue(mq, e);
		result->op = POLICY_MISS;

	} else if (!can_migrate)
		r = -EWOULDBLOCK;

	else {
		requeue(mq, e);
		r = pre_cache_to_cache(mq, e, result);
	}

	return r;
}

static void insert_in_pre_cache(struct mq_policy *mq,
				dm_oblock_t oblock)
{
	struct entry *e = alloc_entry(&mq->pre_cache_pool);

	if (!e)
		/*
		 * There's no spare entry structure, so we grab the least
		 * used one from the pre_cache.
		 */
		e = pop(mq, &mq->pre_cache);

	if (unlikely(!e)) {
		DMWARN("couldn't pop from pre cache");
		return;
	}

	e->dirty = false;
	e->oblock = oblock;
	e->hit_count = 1;
	e->generation = mq->generation;
	push(mq, e);
}

static void insert_in_cache(struct mq_policy *mq, dm_oblock_t oblock,
			    struct policy_result *result)
{
	int r;
	struct entry *e;

	if (epool_empty(&mq->cache_pool)) {
		result->op = POLICY_REPLACE;
		r = demote_cblock(mq, &result->old_oblock);
		if (unlikely(r)) {
			result->op = POLICY_MISS;
			insert_in_pre_cache(mq, oblock);
			return;
		}

		/*
		 * This will always succeed, since we've just demoted.
		 */
		e = alloc_entry(&mq->cache_pool);
		BUG_ON(!e);

	} else {
		e = alloc_entry(&mq->cache_pool);
		result->op = POLICY_NEW;
	}

	e->oblock = oblock;
	e->dirty = false;
	e->hit_count = 1;
	e->generation = mq->generation;
	push(mq, e);

	result->cblock = infer_cblock(&mq->cache_pool, e);
}

static int no_entry_found(struct mq_policy *mq, dm_oblock_t oblock,
			  bool can_migrate, bool discarded_oblock,
			  int data_dir, struct policy_result *result)
{
	if (adjusted_promote_threshold(mq, discarded_oblock, data_dir) <= 1) {
		if (can_migrate)
			insert_in_cache(mq, oblock, result);
		else
			return -EWOULDBLOCK;
	} else {
		insert_in_pre_cache(mq, oblock);
		result->op = POLICY_MISS;
	}

	return 0;
}

/*
 * Looks the oblock up in the hash table, then decides whether to put in
 * pre_cache, or cache etc.
 */
static int map(struct mq_policy *mq, dm_oblock_t oblock,
	       bool can_migrate, bool discarded_oblock,
	       int data_dir, struct policy_result *result)
{
	int r = 0;
	struct entry *e = hash_lookup(mq, oblock);

	if (e && in_cache(mq, e))
		r = cache_entry_found(mq, e, result);

	else if (mq->tracker.thresholds[PATTERN_SEQUENTIAL] &&
		 iot_pattern(&mq->tracker) == PATTERN_SEQUENTIAL)
		result->op = POLICY_MISS;

	else if (e)
		r = pre_cache_entry_found(mq, e, can_migrate, discarded_oblock,
					  data_dir, result);

	else
		r = no_entry_found(mq, oblock, can_migrate, discarded_oblock,
				   data_dir, result);

	if (r == -EWOULDBLOCK)
		result->op = POLICY_MISS;

	return r;
}

/*----------------------------------------------------------------*/

/*
 * Public interface, via the policy struct.  See dm-cache-policy.h for a
 * description of these.
 */

static struct mq_policy *to_mq_policy(struct dm_cache_policy *p)
{
	return container_of(p, struct mq_policy, policy);
}

static void mq_destroy(struct dm_cache_policy *p)
{
	struct mq_policy *mq = to_mq_policy(p);

	vfree(mq->table);
	epool_exit(&mq->cache_pool);
	epool_exit(&mq->pre_cache_pool);
	kfree(mq);
}

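/*
 * Callbacks for queue_iterate_tick(): bump the hit count of each entry
 * pushed during the last tick.  Cache hits also feed mq->hit_count, which
 * drives the generation bookkeeping.
 */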
static void update_pre_cache_hits(struct list_head *h, void *context)
{
	struct entry *e = container_of(h, struct entry, list);
	e->hit_count++;
}

static void update_cache_hits(struct list_head *h, void *context)
{
	struct mq_policy *mq = context;
	struct entry *e = container_of(h, struct entry, list);
	e->hit_count++;
	mq->hit_count++;
}

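/*
 * Called with the mutex held at the start of mq_map().  If the core has
 * issued new ticks since we last looked, credit the entries pushed during
 * the previous tick with a hit, then lay down fresh sentinels to delimit
 * the next one.
 */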
static void copy_tick(struct mq_policy *mq)
{
	unsigned long flags, tick;

	spin_lock_irqsave(&mq->tick_lock, flags);
	tick = mq->tick_protected;
	if (tick != mq->tick) {
		queue_iterate_tick(&mq->pre_cache, update_pre_cache_hits, mq);
		queue_iterate_tick(&mq->cache_dirty, update_cache_hits, mq);
		queue_iterate_tick(&mq->cache_clean, update_cache_hits, mq);
		mq->tick = tick;
	}

	queue_tick(&mq->pre_cache);
	queue_tick(&mq->cache_dirty);
	queue_tick(&mq->cache_clean);
	spin_unlock_irqrestore(&mq->tick_lock, flags);
}

static int mq_map(struct dm_cache_policy *p, dm_oblock_t oblock,
		  bool can_block, bool can_migrate, bool discarded_oblock,
		  struct bio *bio, struct policy_result *result)
{
	int r;
	struct mq_policy *mq = to_mq_policy(p);

	result->op = POLICY_MISS;

	if (can_block)
		mutex_lock(&mq->lock);
	else if (!mutex_trylock(&mq->lock))
		return -EWOULDBLOCK;

	copy_tick(mq);

	iot_examine_bio(&mq->tracker, bio);
	r = map(mq, oblock, can_migrate, discarded_oblock,
		bio_data_dir(bio), result);

	mutex_unlock(&mq->lock);

	return r;
}

static int mq_lookup(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t *cblock)
{
	int r;
	struct mq_policy *mq = to_mq_policy(p);
	struct entry *e;

	if (!mutex_trylock(&mq->lock))
		return -EWOULDBLOCK;

	e = hash_lookup(mq, oblock);
	if (e && in_cache(mq, e)) {
		*cblock = infer_cblock(&mq->cache_pool, e);
		r = 0;
	} else
		r = -ENOENT;

	mutex_unlock(&mq->lock);

	return r;
}

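/*
 * Moves an entry between the clean and dirty queues by removing it and
 * pushing it back with the new dirty state.
 */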
static void __mq_set_clear_dirty(struct mq_policy *mq, dm_oblock_t oblock, bool set)
{
	struct entry *e;

	e = hash_lookup(mq, oblock);
	BUG_ON(!e || !in_cache(mq, e));

	del(mq, e);
	e->dirty = set;
	push(mq, e);
}

static void mq_set_dirty(struct dm_cache_policy *p, dm_oblock_t oblock)
{
	struct mq_policy *mq = to_mq_policy(p);

	mutex_lock(&mq->lock);
	__mq_set_clear_dirty(mq, oblock, true);
	mutex_unlock(&mq->lock);
}

static void mq_clear_dirty(struct dm_cache_policy *p, dm_oblock_t oblock)
{
	struct mq_policy *mq = to_mq_policy(p);

	mutex_lock(&mq->lock);
	__mq_set_clear_dirty(mq, oblock, false);
	mutex_unlock(&mq->lock);
}

static int mq_load_mapping(struct dm_cache_policy *p,
			   dm_oblock_t oblock, dm_cblock_t cblock,
			   uint32_t hint, bool hint_valid)
{
	struct mq_policy *mq = to_mq_policy(p);
	struct entry *e;

	e = alloc_particular_entry(&mq->cache_pool, cblock);
	e->oblock = oblock;
	e->dirty = false;	/* this gets corrected in a minute */
	e->hit_count = hint_valid ? hint : 1;
	e->generation = mq->generation;
	push(mq, e);

	return 0;
}

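/*
 * Walks a single queue, skipping sentinels, handing each entry's cblock,
 * oblock and hit count to the callback so they can be saved as hints.
 */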
static int mq_save_hints(struct mq_policy *mq, struct queue *q,
			 policy_walk_fn fn, void *context)
{
	int r;
	unsigned level;
	struct list_head *h;
	struct entry *e;

	for (level = 0; level < NR_QUEUE_LEVELS; level++)
		list_for_each(h, q->qs + level) {
			if (is_sentinel(q, h))
				continue;

			e = container_of(h, struct entry, list);
			r = fn(context, infer_cblock(&mq->cache_pool, e),
			       e->oblock, e->hit_count);
			if (r)
				return r;
		}

	return 0;
}

static int mq_walk_mappings(struct dm_cache_policy *p, policy_walk_fn fn,
			    void *context)
{
	struct mq_policy *mq = to_mq_policy(p);
	int r = 0;

	mutex_lock(&mq->lock);

	r = mq_save_hints(mq, &mq->cache_clean, fn, context);
	if (!r)
		r = mq_save_hints(mq, &mq->cache_dirty, fn, context);

	mutex_unlock(&mq->lock);

	return r;
}

static void __remove_mapping(struct mq_policy *mq, dm_oblock_t oblock)
{
	struct entry *e;

	e = hash_lookup(mq, oblock);
	BUG_ON(!e || !in_cache(mq, e));

	del(mq, e);
	free_entry(&mq->cache_pool, e);
}

static void mq_remove_mapping(struct dm_cache_policy *p, dm_oblock_t oblock)
{
	struct mq_policy *mq = to_mq_policy(p);

	mutex_lock(&mq->lock);
	__remove_mapping(mq, oblock);
	mutex_unlock(&mq->lock);
}

static int __remove_cblock(struct mq_policy *mq, dm_cblock_t cblock)
{
	struct entry *e = epool_find(&mq->cache_pool, cblock);

	if (!e)
		return -ENODATA;

	del(mq, e);
	free_entry(&mq->cache_pool, e);

	return 0;
}

static int mq_remove_cblock(struct dm_cache_policy *p, dm_cblock_t cblock)
{
	int r;
	struct mq_policy *mq = to_mq_policy(p);

	mutex_lock(&mq->lock);
	r = __remove_cblock(mq, cblock);
	mutex_unlock(&mq->lock);

	return r;
}

static int __mq_writeback_work(struct mq_policy *mq, dm_oblock_t *oblock,
			       dm_cblock_t *cblock)
{
	struct entry *e = pop(mq, &mq->cache_dirty);

	if (!e)
		return -ENODATA;

	*oblock = e->oblock;
	*cblock = infer_cblock(&mq->cache_pool, e);
	e->dirty = false;
	push(mq, e);

	return 0;
}

static int mq_writeback_work(struct dm_cache_policy *p, dm_oblock_t *oblock,
			     dm_cblock_t *cblock)
{
	int r;
	struct mq_policy *mq = to_mq_policy(p);

	mutex_lock(&mq->lock);
	r = __mq_writeback_work(mq, oblock, cblock);
	mutex_unlock(&mq->lock);

	return r;
}

static void __force_mapping(struct mq_policy *mq,
			    dm_oblock_t current_oblock, dm_oblock_t new_oblock)
{
	struct entry *e = hash_lookup(mq, current_oblock);

	if (e && in_cache(mq, e)) {
		del(mq, e);
		e->oblock = new_oblock;
		e->dirty = true;
		push(mq, e);
	}
}

static void mq_force_mapping(struct dm_cache_policy *p,
			     dm_oblock_t current_oblock, dm_oblock_t new_oblock)
{
	struct mq_policy *mq = to_mq_policy(p);

	mutex_lock(&mq->lock);
	__force_mapping(mq, current_oblock, new_oblock);
	mutex_unlock(&mq->lock);
}

static dm_cblock_t mq_residency(struct dm_cache_policy *p)
{
	dm_cblock_t r;
	struct mq_policy *mq = to_mq_policy(p);

	mutex_lock(&mq->lock);
	r = to_cblock(mq->cache_pool.nr_allocated);
	mutex_unlock(&mq->lock);

	return r;
}

static void mq_tick(struct dm_cache_policy *p)
{
	struct mq_policy *mq = to_mq_policy(p);
	unsigned long flags;

	spin_lock_irqsave(&mq->tick_lock, flags);
	mq->tick_protected++;
	spin_unlock_irqrestore(&mq->tick_lock, flags);
}

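/*
 * Handles the tunables that can be adjusted via the constructor and
 * message interfaces (see the threshold comment near the top of the
 * file).
 */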
static int mq_set_config_value(struct dm_cache_policy *p,
			       const char *key, const char *value)
{
	struct mq_policy *mq = to_mq_policy(p);
	unsigned long tmp;

	if (kstrtoul(value, 10, &tmp))
		return -EINVAL;

	if (!strcasecmp(key, "random_threshold")) {
		mq->tracker.thresholds[PATTERN_RANDOM] = tmp;

	} else if (!strcasecmp(key, "sequential_threshold")) {
		mq->tracker.thresholds[PATTERN_SEQUENTIAL] = tmp;

	} else if (!strcasecmp(key, "discard_promote_adjustment"))
		mq->discard_promote_adjustment = tmp;

	else if (!strcasecmp(key, "read_promote_adjustment"))
		mq->read_promote_adjustment = tmp;

	else if (!strcasecmp(key, "write_promote_adjustment"))
		mq->write_promote_adjustment = tmp;

	else
		return -EINVAL;

	return 0;
}

static int mq_emit_config_values(struct dm_cache_policy *p, char *result, unsigned maxlen)
{
	ssize_t sz = 0;
	struct mq_policy *mq = to_mq_policy(p);

	DMEMIT("10 random_threshold %u "
	       "sequential_threshold %u "
	       "discard_promote_adjustment %u "
	       "read_promote_adjustment %u "
	       "write_promote_adjustment %u",
	       mq->tracker.thresholds[PATTERN_RANDOM],
	       mq->tracker.thresholds[PATTERN_SEQUENTIAL],
	       mq->discard_promote_adjustment,
	       mq->read_promote_adjustment,
	       mq->write_promote_adjustment);

	return 0;
}

/* Init the policy plugin interface function pointers. */
static void init_policy_functions(struct mq_policy *mq)
{
	mq->policy.destroy = mq_destroy;
	mq->policy.map = mq_map;
	mq->policy.lookup = mq_lookup;
	mq->policy.set_dirty = mq_set_dirty;
	mq->policy.clear_dirty = mq_clear_dirty;
	mq->policy.load_mapping = mq_load_mapping;
	mq->policy.walk_mappings = mq_walk_mappings;
	mq->policy.remove_mapping = mq_remove_mapping;
	mq->policy.remove_cblock = mq_remove_cblock;
	mq->policy.writeback_work = mq_writeback_work;
	mq->policy.force_mapping = mq_force_mapping;
	mq->policy.residency = mq_residency;
	mq->policy.tick = mq_tick;
	mq->policy.emit_config_values = mq_emit_config_values;
	mq->policy.set_config_value = mq_set_config_value;
}

static struct dm_cache_policy *mq_create(dm_cblock_t cache_size,
					 sector_t origin_size,
					 sector_t cache_block_size)
{
	struct mq_policy *mq = kzalloc(sizeof(*mq), GFP_KERNEL);

	if (!mq)
		return NULL;

	init_policy_functions(mq);
	iot_init(&mq->tracker, SEQUENTIAL_THRESHOLD_DEFAULT, RANDOM_THRESHOLD_DEFAULT);
	mq->cache_size = cache_size;

	if (epool_init(&mq->pre_cache_pool, from_cblock(cache_size))) {
		DMERR("couldn't initialize pool of pre-cache entries");
		goto bad_pre_cache_init;
	}

	if (epool_init(&mq->cache_pool, from_cblock(cache_size))) {
		DMERR("couldn't initialize pool of cache entries");
		goto bad_cache_init;
	}

	mq->tick_protected = 0;
	mq->tick = 0;
	mq->hit_count = 0;
	mq->generation = 0;
	mq->discard_promote_adjustment = DEFAULT_DISCARD_PROMOTE_ADJUSTMENT;
	mq->read_promote_adjustment = DEFAULT_READ_PROMOTE_ADJUSTMENT;
	mq->write_promote_adjustment = DEFAULT_WRITE_PROMOTE_ADJUSTMENT;
	mutex_init(&mq->lock);
	spin_lock_init(&mq->tick_lock);

	queue_init(&mq->pre_cache);
	queue_init(&mq->cache_clean);
	queue_init(&mq->cache_dirty);

	mq->generation_period = max((unsigned) from_cblock(cache_size), 1024U);

	mq->nr_buckets = next_power(from_cblock(cache_size) / 2, 16);
	mq->hash_bits = ffs(mq->nr_buckets) - 1;
	mq->table = vzalloc(sizeof(*mq->table) * mq->nr_buckets);
	if (!mq->table)
		goto bad_alloc_table;

	return &mq->policy;

bad_alloc_table:
	epool_exit(&mq->cache_pool);
bad_cache_init:
	epool_exit(&mq->pre_cache_pool);
bad_pre_cache_init:
	kfree(mq);

	return NULL;
}

/*----------------------------------------------------------------*/

static struct dm_cache_policy_type mq_policy_type = {
	.name = "mq",
	.version = {1, 3, 0},
	.hint_size = 4,
	.owner = THIS_MODULE,
	.create = mq_create
};

static struct dm_cache_policy_type default_policy_type = {
	.name = "default",
	.version = {1, 3, 0},
	.hint_size = 4,
	.owner = THIS_MODULE,
	.create = mq_create,
	.real = &mq_policy_type
};

static int __init mq_init(void)
{
	int r;

	mq_entry_cache = kmem_cache_create("dm_mq_policy_cache_entry",
					   sizeof(struct entry),
					   __alignof__(struct entry),
					   0, NULL);
	if (!mq_entry_cache)
		goto bad;

	r = dm_cache_policy_register(&mq_policy_type);
	if (r) {
		DMERR("register failed %d", r);
		goto bad_register_mq;
	}

	r = dm_cache_policy_register(&default_policy_type);
	if (!r) {
		DMINFO("version %u.%u.%u loaded",
		       mq_policy_type.version[0],
		       mq_policy_type.version[1],
		       mq_policy_type.version[2]);
		return 0;
	}

	DMERR("register failed (as default) %d", r);

	dm_cache_policy_unregister(&mq_policy_type);
bad_register_mq:
	kmem_cache_destroy(mq_entry_cache);
bad:
	return -ENOMEM;
}

static void __exit mq_exit(void)
{
	dm_cache_policy_unregister(&mq_policy_type);
	dm_cache_policy_unregister(&default_policy_type);

	kmem_cache_destroy(mq_entry_cache);
}

module_init(mq_init);
module_exit(mq_exit);

MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("mq cache policy");

MODULE_ALIAS("dm-cache-default");