/*
 * Copyright (C) 2012 Red Hat. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include "dm.h"
#include "dm-bio-prison.h"
#include "dm-bio-record.h"
#include "dm-cache-metadata.h"

#include <linux/dm-io.h>
#include <linux/dm-kcopyd.h>
#include <linux/jiffies.h>
#include <linux/init.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

#define DM_MSG_PREFIX "cache"

DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(cache_copy_throttle,
	"A percentage of time allocated for copying to and/or from cache");

/*----------------------------------------------------------------*/

#define IOT_RESOLUTION 4

struct io_tracker {
	spinlock_t lock;

	/*
	 * Sectors of in-flight IO.
	 */
	sector_t in_flight;

	/*
	 * The time, in jiffies, when this device became idle (if it is
	 * indeed idle).
	 */
	unsigned long idle_time;
	unsigned long last_update_time;
};

static void iot_init(struct io_tracker *iot)
{
	spin_lock_init(&iot->lock);
	iot->in_flight = 0ul;
	iot->idle_time = 0ul;
	iot->last_update_time = jiffies;
}

static bool __iot_idle_for(struct io_tracker *iot, unsigned long jifs)
{
	if (iot->in_flight)
		return false;

	return time_after(jiffies, iot->idle_time + jifs);
}

static bool iot_idle_for(struct io_tracker *iot, unsigned long jifs)
{
	bool r;
	unsigned long flags;

	spin_lock_irqsave(&iot->lock, flags);
	r = __iot_idle_for(iot, jifs);
	spin_unlock_irqrestore(&iot->lock, flags);

	return r;
}

static void iot_io_begin(struct io_tracker *iot, sector_t len)
{
	unsigned long flags;

	spin_lock_irqsave(&iot->lock, flags);
	iot->in_flight += len;
	spin_unlock_irqrestore(&iot->lock, flags);
}

static void __iot_io_end(struct io_tracker *iot, sector_t len)
{
	iot->in_flight -= len;
	if (!iot->in_flight)
		iot->idle_time = jiffies;
}

static void iot_io_end(struct io_tracker *iot, sector_t len)
{
	unsigned long flags;

	spin_lock_irqsave(&iot->lock, flags);
	__iot_io_end(iot, len);
	spin_unlock_irqrestore(&iot->lock, flags);
}
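
/*
 * Illustrative usage sketch only (the callers are not part of this
 * excerpt): the tracker is meant to be driven from the io path, with
 * iot_idle_for() answering "has this device been quiet for N jiffies?":
 *
 *	iot_io_begin(&tracker, bio_sectors(bio));
 *	... submit and complete the io ...
 *	iot_io_end(&tracker, bio_sectors(bio));
 *
 *	if (iot_idle_for(&tracker, HZ))
 *		;	\/\* no io in flight, idle for at least a second *\/
 */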

/*----------------------------------------------------------------*/

/*
 * Glossary:
 *
 * oblock: index of an origin block
 * cblock: index of a cache block
 * promotion: movement of a block from origin to cache
 * demotion: movement of a block from cache to origin
 * migration: movement of a block between the origin and cache device,
 *	      either direction
 */

/*----------------------------------------------------------------*/

static size_t bitset_size_in_bytes(unsigned nr_entries)
{
	return sizeof(unsigned long) * dm_div_up(nr_entries, BITS_PER_LONG);
}
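
/*
 * For example (illustrative numbers, assuming 64-bit longs): a cache with
 * 10000 cblocks needs dm_div_up(10000, 64) = 157 longs, i.e. 1256 bytes
 * for its dirty bitset.
 */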

static unsigned long *alloc_bitset(unsigned nr_entries)
{
	size_t s = bitset_size_in_bytes(nr_entries);
	return vzalloc(s);
}

static void clear_bitset(void *bitset, unsigned nr_entries)
{
	size_t s = bitset_size_in_bytes(nr_entries);
	memset(bitset, 0, s);
}

static void free_bitset(unsigned long *bits)
{
	vfree(bits);
}

/*----------------------------------------------------------------*/

/*
 * There are a couple of places where we let a bio run, but want to do some
 * work before calling its endio function.  We do this by temporarily
 * changing the endio fn.
 */
struct dm_hook_info {
	bio_end_io_t *bi_end_io;
	void *bi_private;
};

static void dm_hook_bio(struct dm_hook_info *h, struct bio *bio,
			bio_end_io_t *bi_end_io, void *bi_private)
{
	h->bi_end_io = bio->bi_end_io;
	h->bi_private = bio->bi_private;

	bio->bi_end_io = bi_end_io;
	bio->bi_private = bi_private;
}

static void dm_unhook_bio(struct dm_hook_info *h, struct bio *bio)
{
	bio->bi_end_io = h->bi_end_io;
	bio->bi_private = h->bi_private;
}
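
/*
 * Typical pattern (sketch only): hook the bio before issuing it, then have
 * the replacement endio undo the hook so the original completion still
 * runs.  writethrough_endio() and overwrite_endio() below follow this
 * shape:
 *
 *	dm_hook_bio(&pb->hook_info, bio, my_endio, my_context);
 *	generic_make_request(bio);
 *
 *	... and in my_endio():
 *	dm_unhook_bio(&pb->hook_info, bio);
 */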

/*----------------------------------------------------------------*/

#define MIGRATION_POOL_SIZE 128
#define COMMIT_PERIOD HZ
#define MIGRATION_COUNT_WINDOW 10

/*
 * The block size of the device holding cache data must be
 * between 32KB and 1GB.
 */
#define DATA_DEV_BLOCK_SIZE_MIN_SECTORS (32 * 1024 >> SECTOR_SHIFT)
#define DATA_DEV_BLOCK_SIZE_MAX_SECTORS (1024 * 1024 * 1024 >> SECTOR_SHIFT)

/*
 * FIXME: the cache is read/write for the time being.
 */
enum cache_metadata_mode {
	CM_WRITE,		/* metadata may be changed */
	CM_READ_ONLY,		/* metadata may not be changed */
};

enum cache_io_mode {
	/*
	 * Data is written to cached blocks only.  These blocks are marked
	 * dirty.  If you lose the cache device you will lose data.
	 * Potential performance increase for both reads and writes.
	 */
	CM_IO_WRITEBACK,

	/*
	 * Data is written to both cache and origin.  Blocks are never
	 * dirty.  Potential performance benefit for reads only.
	 */
	CM_IO_WRITETHROUGH,

	/*
	 * A degraded mode useful for various cache coherency situations
	 * (eg, rolling back snapshots).  Reads and writes always go to the
	 * origin.  If a write goes to a cached oblock, then the cache
	 * block is invalidated.
	 */
	CM_IO_PASSTHROUGH
};

struct cache_features {
	enum cache_metadata_mode mode;
	enum cache_io_mode io_mode;
};

struct cache_stats {
	atomic_t read_hit;
	atomic_t read_miss;
	atomic_t write_hit;
	atomic_t write_miss;
	atomic_t demotion;
	atomic_t promotion;
	atomic_t copies_avoided;
	atomic_t cache_cell_clash;
	atomic_t commit_count;
	atomic_t discard_count;
};

/*
 * Defines a range of cblocks: begin to (end - 1) are in the range; end is
 * the one-past-the-end value.
 */
struct cblock_range {
	dm_cblock_t begin;
	dm_cblock_t end;
};

struct invalidation_request {
	struct list_head list;
	struct cblock_range *cblocks;

	atomic_t complete;
	int err;

	wait_queue_head_t result_wait;
};

struct cache {
	struct dm_target *ti;
	struct dm_target_callbacks callbacks;

	struct dm_cache_metadata *cmd;

	/*
	 * Metadata is written to this device.
	 */
	struct dm_dev *metadata_dev;

	/*
	 * The slower of the two data devices.  Typically a spindle.
	 */
	struct dm_dev *origin_dev;

	/*
	 * The faster of the two data devices.  Typically an SSD.
	 */
	struct dm_dev *cache_dev;

	/*
	 * Size of the origin device in _complete_ blocks and native sectors.
	 */
	dm_oblock_t origin_blocks;
	sector_t origin_sectors;

	/*
	 * Size of the cache device in blocks.
	 */
	dm_cblock_t cache_size;

	/*
	 * Fields for converting from sectors to blocks.
	 */
	uint32_t sectors_per_block;
	int sectors_per_block_shift;

	spinlock_t lock;
	struct bio_list deferred_bios;
	struct bio_list deferred_flush_bios;
	struct bio_list deferred_writethrough_bios;
	struct list_head quiesced_migrations;
	struct list_head completed_migrations;
	struct list_head need_commit_migrations;
	sector_t migration_threshold;
	wait_queue_head_t migration_wait;
	atomic_t nr_allocated_migrations;

	/*
	 * The number of in flight migrations that are performing
	 * background io.  eg, promotion, writeback.
	 */
	atomic_t nr_io_migrations;

	wait_queue_head_t quiescing_wait;
	atomic_t quiescing;
	atomic_t quiescing_ack;

	/*
	 * cache_size entries, dirty if set
	 */
	atomic_t nr_dirty;
	unsigned long *dirty_bitset;

	/*
	 * origin_blocks entries, discarded if set.
	 */
	dm_dblock_t discard_nr_blocks;
	unsigned long *discard_bitset;
	uint32_t discard_block_size; /* a power of 2 times sectors per block */

	/*
	 * Rather than reconstructing the table line for the status we just
	 * save it and regurgitate.
	 */
	unsigned nr_ctr_args;
	const char **ctr_args;

	struct dm_kcopyd_client *copier;
	struct workqueue_struct *wq;
	struct work_struct worker;

	struct delayed_work waker;
	unsigned long last_commit_jiffies;

	struct dm_bio_prison *prison;
	struct dm_deferred_set *all_io_ds;

	mempool_t *migration_pool;

	struct dm_cache_policy *policy;
	unsigned policy_nr_args;

	bool need_tick_bio:1;
	bool sized:1;
	bool invalidate:1;
	bool commit_requested:1;
	bool loaded_mappings:1;
	bool loaded_discards:1;

	/*
	 * Cache features such as write-through.
	 */
	struct cache_features features;

	struct cache_stats stats;

	/*
	 * Invalidation fields.
	 */
	spinlock_t invalidation_lock;
	struct list_head invalidation_requests;
};

struct per_bio_data {
	bool tick:1;
	unsigned req_nr:2;
	struct dm_deferred_entry *all_io_entry;
	struct dm_hook_info hook_info;

	/*
	 * writethrough fields.  These MUST remain at the end of this
	 * structure and the 'cache' member must be the first as it
	 * is used to determine the offset of the writethrough fields.
	 */
	struct cache *cache;
	dm_cblock_t cblock;
	struct dm_bio_details bio_details;
};

struct dm_cache_migration {
	struct list_head list;
	struct cache *cache;

	unsigned long start_jiffies;
	dm_oblock_t old_oblock;
	dm_oblock_t new_oblock;
	dm_cblock_t cblock;

	bool err:1;
	bool discard:1;
	bool writeback:1;
	bool demote:1;
	bool promote:1;
	bool requeue_holder:1;
	bool invalidate:1;

	struct dm_bio_prison_cell *old_ocell;
	struct dm_bio_prison_cell *new_ocell;
};

/*
 * Processing a bio in the worker thread may require these memory
 * allocations.  We prealloc to avoid deadlocks (the same worker thread
 * frees them back to the mempool).
 */
struct prealloc {
	struct dm_cache_migration *mg;
	struct dm_bio_prison_cell *cell1;
	struct dm_bio_prison_cell *cell2;
};

static void wake_worker(struct cache *cache)
{
	queue_work(cache->wq, &cache->worker);
}

/*----------------------------------------------------------------*/

static struct dm_bio_prison_cell *alloc_prison_cell(struct cache *cache)
{
	/* FIXME: change to use a local slab. */
	return dm_bio_prison_alloc_cell(cache->prison, GFP_NOWAIT);
}

static void free_prison_cell(struct cache *cache, struct dm_bio_prison_cell *cell)
{
	dm_bio_prison_free_cell(cache->prison, cell);
}

static struct dm_cache_migration *alloc_migration(struct cache *cache)
{
	struct dm_cache_migration *mg;

	mg = mempool_alloc(cache->migration_pool, GFP_NOWAIT);
	if (mg) {
		mg->cache = cache;
		atomic_inc(&mg->cache->nr_allocated_migrations);
	}

	return mg;
}

static void free_migration(struct dm_cache_migration *mg)
{
	if (atomic_dec_and_test(&mg->cache->nr_allocated_migrations))
		wake_up(&mg->cache->migration_wait);

	mempool_free(mg, mg->cache->migration_pool);
}

static int prealloc_data_structs(struct cache *cache, struct prealloc *p)
{
	if (!p->mg) {
		p->mg = alloc_migration(cache);
		if (!p->mg)
			return -ENOMEM;
	}

	if (!p->cell1) {
		p->cell1 = alloc_prison_cell(cache);
		if (!p->cell1)
			return -ENOMEM;
	}

	if (!p->cell2) {
		p->cell2 = alloc_prison_cell(cache);
		if (!p->cell2)
			return -ENOMEM;
	}

	return 0;
}

static void prealloc_free_structs(struct cache *cache, struct prealloc *p)
{
	if (p->cell2)
		free_prison_cell(cache, p->cell2);

	if (p->cell1)
		free_prison_cell(cache, p->cell1);

	if (p->mg)
		free_migration(p->mg);
}

static struct dm_cache_migration *prealloc_get_migration(struct prealloc *p)
{
	struct dm_cache_migration *mg = p->mg;

	BUG_ON(!mg);
	p->mg = NULL;

	return mg;
}

/*
 * You must have a cell within the prealloc struct to return.  If not this
 * function will BUG() rather than returning NULL.
 */
static struct dm_bio_prison_cell *prealloc_get_cell(struct prealloc *p)
{
	struct dm_bio_prison_cell *r = NULL;

	if (p->cell1) {
		r = p->cell1;
		p->cell1 = NULL;

	} else if (p->cell2) {
		r = p->cell2;
		p->cell2 = NULL;
	} else
		BUG();

	return r;
}

/*
 * You can't have more than two cells in a prealloc struct.  BUG() will be
 * called if you try and overfill.
 */
static void prealloc_put_cell(struct prealloc *p, struct dm_bio_prison_cell *cell)
{
	if (!p->cell2)
		p->cell2 = cell;

	else if (!p->cell1)
		p->cell1 = cell;

	else
		BUG();
}

/*----------------------------------------------------------------*/

static void build_key(dm_oblock_t begin, dm_oblock_t end, struct dm_cell_key *key)
{
	key->virtual = 0;
	key->dev = 0;
	key->block_begin = from_oblock(begin);
	key->block_end = from_oblock(end);
}

/*
 * The caller hands in a preallocated cell, and a free function for it.
 * The cell will be freed if there's an error, or if it wasn't used because
 * a cell with that key already exists.
 */
typedef void (*cell_free_fn)(void *context, struct dm_bio_prison_cell *cell);

static int bio_detain_range(struct cache *cache, dm_oblock_t oblock_begin, dm_oblock_t oblock_end,
			    struct bio *bio, struct dm_bio_prison_cell *cell_prealloc,
			    cell_free_fn free_fn, void *free_context,
			    struct dm_bio_prison_cell **cell_result)
{
	int r;
	struct dm_cell_key key;

	build_key(oblock_begin, oblock_end, &key);
	r = dm_bio_detain(cache->prison, &key, bio, cell_prealloc, cell_result);
	if (r)
		free_fn(free_context, cell_prealloc);

	return r;
}

static int bio_detain(struct cache *cache, dm_oblock_t oblock,
		      struct bio *bio, struct dm_bio_prison_cell *cell_prealloc,
		      cell_free_fn free_fn, void *free_context,
		      struct dm_bio_prison_cell **cell_result)
{
	dm_oblock_t end = to_oblock(from_oblock(oblock) + 1ULL);
	return bio_detain_range(cache, oblock, end, bio,
				cell_prealloc, free_fn, free_context, cell_result);
}

static int get_cell(struct cache *cache,
		    dm_oblock_t oblock,
		    struct prealloc *structs,
		    struct dm_bio_prison_cell **cell_result)
{
	int r;
	struct dm_cell_key key;
	struct dm_bio_prison_cell *cell_prealloc;

	cell_prealloc = prealloc_get_cell(structs);

	build_key(oblock, to_oblock(from_oblock(oblock) + 1ULL), &key);
	r = dm_get_cell(cache->prison, &key, cell_prealloc, cell_result);
	if (r)
		prealloc_put_cell(structs, cell_prealloc);

	return r;
}

/*----------------------------------------------------------------*/

static bool is_dirty(struct cache *cache, dm_cblock_t b)
{
	return test_bit(from_cblock(b), cache->dirty_bitset);
}

static void set_dirty(struct cache *cache, dm_oblock_t oblock, dm_cblock_t cblock)
{
	if (!test_and_set_bit(from_cblock(cblock), cache->dirty_bitset)) {
		atomic_inc(&cache->nr_dirty);
		policy_set_dirty(cache->policy, oblock);
	}
}

static void clear_dirty(struct cache *cache, dm_oblock_t oblock, dm_cblock_t cblock)
{
	if (test_and_clear_bit(from_cblock(cblock), cache->dirty_bitset)) {
		policy_clear_dirty(cache->policy, oblock);
		if (atomic_dec_return(&cache->nr_dirty) == 0)
			dm_table_event(cache->ti->table);
	}
}

/*----------------------------------------------------------------*/

static bool block_size_is_power_of_two(struct cache *cache)
{
	return cache->sectors_per_block_shift >= 0;
}

/* gcc on ARM generates spurious references to __udivdi3 and __umoddi3 */
#if defined(CONFIG_ARM) && __GNUC__ == 4 && __GNUC_MINOR__ <= 6
__always_inline
#endif
static dm_block_t block_div(dm_block_t b, uint32_t n)
{
	do_div(b, n);

	return b;
}

static dm_block_t oblocks_per_dblock(struct cache *cache)
{
	dm_block_t oblocks = cache->discard_block_size;

	if (block_size_is_power_of_two(cache))
		oblocks >>= cache->sectors_per_block_shift;
	else
		oblocks = block_div(oblocks, cache->sectors_per_block);

	return oblocks;
}

static dm_dblock_t oblock_to_dblock(struct cache *cache, dm_oblock_t oblock)
{
	return to_dblock(block_div(from_oblock(oblock),
				   oblocks_per_dblock(cache)));
}

static dm_oblock_t dblock_to_oblock(struct cache *cache, dm_dblock_t dblock)
{
	return to_oblock(from_dblock(dblock) * oblocks_per_dblock(cache));
}
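
/*
 * Worked example (illustrative numbers only): with sectors_per_block = 128
 * (64KB cache blocks) and discard_block_size = 1024 sectors, each discard
 * block spans oblocks_per_dblock() = 1024 / 128 = 8 origin blocks, so
 * oblock 20 maps to dblock 2 and dblock 2 maps back to oblock 16.
 */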

static void set_discard(struct cache *cache, dm_dblock_t b)
{
	unsigned long flags;

	BUG_ON(from_dblock(b) >= from_dblock(cache->discard_nr_blocks));
	atomic_inc(&cache->stats.discard_count);

	spin_lock_irqsave(&cache->lock, flags);
	set_bit(from_dblock(b), cache->discard_bitset);
	spin_unlock_irqrestore(&cache->lock, flags);
}

static void clear_discard(struct cache *cache, dm_dblock_t b)
{
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	clear_bit(from_dblock(b), cache->discard_bitset);
	spin_unlock_irqrestore(&cache->lock, flags);
}

static bool is_discarded(struct cache *cache, dm_dblock_t b)
{
	int r;
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	r = test_bit(from_dblock(b), cache->discard_bitset);
	spin_unlock_irqrestore(&cache->lock, flags);

	return r;
}

static bool is_discarded_oblock(struct cache *cache, dm_oblock_t b)
{
	int r;
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	r = test_bit(from_dblock(oblock_to_dblock(cache, b)),
		     cache->discard_bitset);
	spin_unlock_irqrestore(&cache->lock, flags);

	return r;
}

/*----------------------------------------------------------------*/

static void load_stats(struct cache *cache)
{
	struct dm_cache_statistics stats;

	dm_cache_metadata_get_stats(cache->cmd, &stats);
	atomic_set(&cache->stats.read_hit, stats.read_hits);
	atomic_set(&cache->stats.read_miss, stats.read_misses);
	atomic_set(&cache->stats.write_hit, stats.write_hits);
	atomic_set(&cache->stats.write_miss, stats.write_misses);
}

static void save_stats(struct cache *cache)
{
	struct dm_cache_statistics stats;

	stats.read_hits = atomic_read(&cache->stats.read_hit);
	stats.read_misses = atomic_read(&cache->stats.read_miss);
	stats.write_hits = atomic_read(&cache->stats.write_hit);
	stats.write_misses = atomic_read(&cache->stats.write_miss);

	dm_cache_metadata_set_stats(cache->cmd, &stats);
}

/*----------------------------------------------------------------
 * Per bio data
 *--------------------------------------------------------------*/

/*
 * If using writeback, leave out struct per_bio_data's writethrough fields.
 */
#define PB_DATA_SIZE_WB (offsetof(struct per_bio_data, cache))
#define PB_DATA_SIZE_WT (sizeof(struct per_bio_data))

static bool writethrough_mode(struct cache_features *f)
{
	return f->io_mode == CM_IO_WRITETHROUGH;
}

static bool writeback_mode(struct cache_features *f)
{
	return f->io_mode == CM_IO_WRITEBACK;
}

static bool passthrough_mode(struct cache_features *f)
{
	return f->io_mode == CM_IO_PASSTHROUGH;
}

static size_t get_per_bio_data_size(struct cache *cache)
{
	return writethrough_mode(&cache->features) ? PB_DATA_SIZE_WT : PB_DATA_SIZE_WB;
}

static struct per_bio_data *get_per_bio_data(struct bio *bio, size_t data_size)
{
	struct per_bio_data *pb = dm_per_bio_data(bio, data_size);
	BUG_ON(!pb);
	return pb;
}

static struct per_bio_data *init_per_bio_data(struct bio *bio, size_t data_size)
{
	struct per_bio_data *pb = get_per_bio_data(bio, data_size);

	pb->tick = false;
	pb->req_nr = dm_bio_get_target_bio_nr(bio);
	pb->all_io_entry = NULL;

	return pb;
}

/*----------------------------------------------------------------
 * Remapping
 *--------------------------------------------------------------*/
static void remap_to_origin(struct cache *cache, struct bio *bio)
{
	bio->bi_bdev = cache->origin_dev->bdev;
}

static void remap_to_cache(struct cache *cache, struct bio *bio,
			   dm_cblock_t cblock)
{
	sector_t bi_sector = bio->bi_iter.bi_sector;
	sector_t block = from_cblock(cblock);

	bio->bi_bdev = cache->cache_dev->bdev;
	if (!block_size_is_power_of_two(cache))
		bio->bi_iter.bi_sector =
			(block * cache->sectors_per_block) +
			sector_div(bi_sector, cache->sectors_per_block);
	else
		bio->bi_iter.bi_sector =
			(block << cache->sectors_per_block_shift) |
			(bi_sector & (cache->sectors_per_block - 1));
}
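
/*
 * Example (illustrative numbers only): with 64KB cache blocks
 * (sectors_per_block = 128, shift = 7), a bio at origin sector 1000 being
 * remapped to cblock 3 lands at cache sector (3 << 7) | (1000 & 127) =
 * 384 + 104 = 488, i.e. the same offset within the block, but on the
 * cache device.
 */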

static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio)
{
	unsigned long flags;
	size_t pb_data_size = get_per_bio_data_size(cache);
	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);

	spin_lock_irqsave(&cache->lock, flags);
	if (cache->need_tick_bio &&
	    !(bio->bi_rw & (REQ_FUA | REQ_FLUSH | REQ_DISCARD))) {
		pb->tick = true;
		cache->need_tick_bio = false;
	}
	spin_unlock_irqrestore(&cache->lock, flags);
}

static void remap_to_origin_clear_discard(struct cache *cache, struct bio *bio,
					  dm_oblock_t oblock)
{
	check_if_tick_bio_needed(cache, bio);
	remap_to_origin(cache, bio);
	if (bio_data_dir(bio) == WRITE)
		clear_discard(cache, oblock_to_dblock(cache, oblock));
}

static void remap_to_cache_dirty(struct cache *cache, struct bio *bio,
				 dm_oblock_t oblock, dm_cblock_t cblock)
{
	check_if_tick_bio_needed(cache, bio);
	remap_to_cache(cache, bio, cblock);
	if (bio_data_dir(bio) == WRITE) {
		set_dirty(cache, oblock, cblock);
		clear_discard(cache, oblock_to_dblock(cache, oblock));
	}
}

static dm_oblock_t get_bio_block(struct cache *cache, struct bio *bio)
{
	sector_t block_nr = bio->bi_iter.bi_sector;

	if (!block_size_is_power_of_two(cache))
		(void) sector_div(block_nr, cache->sectors_per_block);
	else
		block_nr >>= cache->sectors_per_block_shift;

	return to_oblock(block_nr);
}

static int bio_triggers_commit(struct cache *cache, struct bio *bio)
{
	return bio->bi_rw & (REQ_FLUSH | REQ_FUA);
}

/*
 * You must increment the deferred set whilst the prison cell is held.  To
 * encourage this, we ask for 'cell' to be passed in.
 */
static void inc_ds(struct cache *cache, struct bio *bio,
		   struct dm_bio_prison_cell *cell)
{
	size_t pb_data_size = get_per_bio_data_size(cache);
	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);

	BUG_ON(!cell);
	BUG_ON(pb->all_io_entry);

	pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
}

static void issue(struct cache *cache, struct bio *bio)
{
	unsigned long flags;

	if (!bio_triggers_commit(cache, bio)) {
		generic_make_request(bio);
		return;
	}

	/*
	 * Batch together any bios that trigger commits and then issue a
	 * single commit for them in do_worker().
	 */
	spin_lock_irqsave(&cache->lock, flags);
	cache->commit_requested = true;
	bio_list_add(&cache->deferred_flush_bios, bio);
	spin_unlock_irqrestore(&cache->lock, flags);
}

static void inc_and_issue(struct cache *cache, struct bio *bio, struct dm_bio_prison_cell *cell)
{
	inc_ds(cache, bio, cell);
	issue(cache, bio);
}

static void defer_writethrough_bio(struct cache *cache, struct bio *bio)
{
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	bio_list_add(&cache->deferred_writethrough_bios, bio);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_worker(cache);
}

static void writethrough_endio(struct bio *bio, int err)
{
	struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT);

	dm_unhook_bio(&pb->hook_info, bio);

	if (err) {
		bio_endio(bio, err);
		return;
	}

	dm_bio_restore(&pb->bio_details, bio);
	remap_to_cache(pb->cache, bio, pb->cblock);

	/*
	 * We can't issue this bio directly, since we're in interrupt
	 * context.  So it gets put on a bio list for processing by the
	 * worker thread.
	 */
	defer_writethrough_bio(pb->cache, bio);
}

/*
 * When running in writethrough mode we need to send writes to clean blocks
 * to both the cache and origin devices.  In future we'd like to clone the
 * bio and send them in parallel, but for now we're doing them in
 * series as this is easier.
 */
static void remap_to_origin_then_cache(struct cache *cache, struct bio *bio,
				       dm_oblock_t oblock, dm_cblock_t cblock)
{
	struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT);

	pb->cache = cache;
	pb->cblock = cblock;
	dm_hook_bio(&pb->hook_info, bio, writethrough_endio, NULL);
	dm_bio_record(&pb->bio_details, bio);

	remap_to_origin_clear_discard(pb->cache, bio, oblock);
}

/*----------------------------------------------------------------
 * Migration processing
 *
 * Migration covers moving data from the origin device to the cache, or
 * vice versa.
 *--------------------------------------------------------------*/
static void inc_io_migrations(struct cache *cache)
{
	atomic_inc(&cache->nr_io_migrations);
}

static void dec_io_migrations(struct cache *cache)
{
	atomic_dec(&cache->nr_io_migrations);
}

static void __cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell,
			 bool holder)
{
	(holder ? dm_cell_release : dm_cell_release_no_holder)
		(cache->prison, cell, &cache->deferred_bios);
	free_prison_cell(cache, cell);
}

static void cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell,
		       bool holder)
{
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	__cell_defer(cache, cell, holder);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_worker(cache);
}

static void free_io_migration(struct dm_cache_migration *mg)
{
	dec_io_migrations(mg->cache);
	free_migration(mg);
}

static void migration_failure(struct dm_cache_migration *mg)
{
	struct cache *cache = mg->cache;

	if (mg->writeback) {
		DMWARN_LIMIT("writeback failed; couldn't copy block");
		set_dirty(cache, mg->old_oblock, mg->cblock);
		cell_defer(cache, mg->old_ocell, false);

	} else if (mg->demote) {
		DMWARN_LIMIT("demotion failed; couldn't copy block");
		policy_force_mapping(cache->policy, mg->new_oblock, mg->old_oblock);

		cell_defer(cache, mg->old_ocell, mg->promote ? false : true);
		if (mg->promote)
			cell_defer(cache, mg->new_ocell, true);
	} else {
		DMWARN_LIMIT("promotion failed; couldn't copy block");
		policy_remove_mapping(cache->policy, mg->new_oblock);
		cell_defer(cache, mg->new_ocell, true);
	}

	free_io_migration(mg);
}

static void migration_success_pre_commit(struct dm_cache_migration *mg)
{
	unsigned long flags;
	struct cache *cache = mg->cache;

	if (mg->writeback) {
		clear_dirty(cache, mg->old_oblock, mg->cblock);
		cell_defer(cache, mg->old_ocell, false);
		free_io_migration(mg);
		return;

	} else if (mg->demote) {
		if (dm_cache_remove_mapping(cache->cmd, mg->cblock)) {
			DMWARN_LIMIT("demotion failed; couldn't update on disk metadata");
			policy_force_mapping(cache->policy, mg->new_oblock,
					     mg->old_oblock);
			if (mg->promote)
				cell_defer(cache, mg->new_ocell, true);
			free_io_migration(mg);
			return;
		}
	} else {
		if (dm_cache_insert_mapping(cache->cmd, mg->cblock, mg->new_oblock)) {
			DMWARN_LIMIT("promotion failed; couldn't update on disk metadata");
			policy_remove_mapping(cache->policy, mg->new_oblock);
			free_io_migration(mg);
			return;
		}
	}

	spin_lock_irqsave(&cache->lock, flags);
	list_add_tail(&mg->list, &cache->need_commit_migrations);
	cache->commit_requested = true;
	spin_unlock_irqrestore(&cache->lock, flags);
}

static void migration_success_post_commit(struct dm_cache_migration *mg)
{
	unsigned long flags;
	struct cache *cache = mg->cache;

	if (mg->writeback) {
		DMWARN("writeback unexpectedly triggered commit");
		return;

	} else if (mg->demote) {
		cell_defer(cache, mg->old_ocell, mg->promote ? false : true);

		if (mg->promote) {
			mg->demote = false;

			spin_lock_irqsave(&cache->lock, flags);
			list_add_tail(&mg->list, &cache->quiesced_migrations);
			spin_unlock_irqrestore(&cache->lock, flags);

		} else {
			if (mg->invalidate)
				policy_remove_mapping(cache->policy, mg->old_oblock);
			free_io_migration(mg);
		}

	} else {
		if (mg->requeue_holder) {
			clear_dirty(cache, mg->new_oblock, mg->cblock);
			cell_defer(cache, mg->new_ocell, true);
		} else {
			/*
			 * The block was promoted via an overwrite, so it's dirty.
			 */
			set_dirty(cache, mg->new_oblock, mg->cblock);
			bio_endio(mg->new_ocell->holder, 0);
			cell_defer(cache, mg->new_ocell, false);
		}
		free_io_migration(mg);
	}
}

static void copy_complete(int read_err, unsigned long write_err, void *context)
{
	unsigned long flags;
	struct dm_cache_migration *mg = (struct dm_cache_migration *) context;
	struct cache *cache = mg->cache;

	if (read_err || write_err)
		mg->err = true;

	spin_lock_irqsave(&cache->lock, flags);
	list_add_tail(&mg->list, &cache->completed_migrations);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_worker(cache);
}

static void issue_copy(struct dm_cache_migration *mg)
{
	int r;
	struct dm_io_region o_region, c_region;
	struct cache *cache = mg->cache;
	sector_t cblock = from_cblock(mg->cblock);

	o_region.bdev = cache->origin_dev->bdev;
	o_region.count = cache->sectors_per_block;

	c_region.bdev = cache->cache_dev->bdev;
	c_region.sector = cblock * cache->sectors_per_block;
	c_region.count = cache->sectors_per_block;

	if (mg->writeback || mg->demote) {
		/* demote */
		o_region.sector = from_oblock(mg->old_oblock) * cache->sectors_per_block;
		r = dm_kcopyd_copy(cache->copier, &c_region, 1, &o_region, 0, copy_complete, mg);
	} else {
		/* promote */
		o_region.sector = from_oblock(mg->new_oblock) * cache->sectors_per_block;
		r = dm_kcopyd_copy(cache->copier, &o_region, 1, &c_region, 0, copy_complete, mg);
	}

	if (r < 0) {
		DMERR_LIMIT("issuing migration failed");
		migration_failure(mg);
	}
}

static void overwrite_endio(struct bio *bio, int err)
{
	struct dm_cache_migration *mg = bio->bi_private;
	struct cache *cache = mg->cache;
	size_t pb_data_size = get_per_bio_data_size(cache);
	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
	unsigned long flags;

	dm_unhook_bio(&pb->hook_info, bio);

	if (err)
		mg->err = true;

	mg->requeue_holder = false;

	spin_lock_irqsave(&cache->lock, flags);
	list_add_tail(&mg->list, &cache->completed_migrations);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_worker(cache);
}

static void issue_overwrite(struct dm_cache_migration *mg, struct bio *bio)
{
	size_t pb_data_size = get_per_bio_data_size(mg->cache);
	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);

	dm_hook_bio(&pb->hook_info, bio, overwrite_endio, mg);
	remap_to_cache_dirty(mg->cache, bio, mg->new_oblock, mg->cblock);

	/*
	 * No need to inc_ds() here, since the cell will be held for the
	 * duration of the io.
	 */
	generic_make_request(bio);
}

static bool bio_writes_complete_block(struct cache *cache, struct bio *bio)
{
	return (bio_data_dir(bio) == WRITE) &&
		(bio->bi_iter.bi_size == (cache->sectors_per_block << SECTOR_SHIFT));
}

static void avoid_copy(struct dm_cache_migration *mg)
{
	atomic_inc(&mg->cache->stats.copies_avoided);
	migration_success_pre_commit(mg);
}

static void calc_discard_block_range(struct cache *cache, struct bio *bio,
				     dm_dblock_t *b, dm_dblock_t *e)
{
	sector_t sb = bio->bi_iter.bi_sector;
	sector_t se = bio_end_sector(bio);

	*b = to_dblock(dm_sector_div_up(sb, cache->discard_block_size));

	if (se - sb < cache->discard_block_size)
		*e = *b;
	else
		*e = to_dblock(block_div(se, cache->discard_block_size));
}
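
/*
 * For example (illustrative numbers only): with discard_block_size = 1024
 * sectors, a discard covering sectors [1000, 5000) gives
 * b = dm_sector_div_up(1000, 1024) = 1 and e = 5000 / 1024 = 4, so discard
 * blocks 1, 2 and 3 are marked; the partially covered blocks 0 and 4 are
 * left alone.
 */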

static void issue_discard(struct dm_cache_migration *mg)
{
	dm_dblock_t b, e;
	struct bio *bio = mg->new_ocell->holder;

	calc_discard_block_range(mg->cache, bio, &b, &e);
	while (b != e) {
		set_discard(mg->cache, b);
		b = to_dblock(from_dblock(b) + 1);
	}

	bio_endio(bio, 0);
	cell_defer(mg->cache, mg->new_ocell, false);
	free_migration(mg);
}

static void issue_copy_or_discard(struct dm_cache_migration *mg)
{
	bool avoid;
	struct cache *cache = mg->cache;

	if (mg->discard) {
		issue_discard(mg);
		return;
	}

	if (mg->writeback || mg->demote)
		avoid = !is_dirty(cache, mg->cblock) ||
			is_discarded_oblock(cache, mg->old_oblock);
	else {
		struct bio *bio = mg->new_ocell->holder;

		avoid = is_discarded_oblock(cache, mg->new_oblock);

		if (writeback_mode(&cache->features) &&
		    !avoid && bio_writes_complete_block(cache, bio)) {
			issue_overwrite(mg, bio);
			return;
		}
	}

	avoid ? avoid_copy(mg) : issue_copy(mg);
}

static void complete_migration(struct dm_cache_migration *mg)
{
	if (mg->err)
		migration_failure(mg);
	else
		migration_success_pre_commit(mg);
}

static void process_migrations(struct cache *cache, struct list_head *head,
			       void (*fn)(struct dm_cache_migration *))
{
	unsigned long flags;
	struct list_head list;
	struct dm_cache_migration *mg, *tmp;

	INIT_LIST_HEAD(&list);
	spin_lock_irqsave(&cache->lock, flags);
	list_splice_init(head, &list);
	spin_unlock_irqrestore(&cache->lock, flags);

	list_for_each_entry_safe(mg, tmp, &list, list)
		fn(mg);
}

static void __queue_quiesced_migration(struct dm_cache_migration *mg)
{
	list_add_tail(&mg->list, &mg->cache->quiesced_migrations);
}

static void queue_quiesced_migration(struct dm_cache_migration *mg)
{
	unsigned long flags;
	struct cache *cache = mg->cache;

	spin_lock_irqsave(&cache->lock, flags);
	__queue_quiesced_migration(mg);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_worker(cache);
}

static void queue_quiesced_migrations(struct cache *cache, struct list_head *work)
{
	unsigned long flags;
	struct dm_cache_migration *mg, *tmp;

	spin_lock_irqsave(&cache->lock, flags);
	list_for_each_entry_safe(mg, tmp, work, list)
		__queue_quiesced_migration(mg);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_worker(cache);
}

static void check_for_quiesced_migrations(struct cache *cache,
					  struct per_bio_data *pb)
{
	struct list_head work;

	if (!pb->all_io_entry)
		return;

	INIT_LIST_HEAD(&work);
	dm_deferred_entry_dec(pb->all_io_entry, &work);

	if (!list_empty(&work))
		queue_quiesced_migrations(cache, &work);
}

static void quiesce_migration(struct dm_cache_migration *mg)
{
	if (!dm_deferred_set_add_work(mg->cache->all_io_ds, &mg->list))
		queue_quiesced_migration(mg);
}

static void promote(struct cache *cache, struct prealloc *structs,
		    dm_oblock_t oblock, dm_cblock_t cblock,
		    struct dm_bio_prison_cell *cell)
{
	struct dm_cache_migration *mg = prealloc_get_migration(structs);

	mg->err = false;
	mg->discard = false;
	mg->writeback = false;
	mg->demote = false;
	mg->promote = true;
	mg->requeue_holder = true;
	mg->invalidate = false;
	mg->cache = cache;
	mg->new_oblock = oblock;
	mg->cblock = cblock;
	mg->old_ocell = NULL;
	mg->new_ocell = cell;
	mg->start_jiffies = jiffies;

	inc_io_migrations(cache);
	quiesce_migration(mg);
}

static void writeback(struct cache *cache, struct prealloc *structs,
		      dm_oblock_t oblock, dm_cblock_t cblock,
		      struct dm_bio_prison_cell *cell)
{
	struct dm_cache_migration *mg = prealloc_get_migration(structs);

	mg->err = false;
	mg->discard = false;
	mg->writeback = true;
	mg->demote = false;
	mg->promote = false;
	mg->requeue_holder = true;
	mg->invalidate = false;
	mg->cache = cache;
	mg->old_oblock = oblock;
	mg->cblock = cblock;
	mg->old_ocell = cell;
	mg->new_ocell = NULL;
	mg->start_jiffies = jiffies;

	inc_io_migrations(cache);
	quiesce_migration(mg);
}

static void demote_then_promote(struct cache *cache, struct prealloc *structs,
				dm_oblock_t old_oblock, dm_oblock_t new_oblock,
				dm_cblock_t cblock,
				struct dm_bio_prison_cell *old_ocell,
				struct dm_bio_prison_cell *new_ocell)
{
	struct dm_cache_migration *mg = prealloc_get_migration(structs);

	mg->err = false;
	mg->discard = false;
	mg->writeback = false;
	mg->demote = true;
	mg->promote = true;
	mg->requeue_holder = true;
	mg->invalidate = false;
	mg->cache = cache;
	mg->old_oblock = old_oblock;
	mg->new_oblock = new_oblock;
	mg->cblock = cblock;
	mg->old_ocell = old_ocell;
	mg->new_ocell = new_ocell;
	mg->start_jiffies = jiffies;

	inc_io_migrations(cache);
	quiesce_migration(mg);
}

/*
 * Invalidate a cache entry.  No writeback occurs; any changes in the cache
 * block are thrown away.
 */
static void invalidate(struct cache *cache, struct prealloc *structs,
		       dm_oblock_t oblock, dm_cblock_t cblock,
		       struct dm_bio_prison_cell *cell)
{
	struct dm_cache_migration *mg = prealloc_get_migration(structs);

	mg->err = false;
	mg->discard = false;
	mg->writeback = false;
	mg->demote = true;
	mg->promote = false;
	mg->requeue_holder = true;
	mg->invalidate = true;
	mg->cache = cache;
	mg->old_oblock = oblock;
	mg->cblock = cblock;
	mg->old_ocell = cell;
	mg->new_ocell = NULL;
	mg->start_jiffies = jiffies;

	inc_io_migrations(cache);
	quiesce_migration(mg);
}

static void discard(struct cache *cache, struct prealloc *structs,
		    struct dm_bio_prison_cell *cell)
{
	struct dm_cache_migration *mg = prealloc_get_migration(structs);

	mg->err = false;
	mg->discard = true;
	mg->writeback = false;
	mg->demote = false;
	mg->promote = false;
	mg->requeue_holder = false;
	mg->invalidate = false;
	mg->cache = cache;
	mg->old_ocell = NULL;
	mg->new_ocell = cell;
	mg->start_jiffies = jiffies;

	quiesce_migration(mg);
}
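
/*
 * Rough summary of the flag combinations the constructors above set up
 * (derived from the code above, for orientation only):
 *
 *	operation		writeback demote promote invalidate discard
 *	promote()		    0      0      1        0         0
 *	writeback()		    1      0      0        0         0
 *	demote_then_promote()	    0      1      1        0         0
 *	invalidate()		    0      1      0        1         0
 *	discard()		    0      0      0        0         1
 */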

/*----------------------------------------------------------------
 * bio processing
 *--------------------------------------------------------------*/
static void defer_bio(struct cache *cache, struct bio *bio)
{
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	bio_list_add(&cache->deferred_bios, bio);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_worker(cache);
}

static void process_flush_bio(struct cache *cache, struct bio *bio)
{
	size_t pb_data_size = get_per_bio_data_size(cache);
	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);

	BUG_ON(bio->bi_iter.bi_size);
	if (!pb->req_nr)
		remap_to_origin(cache, bio);
	else
		remap_to_cache(cache, bio, 0);

	/*
	 * REQ_FLUSH is not directed at any particular block so we don't
	 * need to inc_ds().  REQ_FUA's are split into a write + REQ_FLUSH
	 * by dm-core.
	 */
	issue(cache, bio);
}

static void process_discard_bio(struct cache *cache, struct prealloc *structs,
				struct bio *bio)
{
	int r;
	dm_dblock_t b, e;
	struct dm_bio_prison_cell *cell_prealloc, *new_ocell;

	calc_discard_block_range(cache, bio, &b, &e);
	if (b == e) {
		bio_endio(bio, 0);
		return;
	}

	cell_prealloc = prealloc_get_cell(structs);
	r = bio_detain_range(cache, dblock_to_oblock(cache, b), dblock_to_oblock(cache, e), bio, cell_prealloc,
			     (cell_free_fn) prealloc_put_cell,
			     structs, &new_ocell);
	if (r > 0)
		return;

	discard(cache, structs, new_ocell);
}

static bool spare_migration_bandwidth(struct cache *cache)
{
	sector_t current_volume = (atomic_read(&cache->nr_io_migrations) + 1) *
		cache->sectors_per_block;
	return current_volume < cache->migration_threshold;
}
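
/*
 * E.g. (illustrative numbers, not defaults): with 64KB blocks (128 sectors)
 * and a migration_threshold of 2048 sectors, a bio arriving while 16
 * migrations are already in flight evaluates (16 + 1) * 128 = 2176 >= 2048
 * and so is told there is no spare bandwidth.
 */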
1502
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001503static void inc_hit_counter(struct cache *cache, struct bio *bio)
1504{
1505 atomic_inc(bio_data_dir(bio) == READ ?
1506 &cache->stats.read_hit : &cache->stats.write_hit);
1507}
1508
1509static void inc_miss_counter(struct cache *cache, struct bio *bio)
1510{
1511 atomic_inc(bio_data_dir(bio) == READ ?
1512 &cache->stats.read_miss : &cache->stats.write_miss);
1513}
1514
Joe Thornberfb4100a2015-05-20 10:30:32 +01001515/*----------------------------------------------------------------*/
1516
1517struct old_oblock_lock {
1518 struct policy_locker locker;
1519 struct cache *cache;
1520 struct prealloc *structs;
1521 struct dm_bio_prison_cell *cell;
1522};
1523
1524static int null_locker(struct policy_locker *locker, dm_oblock_t b)
1525{
1526 /* This should never be called */
1527 BUG();
1528 return 0;
1529}
1530
1531static int cell_locker(struct policy_locker *locker, dm_oblock_t b)
1532{
1533 struct old_oblock_lock *l = container_of(locker, struct old_oblock_lock, locker);
1534 struct dm_bio_prison_cell *cell_prealloc = prealloc_get_cell(l->structs);
1535
1536 return bio_detain(l->cache, b, NULL, cell_prealloc,
1537 (cell_free_fn) prealloc_put_cell,
1538 l->structs, &l->cell);
1539}
1540
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001541static void process_bio(struct cache *cache, struct prealloc *structs,
1542 struct bio *bio)
1543{
1544 int r;
1545 bool release_cell = true;
1546 dm_oblock_t block = get_bio_block(cache, bio);
Joe Thornberfb4100a2015-05-20 10:30:32 +01001547 struct dm_bio_prison_cell *cell_prealloc, *new_ocell;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001548 struct policy_result lookup_result;
Joe Thornber2ee57d52013-10-24 14:10:29 -04001549 bool passthrough = passthrough_mode(&cache->features);
Joe Thornber43c32bf2014-11-25 13:14:57 +00001550 bool discarded_block, can_migrate;
Joe Thornberfb4100a2015-05-20 10:30:32 +01001551 struct old_oblock_lock ool;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001552
1553 /*
1554 * Check to see if that block is currently migrating.
1555 */
1556 cell_prealloc = prealloc_get_cell(structs);
1557 r = bio_detain(cache, block, bio, cell_prealloc,
1558 (cell_free_fn) prealloc_put_cell,
1559 structs, &new_ocell);
1560 if (r > 0)
1561 return;
1562
Joe Thornber43c32bf2014-11-25 13:14:57 +00001563 discarded_block = is_discarded_oblock(cache, block);
1564 can_migrate = !passthrough && (discarded_block || spare_migration_bandwidth(cache));
1565
Joe Thornberfb4100a2015-05-20 10:30:32 +01001566 ool.locker.fn = cell_locker;
1567 ool.cache = cache;
1568 ool.structs = structs;
1569 ool.cell = NULL;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001570 r = policy_map(cache->policy, block, true, can_migrate, discarded_block,
Joe Thornberfb4100a2015-05-20 10:30:32 +01001571 bio, &ool.locker, &lookup_result);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001572
1573 if (r == -EWOULDBLOCK)
1574 /* migration has been denied */
1575 lookup_result.op = POLICY_MISS;
1576
1577 switch (lookup_result.op) {
1578 case POLICY_HIT:
Joe Thornber2ee57d52013-10-24 14:10:29 -04001579 if (passthrough) {
1580 inc_miss_counter(cache, bio);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001581
Joe Thornber2ee57d52013-10-24 14:10:29 -04001582 /*
1583 * Passthrough always maps to the origin,
1584 * invalidating any cache blocks that are written
1585 * to.
1586 */
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001587
Joe Thornber2ee57d52013-10-24 14:10:29 -04001588 if (bio_data_dir(bio) == WRITE) {
1589 atomic_inc(&cache->stats.demotion);
1590 invalidate(cache, structs, block, lookup_result.cblock, new_ocell);
1591 release_cell = false;
1592
1593 } else {
1594 /* FIXME: factor out issue_origin() */
Joe Thornber2ee57d52013-10-24 14:10:29 -04001595 remap_to_origin_clear_discard(cache, bio, block);
Joe Thornber8c081b52014-05-13 16:18:38 +01001596 inc_and_issue(cache, bio, new_ocell);
Joe Thornber2ee57d52013-10-24 14:10:29 -04001597 }
1598 } else {
1599 inc_hit_counter(cache, bio);
1600
1601 if (bio_data_dir(bio) == WRITE &&
1602 writethrough_mode(&cache->features) &&
1603 !is_dirty(cache, lookup_result.cblock)) {
Joe Thornber2ee57d52013-10-24 14:10:29 -04001604 remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
Joe Thornber8c081b52014-05-13 16:18:38 +01001605 inc_and_issue(cache, bio, new_ocell);
1606
1607 } else {
1608 remap_to_cache_dirty(cache, bio, block, lookup_result.cblock);
1609 inc_and_issue(cache, bio, new_ocell);
1610 }
Joe Thornber2ee57d52013-10-24 14:10:29 -04001611 }
1612
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001613 break;
1614
1615 case POLICY_MISS:
1616 inc_miss_counter(cache, bio);
Joe Thornbere2e74d62013-03-20 17:21:27 +00001617 remap_to_origin_clear_discard(cache, bio, block);
Joe Thornber8c081b52014-05-13 16:18:38 +01001618 inc_and_issue(cache, bio, new_ocell);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001619 break;
1620
1621 case POLICY_NEW:
1622 atomic_inc(&cache->stats.promotion);
1623 promote(cache, structs, block, lookup_result.cblock, new_ocell);
1624 release_cell = false;
1625 break;
1626
1627 case POLICY_REPLACE:
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001628 atomic_inc(&cache->stats.demotion);
1629 atomic_inc(&cache->stats.promotion);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001630 demote_then_promote(cache, structs, lookup_result.old_oblock,
1631 block, lookup_result.cblock,
Joe Thornberfb4100a2015-05-20 10:30:32 +01001632 ool.cell, new_ocell);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001633 release_cell = false;
1634 break;
1635
1636 default:
1637 DMERR_LIMIT("%s: erroring bio, unknown policy op: %u", __func__,
1638 (unsigned) lookup_result.op);
1639 bio_io_error(bio);
1640 }
1641
1642 if (release_cell)
1643 cell_defer(cache, new_ocell, false);
1644}
1645
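/*
 * Returns non-zero once more than COMMIT_PERIOD jiffies have elapsed
 * since the last commit (time_in_range() keeps the test safe across
 * jiffies wrap).
 */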
1646static int need_commit_due_to_time(struct cache *cache)
1647{
Manuel Schölling0f30af92014-05-22 22:42:37 +02001648 return !time_in_range(jiffies, cache->last_commit_jiffies,
1649 cache->last_commit_jiffies + COMMIT_PERIOD);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001650}
1651
1652static int commit_if_needed(struct cache *cache)
1653{
Heinz Mauelshagenffcbcb62013-10-14 17:24:43 +02001654 int r = 0;
1655
1656 if ((cache->commit_requested || need_commit_due_to_time(cache)) &&
1657 dm_cache_changed_this_transaction(cache->cmd)) {
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001658 atomic_inc(&cache->stats.commit_count);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001659 cache->commit_requested = false;
Heinz Mauelshagenffcbcb62013-10-14 17:24:43 +02001660 r = dm_cache_commit(cache->cmd, false);
1661 cache->last_commit_jiffies = jiffies;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001662 }
1663
Heinz Mauelshagenffcbcb62013-10-14 17:24:43 +02001664 return r;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001665}
1666
1667static void process_deferred_bios(struct cache *cache)
1668{
1669 unsigned long flags;
1670 struct bio_list bios;
1671 struct bio *bio;
1672 struct prealloc structs;
1673
1674 memset(&structs, 0, sizeof(structs));
1675 bio_list_init(&bios);
1676
1677 spin_lock_irqsave(&cache->lock, flags);
1678 bio_list_merge(&bios, &cache->deferred_bios);
1679 bio_list_init(&cache->deferred_bios);
1680 spin_unlock_irqrestore(&cache->lock, flags);
1681
1682 while (!bio_list_empty(&bios)) {
1683 /*
1684 * If we've got no free migration structs, and processing
1685 * this bio might require one, we pause until there are some
1686 * prepared mappings to process.
1687 */
1688 if (prealloc_data_structs(cache, &structs)) {
1689 spin_lock_irqsave(&cache->lock, flags);
1690 bio_list_merge(&cache->deferred_bios, &bios);
1691 spin_unlock_irqrestore(&cache->lock, flags);
1692 break;
1693 }
1694
1695 bio = bio_list_pop(&bios);
1696
1697 if (bio->bi_rw & REQ_FLUSH)
1698 process_flush_bio(cache, bio);
1699 else if (bio->bi_rw & REQ_DISCARD)
Joe Thornber7ae34e72014-11-06 10:18:04 +00001700 process_discard_bio(cache, &structs, bio);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001701 else
1702 process_bio(cache, &structs, bio);
1703 }
1704
1705 prealloc_free_structs(cache, &structs);
1706}
1707
1708static void process_deferred_flush_bios(struct cache *cache, bool submit_bios)
1709{
1710 unsigned long flags;
1711 struct bio_list bios;
1712 struct bio *bio;
1713
1714 bio_list_init(&bios);
1715
1716 spin_lock_irqsave(&cache->lock, flags);
1717 bio_list_merge(&bios, &cache->deferred_flush_bios);
1718 bio_list_init(&cache->deferred_flush_bios);
1719 spin_unlock_irqrestore(&cache->lock, flags);
1720
Joe Thornber8c081b52014-05-13 16:18:38 +01001721 /*
1722 * These bios have already been through inc_ds()
1723 */
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001724 while ((bio = bio_list_pop(&bios)))
1725 submit_bios ? generic_make_request(bio) : bio_io_error(bio);
1726}
1727
Joe Thornbere2e74d62013-03-20 17:21:27 +00001728static void process_deferred_writethrough_bios(struct cache *cache)
1729{
1730 unsigned long flags;
1731 struct bio_list bios;
1732 struct bio *bio;
1733
1734 bio_list_init(&bios);
1735
1736 spin_lock_irqsave(&cache->lock, flags);
1737 bio_list_merge(&bios, &cache->deferred_writethrough_bios);
1738 bio_list_init(&cache->deferred_writethrough_bios);
1739 spin_unlock_irqrestore(&cache->lock, flags);
1740
Joe Thornber8c081b52014-05-13 16:18:38 +01001741 /*
1742 * These bios have already been through inc_ds()
1743 */
Joe Thornbere2e74d62013-03-20 17:21:27 +00001744 while ((bio = bio_list_pop(&bios)))
1745 generic_make_request(bio);
1746}
1747
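/*
 * Use any spare migration bandwidth to write dirty cache blocks back
 * to the origin, as selected by the policy's writeback_work hook.
 */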
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001748static void writeback_some_dirty_blocks(struct cache *cache)
1749{
1750 int r = 0;
1751 dm_oblock_t oblock;
1752 dm_cblock_t cblock;
1753 struct prealloc structs;
1754 struct dm_bio_prison_cell *old_ocell;
1755
1756 memset(&structs, 0, sizeof(structs));
1757
1758 while (spare_migration_bandwidth(cache)) {
1759 if (prealloc_data_structs(cache, &structs))
1760 break;
1761
1762 r = policy_writeback_work(cache->policy, &oblock, &cblock);
1763 if (r)
1764 break;
1765
1766 r = get_cell(cache, oblock, &structs, &old_ocell);
1767 if (r) {
1768 policy_set_dirty(cache->policy, oblock);
1769 break;
1770 }
1771
1772 writeback(cache, &structs, oblock, cblock, old_ocell);
1773 }
1774
1775 prealloc_free_structs(cache, &structs);
1776}
1777
1778/*----------------------------------------------------------------
Joe Thornber65790ff2013-11-08 16:39:50 +00001779 * Invalidations.
1780 * Dropping something from the cache *without* writing back.
1781 *--------------------------------------------------------------*/
1782
1783static void process_invalidation_request(struct cache *cache, struct invalidation_request *req)
1784{
1785 int r = 0;
1786 uint64_t begin = from_cblock(req->cblocks->begin);
1787 uint64_t end = from_cblock(req->cblocks->end);
1788
1789 while (begin != end) {
1790 r = policy_remove_cblock(cache->policy, to_cblock(begin));
1791 if (!r) {
1792 r = dm_cache_remove_mapping(cache->cmd, to_cblock(begin));
1793 if (r)
1794 break;
1795
1796 } else if (r == -ENODATA) {
1797 /* harmless, already unmapped */
1798 r = 0;
1799
1800 } else {
1801 DMERR("policy_remove_cblock failed");
1802 break;
1803 }
1804
1805 begin++;
1806 }
1807
1808 cache->commit_requested = true;
1809
1810 req->err = r;
1811 atomic_set(&req->complete, 1);
1812
1813 wake_up(&req->result_wait);
1814}
1815
1816static void process_invalidation_requests(struct cache *cache)
1817{
1818 struct list_head list;
1819 struct invalidation_request *req, *tmp;
1820
1821 INIT_LIST_HEAD(&list);
1822 spin_lock(&cache->invalidation_lock);
1823 list_splice_init(&cache->invalidation_requests, &list);
1824 spin_unlock(&cache->invalidation_lock);
1825
1826 list_for_each_entry_safe (req, tmp, &list, list)
1827 process_invalidation_request(cache, req);
1828}
1829
1830/*----------------------------------------------------------------
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001831 * Main worker loop
1832 *--------------------------------------------------------------*/
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001833static bool is_quiescing(struct cache *cache)
1834{
Joe Thornber238f8362013-10-30 17:29:30 +00001835 return atomic_read(&cache->quiescing);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001836}
1837
Joe Thornber66cb1912013-10-30 17:11:58 +00001838static void ack_quiescing(struct cache *cache)
1839{
1840 if (is_quiescing(cache)) {
1841 atomic_inc(&cache->quiescing_ack);
1842 wake_up(&cache->quiescing_wait);
1843 }
1844}
1845
1846static void wait_for_quiescing_ack(struct cache *cache)
1847{
1848 wait_event(cache->quiescing_wait, atomic_read(&cache->quiescing_ack));
1849}
1850
1851static void start_quiescing(struct cache *cache)
1852{
Joe Thornber238f8362013-10-30 17:29:30 +00001853 atomic_inc(&cache->quiescing);
Joe Thornber66cb1912013-10-30 17:11:58 +00001854 wait_for_quiescing_ack(cache);
1855}
1856
1857static void stop_quiescing(struct cache *cache)
1858{
Joe Thornber238f8362013-10-30 17:29:30 +00001859 atomic_set(&cache->quiescing, 0);
Joe Thornber66cb1912013-10-30 17:11:58 +00001860 atomic_set(&cache->quiescing_ack, 0);
1861}
1862
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001863static void wait_for_migrations(struct cache *cache)
1864{
Joe Thornbera59db672015-01-23 10:16:16 +00001865 wait_event(cache->migration_wait, !atomic_read(&cache->nr_allocated_migrations));
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001866}
1867
1868static void stop_worker(struct cache *cache)
1869{
1870 cancel_delayed_work(&cache->waker);
1871 flush_workqueue(cache->wq);
1872}
1873
1874static void requeue_deferred_io(struct cache *cache)
1875{
1876 struct bio *bio;
1877 struct bio_list bios;
1878
1879 bio_list_init(&bios);
1880 bio_list_merge(&bios, &cache->deferred_bios);
1881 bio_list_init(&cache->deferred_bios);
1882
1883 while ((bio = bio_list_pop(&bios)))
1884 bio_endio(bio, DM_ENDIO_REQUEUE);
1885}
1886
1887static int more_work(struct cache *cache)
1888{
1889 if (is_quiescing(cache))
1890 return !list_empty(&cache->quiesced_migrations) ||
1891 !list_empty(&cache->completed_migrations) ||
1892 !list_empty(&cache->need_commit_migrations);
1893 else
1894 return !bio_list_empty(&cache->deferred_bios) ||
1895 !bio_list_empty(&cache->deferred_flush_bios) ||
Joe Thornbere2e74d62013-03-20 17:21:27 +00001896 !bio_list_empty(&cache->deferred_writethrough_bios) ||
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001897 !list_empty(&cache->quiesced_migrations) ||
1898 !list_empty(&cache->completed_migrations) ||
Joe Thornber65790ff2013-11-08 16:39:50 +00001899 !list_empty(&cache->need_commit_migrations) ||
1900 cache->invalidate;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001901}
1902
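/*
 * The single worker thread: drain the deferred bio and invalidation
 * queues, issue and complete migrations, then commit metadata before
 * releasing the bios and migrations that depend on that commit.
 */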
1903static void do_worker(struct work_struct *ws)
1904{
1905 struct cache *cache = container_of(ws, struct cache, worker);
1906
1907 do {
Joe Thornber66cb1912013-10-30 17:11:58 +00001908 if (!is_quiescing(cache)) {
1909 writeback_some_dirty_blocks(cache);
1910 process_deferred_writethrough_bios(cache);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001911 process_deferred_bios(cache);
Joe Thornber65790ff2013-11-08 16:39:50 +00001912 process_invalidation_requests(cache);
Joe Thornber66cb1912013-10-30 17:11:58 +00001913 }
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001914
Joe Thornber7ae34e72014-11-06 10:18:04 +00001915 process_migrations(cache, &cache->quiesced_migrations, issue_copy_or_discard);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001916 process_migrations(cache, &cache->completed_migrations, complete_migration);
1917
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001918 if (commit_if_needed(cache)) {
1919 process_deferred_flush_bios(cache, false);
Joe Thornber304affa2014-06-24 15:36:58 -04001920 process_migrations(cache, &cache->need_commit_migrations, migration_failure);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001921
1922 /*
1923 * FIXME: rollback metadata or just go into a
1924 * failure mode and error everything
1925 */
1926 } else {
1927 process_deferred_flush_bios(cache, true);
1928 process_migrations(cache, &cache->need_commit_migrations,
1929 migration_success_post_commit);
1930 }
Joe Thornber66cb1912013-10-30 17:11:58 +00001931
1932 ack_quiescing(cache);
1933
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001934 } while (more_work(cache));
1935}
1936
1937/*
1938 * We want to commit periodically so that not too much
1939 * unwritten metadata builds up.
1940 */
1941static void do_waker(struct work_struct *ws)
1942{
1943 struct cache *cache = container_of(to_delayed_work(ws), struct cache, waker);
Joe Thornberf8350da2013-05-10 14:37:16 +01001944 policy_tick(cache->policy);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001945 wake_worker(cache);
1946 queue_delayed_work(cache->wq, &cache->waker, COMMIT_PERIOD);
1947}
1948
1949/*----------------------------------------------------------------*/
1950
1951static int is_congested(struct dm_dev *dev, int bdi_bits)
1952{
1953 struct request_queue *q = bdev_get_queue(dev->bdev);
1954 return bdi_congested(&q->backing_dev_info, bdi_bits);
1955}
1956
1957static int cache_is_congested(struct dm_target_callbacks *cb, int bdi_bits)
1958{
1959 struct cache *cache = container_of(cb, struct cache, callbacks);
1960
1961 return is_congested(cache->origin_dev, bdi_bits) ||
1962 is_congested(cache->cache_dev, bdi_bits);
1963}
1964
1965/*----------------------------------------------------------------
1966 * Target methods
1967 *--------------------------------------------------------------*/
1968
1969/*
1970 * This function gets called on the error paths of the constructor, so we
1971 * have to cope with a partially initialised struct.
1972 */
1973static void destroy(struct cache *cache)
1974{
1975 unsigned i;
1976
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001977 if (cache->migration_pool)
1978 mempool_destroy(cache->migration_pool);
1979
1980 if (cache->all_io_ds)
1981 dm_deferred_set_destroy(cache->all_io_ds);
1982
1983 if (cache->prison)
1984 dm_bio_prison_destroy(cache->prison);
1985
1986 if (cache->wq)
1987 destroy_workqueue(cache->wq);
1988
1989 if (cache->dirty_bitset)
1990 free_bitset(cache->dirty_bitset);
1991
1992 if (cache->discard_bitset)
1993 free_bitset(cache->discard_bitset);
1994
1995 if (cache->copier)
1996 dm_kcopyd_client_destroy(cache->copier);
1997
1998 if (cache->cmd)
1999 dm_cache_metadata_close(cache->cmd);
2000
2001 if (cache->metadata_dev)
2002 dm_put_device(cache->ti, cache->metadata_dev);
2003
2004 if (cache->origin_dev)
2005 dm_put_device(cache->ti, cache->origin_dev);
2006
2007 if (cache->cache_dev)
2008 dm_put_device(cache->ti, cache->cache_dev);
2009
2010 if (cache->policy)
2011 dm_cache_policy_destroy(cache->policy);
2012
2013 for (i = 0; i < cache->nr_ctr_args ; i++)
2014 kfree(cache->ctr_args[i]);
2015 kfree(cache->ctr_args);
2016
2017 kfree(cache);
2018}
2019
2020static void cache_dtr(struct dm_target *ti)
2021{
2022 struct cache *cache = ti->private;
2023
2024 destroy(cache);
2025}
2026
2027static sector_t get_dev_size(struct dm_dev *dev)
2028{
2029 return i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT;
2030}
2031
2032/*----------------------------------------------------------------*/
2033
2034/*
2035 * Construct a cache device mapping.
2036 *
2037 * cache <metadata dev> <cache dev> <origin dev> <block size>
2038 * <#feature args> [<feature arg>]*
2039 * <policy> <#policy args> [<policy arg>]*
2040 *
2041 * metadata dev : fast device holding the persistent metadata
2042 * cache dev : fast device holding cached data blocks
2043 * origin dev : slow device holding original data blocks
2044 * block size : cache unit size in sectors
2045 *
2046 * #feature args : number of feature arguments passed
 2047 * feature args : writethrough or passthrough. (The default is writeback.)
2048 *
2049 * policy : the replacement policy to use
2050 * #policy args : an even number of policy arguments corresponding
2051 * to key/value pairs passed to the policy
2052 * policy args : key/value pairs passed to the policy
2053 * E.g. 'sequential_threshold 1024'
2054 * See cache-policies.txt for details.
2055 *
2056 * Optional feature arguments are:
2057 * writethrough : write through caching that prohibits cache block
2058 * content from being different from origin block content.
2059 * Without this argument, the default behaviour is to write
2060 * back cache block contents later for performance reasons,
 2061 * so they may differ from the corresponding origin blocks.
 * passthrough : a degraded mode useful when the cache contents are not
 * known to be coherent with the origin (e.g. after rolling back a
 * snapshot of the origin). All io is forwarded to the origin and
 * writes invalidate any cached copy; every cache block must be
 * clean before this mode can be activated.
2062 */
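/*
 * An illustrative table line (device paths and policy arguments are
 * only examples):
 *
 *   0 41943040 cache /dev/mapper/fast-meta /dev/mapper/fast-data
 *       /dev/mapper/slow 512 1 writeback default 4
 *       sequential_threshold 1024 random_threshold 8
 */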
2063struct cache_args {
2064 struct dm_target *ti;
2065
2066 struct dm_dev *metadata_dev;
2067
2068 struct dm_dev *cache_dev;
2069 sector_t cache_sectors;
2070
2071 struct dm_dev *origin_dev;
2072 sector_t origin_sectors;
2073
2074 uint32_t block_size;
2075
2076 const char *policy_name;
2077 int policy_argc;
2078 const char **policy_argv;
2079
2080 struct cache_features features;
2081};
2082
2083static void destroy_cache_args(struct cache_args *ca)
2084{
2085 if (ca->metadata_dev)
2086 dm_put_device(ca->ti, ca->metadata_dev);
2087
2088 if (ca->cache_dev)
2089 dm_put_device(ca->ti, ca->cache_dev);
2090
2091 if (ca->origin_dev)
2092 dm_put_device(ca->ti, ca->origin_dev);
2093
2094 kfree(ca);
2095}
2096
2097static bool at_least_one_arg(struct dm_arg_set *as, char **error)
2098{
2099 if (!as->argc) {
2100 *error = "Insufficient args";
2101 return false;
2102 }
2103
2104 return true;
2105}
2106
2107static int parse_metadata_dev(struct cache_args *ca, struct dm_arg_set *as,
2108 char **error)
2109{
2110 int r;
2111 sector_t metadata_dev_size;
2112 char b[BDEVNAME_SIZE];
2113
2114 if (!at_least_one_arg(as, error))
2115 return -EINVAL;
2116
2117 r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
2118 &ca->metadata_dev);
2119 if (r) {
2120 *error = "Error opening metadata device";
2121 return r;
2122 }
2123
2124 metadata_dev_size = get_dev_size(ca->metadata_dev);
2125 if (metadata_dev_size > DM_CACHE_METADATA_MAX_SECTORS_WARNING)
2126 DMWARN("Metadata device %s is larger than %u sectors: excess space will not be used.",
 2127 bdevname(ca->metadata_dev->bdev, b), DM_CACHE_METADATA_MAX_SECTORS_WARNING);
2128
2129 return 0;
2130}
2131
2132static int parse_cache_dev(struct cache_args *ca, struct dm_arg_set *as,
2133 char **error)
2134{
2135 int r;
2136
2137 if (!at_least_one_arg(as, error))
2138 return -EINVAL;
2139
2140 r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
2141 &ca->cache_dev);
2142 if (r) {
2143 *error = "Error opening cache device";
2144 return r;
2145 }
2146 ca->cache_sectors = get_dev_size(ca->cache_dev);
2147
2148 return 0;
2149}
2150
2151static int parse_origin_dev(struct cache_args *ca, struct dm_arg_set *as,
2152 char **error)
2153{
2154 int r;
2155
2156 if (!at_least_one_arg(as, error))
2157 return -EINVAL;
2158
2159 r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
2160 &ca->origin_dev);
2161 if (r) {
2162 *error = "Error opening origin device";
2163 return r;
2164 }
2165
2166 ca->origin_sectors = get_dev_size(ca->origin_dev);
2167 if (ca->ti->len > ca->origin_sectors) {
2168 *error = "Device size larger than cached device";
2169 return -EINVAL;
2170 }
2171
2172 return 0;
2173}
2174
2175static int parse_block_size(struct cache_args *ca, struct dm_arg_set *as,
2176 char **error)
2177{
Mike Snitzer05473042013-08-16 10:54:19 -04002178 unsigned long block_size;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002179
2180 if (!at_least_one_arg(as, error))
2181 return -EINVAL;
2182
Mike Snitzer05473042013-08-16 10:54:19 -04002183 if (kstrtoul(dm_shift_arg(as), 10, &block_size) || !block_size ||
2184 block_size < DATA_DEV_BLOCK_SIZE_MIN_SECTORS ||
2185 block_size > DATA_DEV_BLOCK_SIZE_MAX_SECTORS ||
2186 block_size & (DATA_DEV_BLOCK_SIZE_MIN_SECTORS - 1)) {
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002187 *error = "Invalid data block size";
2188 return -EINVAL;
2189 }
2190
Mike Snitzer05473042013-08-16 10:54:19 -04002191 if (block_size > ca->cache_sectors) {
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002192 *error = "Data block size is larger than the cache device";
2193 return -EINVAL;
2194 }
2195
Mike Snitzer05473042013-08-16 10:54:19 -04002196 ca->block_size = block_size;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002197
2198 return 0;
2199}
2200
2201static void init_features(struct cache_features *cf)
2202{
2203 cf->mode = CM_WRITE;
Joe Thornber2ee57d52013-10-24 14:10:29 -04002204 cf->io_mode = CM_IO_WRITEBACK;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002205}
2206
2207static int parse_features(struct cache_args *ca, struct dm_arg_set *as,
2208 char **error)
2209{
2210 static struct dm_arg _args[] = {
2211 {0, 1, "Invalid number of cache feature arguments"},
2212 };
2213
2214 int r;
2215 unsigned argc;
2216 const char *arg;
2217 struct cache_features *cf = &ca->features;
2218
2219 init_features(cf);
2220
2221 r = dm_read_arg_group(_args, as, &argc, error);
2222 if (r)
2223 return -EINVAL;
2224
2225 while (argc--) {
2226 arg = dm_shift_arg(as);
2227
2228 if (!strcasecmp(arg, "writeback"))
Joe Thornber2ee57d52013-10-24 14:10:29 -04002229 cf->io_mode = CM_IO_WRITEBACK;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002230
2231 else if (!strcasecmp(arg, "writethrough"))
Joe Thornber2ee57d52013-10-24 14:10:29 -04002232 cf->io_mode = CM_IO_WRITETHROUGH;
2233
2234 else if (!strcasecmp(arg, "passthrough"))
2235 cf->io_mode = CM_IO_PASSTHROUGH;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002236
2237 else {
2238 *error = "Unrecognised cache feature requested";
2239 return -EINVAL;
2240 }
2241 }
2242
2243 return 0;
2244}
2245
2246static int parse_policy(struct cache_args *ca, struct dm_arg_set *as,
2247 char **error)
2248{
2249 static struct dm_arg _args[] = {
2250 {0, 1024, "Invalid number of policy arguments"},
2251 };
2252
2253 int r;
2254
2255 if (!at_least_one_arg(as, error))
2256 return -EINVAL;
2257
2258 ca->policy_name = dm_shift_arg(as);
2259
2260 r = dm_read_arg_group(_args, as, &ca->policy_argc, error);
2261 if (r)
2262 return -EINVAL;
2263
2264 ca->policy_argv = (const char **)as->argv;
2265 dm_consume_args(as, ca->policy_argc);
2266
2267 return 0;
2268}
2269
2270static int parse_cache_args(struct cache_args *ca, int argc, char **argv,
2271 char **error)
2272{
2273 int r;
2274 struct dm_arg_set as;
2275
2276 as.argc = argc;
2277 as.argv = argv;
2278
2279 r = parse_metadata_dev(ca, &as, error);
2280 if (r)
2281 return r;
2282
2283 r = parse_cache_dev(ca, &as, error);
2284 if (r)
2285 return r;
2286
2287 r = parse_origin_dev(ca, &as, error);
2288 if (r)
2289 return r;
2290
2291 r = parse_block_size(ca, &as, error);
2292 if (r)
2293 return r;
2294
2295 r = parse_features(ca, &as, error);
2296 if (r)
2297 return r;
2298
2299 r = parse_policy(ca, &as, error);
2300 if (r)
2301 return r;
2302
2303 return 0;
2304}
2305
2306/*----------------------------------------------------------------*/
2307
2308static struct kmem_cache *migration_cache;
2309
Alasdair G Kergon2c73c472013-05-10 14:37:21 +01002310#define NOT_CORE_OPTION 1
2311
Joe Thornber2f14f4b2013-05-10 14:37:21 +01002312static int process_config_option(struct cache *cache, const char *key, const char *value)
Alasdair G Kergon2c73c472013-05-10 14:37:21 +01002313{
2314 unsigned long tmp;
2315
Joe Thornber2f14f4b2013-05-10 14:37:21 +01002316 if (!strcasecmp(key, "migration_threshold")) {
2317 if (kstrtoul(value, 10, &tmp))
Alasdair G Kergon2c73c472013-05-10 14:37:21 +01002318 return -EINVAL;
2319
2320 cache->migration_threshold = tmp;
2321 return 0;
2322 }
2323
2324 return NOT_CORE_OPTION;
2325}
2326
Joe Thornber2f14f4b2013-05-10 14:37:21 +01002327static int set_config_value(struct cache *cache, const char *key, const char *value)
2328{
2329 int r = process_config_option(cache, key, value);
2330
2331 if (r == NOT_CORE_OPTION)
2332 r = policy_set_config_value(cache->policy, key, value);
2333
2334 if (r)
2335 DMWARN("bad config value for %s: %s", key, value);
2336
2337 return r;
2338}
2339
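/*
 * Apply <key> <value> pairs (e.g. "migration_threshold 4096
 * sequential_threshold 512"): core options are handled by
 * process_config_option(), anything else is passed on to the policy.
 */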
2340static int set_config_values(struct cache *cache, int argc, const char **argv)
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002341{
2342 int r = 0;
2343
2344 if (argc & 1) {
2345 DMWARN("Odd number of policy arguments given but they should be <key> <value> pairs.");
2346 return -EINVAL;
2347 }
2348
2349 while (argc) {
Joe Thornber2f14f4b2013-05-10 14:37:21 +01002350 r = set_config_value(cache, argv[0], argv[1]);
2351 if (r)
2352 break;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002353
2354 argc -= 2;
2355 argv += 2;
2356 }
2357
2358 return r;
2359}
2360
2361static int create_cache_policy(struct cache *cache, struct cache_args *ca,
2362 char **error)
2363{
Mikulas Patocka4cb3e1d2013-10-01 18:35:39 -04002364 struct dm_cache_policy *p = dm_cache_policy_create(ca->policy_name,
2365 cache->cache_size,
2366 cache->origin_sectors,
2367 cache->sectors_per_block);
2368 if (IS_ERR(p)) {
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002369 *error = "Error creating cache's policy";
Mikulas Patocka4cb3e1d2013-10-01 18:35:39 -04002370 return PTR_ERR(p);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002371 }
Mikulas Patocka4cb3e1d2013-10-01 18:35:39 -04002372 cache->policy = p;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002373
Joe Thornber2f14f4b2013-05-10 14:37:21 +01002374 return 0;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002375}
2376
Joe Thornber08b18452014-11-06 14:38:01 +00002377/*
Joe Thornber2bb812d2014-11-26 16:07:50 +00002378 * We want the discard block size to be at least as large as the
 2379 * cache block size, and no more than 2^14 discard blocks across the origin.
Joe Thornber08b18452014-11-06 14:38:01 +00002380 */
2381#define MAX_DISCARD_BLOCKS (1 << 14)
2382
2383static bool too_many_discard_blocks(sector_t discard_block_size,
2384 sector_t origin_size)
2385{
2386 (void) sector_div(origin_size, discard_block_size);
2387
2388 return origin_size > MAX_DISCARD_BLOCKS;
2389}
2390
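/*
 * E.g. with 512-sector cache blocks and a 1TiB (2^31 sector) origin,
 * the discard block size doubles from 512 up to 131072 sectors (64MiB),
 * at which point the origin spans exactly 2^14 discard blocks.
 */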
2391static sector_t calculate_discard_block_size(sector_t cache_block_size,
2392 sector_t origin_size)
2393{
Joe Thornber2bb812d2014-11-26 16:07:50 +00002394 sector_t discard_block_size = cache_block_size;
Joe Thornber08b18452014-11-06 14:38:01 +00002395
2396 if (origin_size)
2397 while (too_many_discard_blocks(discard_block_size, origin_size))
2398 discard_block_size *= 2;
2399
2400 return discard_block_size;
2401}
2402
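/*
 * Record the cache size, warning when the mapping table is getting
 * large; e.g. a 1TiB cache of 64-sector (32KiB) blocks has 2^25 cache
 * blocks, well past the 2^20 warning threshold.
 */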
Joe Thornberd1d92202014-11-11 11:58:32 +00002403static void set_cache_size(struct cache *cache, dm_cblock_t size)
2404{
2405 dm_block_t nr_blocks = from_cblock(size);
2406
2407 if (nr_blocks > (1 << 20) && cache->cache_size != size)
2408 DMWARN_LIMIT("You have created a cache device with a lot of individual cache blocks (%llu)\n"
2409 "All these mappings can consume a lot of kernel memory, and take some time to read/write.\n"
2410 "Please consider increasing the cache block size to reduce the overall cache block count.",
2411 (unsigned long long) nr_blocks);
2412
2413 cache->cache_size = size;
2414}
2415
Joe Thornberf8350da2013-05-10 14:37:16 +01002416#define DEFAULT_MIGRATION_THRESHOLD 2048
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002417
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002418static int cache_create(struct cache_args *ca, struct cache **result)
2419{
2420 int r = 0;
2421 char **error = &ca->ti->error;
2422 struct cache *cache;
2423 struct dm_target *ti = ca->ti;
2424 dm_block_t origin_blocks;
2425 struct dm_cache_metadata *cmd;
2426 bool may_format = ca->features.mode == CM_WRITE;
2427
2428 cache = kzalloc(sizeof(*cache), GFP_KERNEL);
2429 if (!cache)
2430 return -ENOMEM;
2431
2432 cache->ti = ca->ti;
2433 ti->private = cache;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002434 ti->num_flush_bios = 2;
2435 ti->flush_supported = true;
2436
2437 ti->num_discard_bios = 1;
2438 ti->discards_supported = true;
2439 ti->discard_zeroes_data_unsupported = true;
Joe Thornber25726292014-11-24 14:05:16 +00002440 ti->split_discard_bios = false;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002441
Joe Thornber8c5008f2013-05-10 14:37:18 +01002442 cache->features = ca->features;
Mike Snitzer19b00922013-04-05 15:36:34 +01002443 ti->per_bio_data_size = get_per_bio_data_size(cache);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002444
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002445 cache->callbacks.congested_fn = cache_is_congested;
2446 dm_table_add_target_callbacks(ti->table, &cache->callbacks);
2447
2448 cache->metadata_dev = ca->metadata_dev;
2449 cache->origin_dev = ca->origin_dev;
2450 cache->cache_dev = ca->cache_dev;
2451
2452 ca->metadata_dev = ca->origin_dev = ca->cache_dev = NULL;
2453
2454 /* FIXME: factor out this whole section */
2455 origin_blocks = cache->origin_sectors = ca->origin_sectors;
Joe Thornber414dd672013-03-20 17:21:25 +00002456 origin_blocks = block_div(origin_blocks, ca->block_size);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002457 cache->origin_blocks = to_oblock(origin_blocks);
2458
2459 cache->sectors_per_block = ca->block_size;
2460 if (dm_set_target_max_io_len(ti, cache->sectors_per_block)) {
2461 r = -EINVAL;
2462 goto bad;
2463 }
2464
2465 if (ca->block_size & (ca->block_size - 1)) {
2466 dm_block_t cache_size = ca->cache_sectors;
2467
2468 cache->sectors_per_block_shift = -1;
Joe Thornber414dd672013-03-20 17:21:25 +00002469 cache_size = block_div(cache_size, ca->block_size);
Joe Thornberd1d92202014-11-11 11:58:32 +00002470 set_cache_size(cache, to_cblock(cache_size));
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002471 } else {
2472 cache->sectors_per_block_shift = __ffs(ca->block_size);
Joe Thornberd1d92202014-11-11 11:58:32 +00002473 set_cache_size(cache, to_cblock(ca->cache_sectors >> cache->sectors_per_block_shift));
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002474 }
2475
2476 r = create_cache_policy(cache, ca, error);
2477 if (r)
2478 goto bad;
Joe Thornber2f14f4b2013-05-10 14:37:21 +01002479
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002480 cache->policy_nr_args = ca->policy_argc;
Joe Thornber2f14f4b2013-05-10 14:37:21 +01002481 cache->migration_threshold = DEFAULT_MIGRATION_THRESHOLD;
2482
2483 r = set_config_values(cache, ca->policy_argc, ca->policy_argv);
2484 if (r) {
2485 *error = "Error setting cache policy's config values";
2486 goto bad;
2487 }
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002488
2489 cmd = dm_cache_metadata_open(cache->metadata_dev->bdev,
2490 ca->block_size, may_format,
2491 dm_cache_policy_get_hint_size(cache->policy));
2492 if (IS_ERR(cmd)) {
2493 *error = "Error creating metadata object";
2494 r = PTR_ERR(cmd);
2495 goto bad;
2496 }
2497 cache->cmd = cmd;
2498
Joe Thornber2ee57d52013-10-24 14:10:29 -04002499 if (passthrough_mode(&cache->features)) {
2500 bool all_clean;
2501
2502 r = dm_cache_metadata_all_clean(cache->cmd, &all_clean);
2503 if (r) {
2504 *error = "dm_cache_metadata_all_clean() failed";
2505 goto bad;
2506 }
2507
2508 if (!all_clean) {
2509 *error = "Cannot enter passthrough mode unless all blocks are clean";
2510 r = -EINVAL;
2511 goto bad;
2512 }
2513 }
2514
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002515 spin_lock_init(&cache->lock);
2516 bio_list_init(&cache->deferred_bios);
2517 bio_list_init(&cache->deferred_flush_bios);
Joe Thornbere2e74d62013-03-20 17:21:27 +00002518 bio_list_init(&cache->deferred_writethrough_bios);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002519 INIT_LIST_HEAD(&cache->quiesced_migrations);
2520 INIT_LIST_HEAD(&cache->completed_migrations);
2521 INIT_LIST_HEAD(&cache->need_commit_migrations);
Joe Thornbera59db672015-01-23 10:16:16 +00002522 atomic_set(&cache->nr_allocated_migrations, 0);
2523 atomic_set(&cache->nr_io_migrations, 0);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002524 init_waitqueue_head(&cache->migration_wait);
2525
Joe Thornber66cb1912013-10-30 17:11:58 +00002526 init_waitqueue_head(&cache->quiescing_wait);
Joe Thornber238f8362013-10-30 17:29:30 +00002527 atomic_set(&cache->quiescing, 0);
Joe Thornber66cb1912013-10-30 17:11:58 +00002528 atomic_set(&cache->quiescing_ack, 0);
2529
Wei Yongjunfa4d6832013-05-10 14:37:14 +01002530 r = -ENOMEM;
Anssi Hannula44fa8162014-08-01 11:55:47 -04002531 atomic_set(&cache->nr_dirty, 0);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002532 cache->dirty_bitset = alloc_bitset(from_cblock(cache->cache_size));
2533 if (!cache->dirty_bitset) {
2534 *error = "could not allocate dirty bitset";
2535 goto bad;
2536 }
2537 clear_bitset(cache->dirty_bitset, from_cblock(cache->cache_size));
2538
Joe Thornber08b18452014-11-06 14:38:01 +00002539 cache->discard_block_size =
2540 calculate_discard_block_size(cache->sectors_per_block,
2541 cache->origin_sectors);
Joe Thornber25726292014-11-24 14:05:16 +00002542 cache->discard_nr_blocks = to_dblock(dm_sector_div_up(cache->origin_sectors,
2543 cache->discard_block_size));
Joe Thornber1bad9bc2014-11-07 14:47:07 +00002544 cache->discard_bitset = alloc_bitset(from_dblock(cache->discard_nr_blocks));
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002545 if (!cache->discard_bitset) {
2546 *error = "could not allocate discard bitset";
2547 goto bad;
2548 }
Joe Thornber1bad9bc2014-11-07 14:47:07 +00002549 clear_bitset(cache->discard_bitset, from_dblock(cache->discard_nr_blocks));
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002550
2551 cache->copier = dm_kcopyd_client_create(&dm_kcopyd_throttle);
2552 if (IS_ERR(cache->copier)) {
2553 *error = "could not create kcopyd client";
2554 r = PTR_ERR(cache->copier);
2555 goto bad;
2556 }
2557
2558 cache->wq = alloc_ordered_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM);
2559 if (!cache->wq) {
2560 *error = "could not create workqueue for metadata object";
2561 goto bad;
2562 }
2563 INIT_WORK(&cache->worker, do_worker);
2564 INIT_DELAYED_WORK(&cache->waker, do_waker);
2565 cache->last_commit_jiffies = jiffies;
2566
Joe Thornbera195db22014-10-06 16:30:06 -04002567 cache->prison = dm_bio_prison_create();
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002568 if (!cache->prison) {
2569 *error = "could not create bio prison";
2570 goto bad;
2571 }
2572
2573 cache->all_io_ds = dm_deferred_set_create();
2574 if (!cache->all_io_ds) {
2575 *error = "could not create all_io deferred set";
2576 goto bad;
2577 }
2578
2579 cache->migration_pool = mempool_create_slab_pool(MIGRATION_POOL_SIZE,
2580 migration_cache);
2581 if (!cache->migration_pool) {
2582 *error = "Error creating cache's migration mempool";
2583 goto bad;
2584 }
2585
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002586 cache->need_tick_bio = true;
2587 cache->sized = false;
Joe Thornber65790ff2013-11-08 16:39:50 +00002588 cache->invalidate = false;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002589 cache->commit_requested = false;
2590 cache->loaded_mappings = false;
2591 cache->loaded_discards = false;
2592
2593 load_stats(cache);
2594
2595 atomic_set(&cache->stats.demotion, 0);
2596 atomic_set(&cache->stats.promotion, 0);
2597 atomic_set(&cache->stats.copies_avoided, 0);
2598 atomic_set(&cache->stats.cache_cell_clash, 0);
2599 atomic_set(&cache->stats.commit_count, 0);
2600 atomic_set(&cache->stats.discard_count, 0);
2601
Joe Thornber65790ff2013-11-08 16:39:50 +00002602 spin_lock_init(&cache->invalidation_lock);
2603 INIT_LIST_HEAD(&cache->invalidation_requests);
2604
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002605 *result = cache;
2606 return 0;
2607
2608bad:
2609 destroy(cache);
2610 return r;
2611}
2612
2613static int copy_ctr_args(struct cache *cache, int argc, const char **argv)
2614{
2615 unsigned i;
2616 const char **copy;
2617
2618 copy = kcalloc(argc, sizeof(*copy), GFP_KERNEL);
2619 if (!copy)
2620 return -ENOMEM;
2621 for (i = 0; i < argc; i++) {
2622 copy[i] = kstrdup(argv[i], GFP_KERNEL);
2623 if (!copy[i]) {
2624 while (i--)
2625 kfree(copy[i]);
2626 kfree(copy);
2627 return -ENOMEM;
2628 }
2629 }
2630
2631 cache->nr_ctr_args = argc;
2632 cache->ctr_args = copy;
2633
2634 return 0;
2635}
2636
2637static int cache_ctr(struct dm_target *ti, unsigned argc, char **argv)
2638{
2639 int r = -EINVAL;
2640 struct cache_args *ca;
2641 struct cache *cache = NULL;
2642
2643 ca = kzalloc(sizeof(*ca), GFP_KERNEL);
2644 if (!ca) {
2645 ti->error = "Error allocating memory for cache";
2646 return -ENOMEM;
2647 }
2648 ca->ti = ti;
2649
2650 r = parse_cache_args(ca, argc, argv, &ti->error);
2651 if (r)
2652 goto out;
2653
2654 r = cache_create(ca, &cache);
Heinz Mauelshagen617a0b82013-03-20 17:21:26 +00002655 if (r)
2656 goto out;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002657
2658 r = copy_ctr_args(cache, argc - 3, (const char **)argv + 3);
2659 if (r) {
2660 destroy(cache);
2661 goto out;
2662 }
2663
2664 ti->private = cache;
2665
2666out:
2667 destroy_cache_args(ca);
2668 return r;
2669}
2670
Joe Thornber8c081b52014-05-13 16:18:38 +01002671static int __cache_map(struct cache *cache, struct bio *bio, struct dm_bio_prison_cell **cell)
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002672{
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002673 int r;
2674 dm_oblock_t block = get_bio_block(cache, bio);
Mike Snitzer19b00922013-04-05 15:36:34 +01002675 size_t pb_data_size = get_per_bio_data_size(cache);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002676 bool can_migrate = false;
2677 bool discarded_block;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002678 struct policy_result lookup_result;
Heinz Mauelshagene893fba2014-03-12 16:13:39 +01002679 struct per_bio_data *pb = init_per_bio_data(bio, pb_data_size);
Joe Thornberfb4100a2015-05-20 10:30:32 +01002680 struct old_oblock_lock ool;
2681
2682 ool.locker.fn = null_locker;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002683
Heinz Mauelshagene893fba2014-03-12 16:13:39 +01002684 if (unlikely(from_oblock(block) >= from_oblock(cache->origin_blocks))) {
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002685 /*
2686 * This can only occur if the io goes to a partial block at
2687 * the end of the origin device. We don't cache these.
2688 * Just remap to the origin and carry on.
2689 */
Heinz Mauelshagene893fba2014-03-12 16:13:39 +01002690 remap_to_origin(cache, bio);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002691 return DM_MAPIO_REMAPPED;
2692 }
2693
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002694 if (bio->bi_rw & (REQ_FLUSH | REQ_FUA | REQ_DISCARD)) {
2695 defer_bio(cache, bio);
2696 return DM_MAPIO_SUBMITTED;
2697 }
2698
2699 /*
2700 * Check to see if that block is currently migrating.
2701 */
Joe Thornber8c081b52014-05-13 16:18:38 +01002702 *cell = alloc_prison_cell(cache);
2703 if (!*cell) {
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002704 defer_bio(cache, bio);
2705 return DM_MAPIO_SUBMITTED;
2706 }
2707
Joe Thornber8c081b52014-05-13 16:18:38 +01002708 r = bio_detain(cache, block, bio, *cell,
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002709 (cell_free_fn) free_prison_cell,
Joe Thornber8c081b52014-05-13 16:18:38 +01002710 cache, cell);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002711 if (r) {
2712 if (r < 0)
2713 defer_bio(cache, bio);
2714
2715 return DM_MAPIO_SUBMITTED;
2716 }
2717
2718 discarded_block = is_discarded_oblock(cache, block);
2719
2720 r = policy_map(cache->policy, block, false, can_migrate, discarded_block,
Joe Thornberfb4100a2015-05-20 10:30:32 +01002721 bio, &ool.locker, &lookup_result);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002722 if (r == -EWOULDBLOCK) {
Joe Thornber8c081b52014-05-13 16:18:38 +01002723 cell_defer(cache, *cell, true);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002724 return DM_MAPIO_SUBMITTED;
2725
2726 } else if (r) {
2727 DMERR_LIMIT("Unexpected return from cache replacement policy: %d", r);
Joe Thornber8c081b52014-05-13 16:18:38 +01002728 cell_defer(cache, *cell, false);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002729 bio_io_error(bio);
2730 return DM_MAPIO_SUBMITTED;
2731 }
2732
Joe Thornber2ee57d52013-10-24 14:10:29 -04002733 r = DM_MAPIO_REMAPPED;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002734 switch (lookup_result.op) {
2735 case POLICY_HIT:
Joe Thornber2ee57d52013-10-24 14:10:29 -04002736 if (passthrough_mode(&cache->features)) {
2737 if (bio_data_dir(bio) == WRITE) {
2738 /*
2739 * We need to invalidate this block, so
2740 * defer for the worker thread.
2741 */
Joe Thornber8c081b52014-05-13 16:18:38 +01002742 cell_defer(cache, *cell, true);
Joe Thornber2ee57d52013-10-24 14:10:29 -04002743 r = DM_MAPIO_SUBMITTED;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002744
Joe Thornber2ee57d52013-10-24 14:10:29 -04002745 } else {
Joe Thornber2ee57d52013-10-24 14:10:29 -04002746 inc_miss_counter(cache, bio);
2747 remap_to_origin_clear_discard(cache, bio, block);
Joe Thornber2ee57d52013-10-24 14:10:29 -04002748 }
2749
2750 } else {
2751 inc_hit_counter(cache, bio);
Joe Thornber2ee57d52013-10-24 14:10:29 -04002752 if (bio_data_dir(bio) == WRITE && writethrough_mode(&cache->features) &&
2753 !is_dirty(cache, lookup_result.cblock))
2754 remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
2755 else
2756 remap_to_cache_dirty(cache, bio, block, lookup_result.cblock);
Joe Thornber2ee57d52013-10-24 14:10:29 -04002757 }
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002758 break;
2759
2760 case POLICY_MISS:
2761 inc_miss_counter(cache, bio);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002762 if (pb->req_nr != 0) {
2763 /*
2764 * This is a duplicate writethrough io that is no
2765 * longer needed because the block has been demoted.
2766 */
2767 bio_endio(bio, 0);
Joe Thornber8c081b52014-05-13 16:18:38 +01002768 cell_defer(cache, *cell, false);
2769 r = DM_MAPIO_SUBMITTED;
2770
2771 } else
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002772 remap_to_origin_clear_discard(cache, bio, block);
Joe Thornber8c081b52014-05-13 16:18:38 +01002773
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002774 break;
2775
2776 default:
2777 DMERR_LIMIT("%s: erroring bio: unknown policy op: %u", __func__,
2778 (unsigned) lookup_result.op);
Joe Thornber8c081b52014-05-13 16:18:38 +01002779 cell_defer(cache, *cell, false);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002780 bio_io_error(bio);
Joe Thornber2ee57d52013-10-24 14:10:29 -04002781 r = DM_MAPIO_SUBMITTED;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002782 }
2783
Joe Thornber2ee57d52013-10-24 14:10:29 -04002784 return r;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002785}
2786
Joe Thornber8c081b52014-05-13 16:18:38 +01002787static int cache_map(struct dm_target *ti, struct bio *bio)
2788{
2789 int r;
Joe Thornberf824a2a2014-11-28 09:48:25 +00002790 struct dm_bio_prison_cell *cell = NULL;
Joe Thornber8c081b52014-05-13 16:18:38 +01002791 struct cache *cache = ti->private;
2792
2793 r = __cache_map(cache, bio, &cell);
Joe Thornberf824a2a2014-11-28 09:48:25 +00002794 if (r == DM_MAPIO_REMAPPED && cell) {
Joe Thornber8c081b52014-05-13 16:18:38 +01002795 inc_ds(cache, bio, cell);
2796 cell_defer(cache, cell, false);
2797 }
2798
2799 return r;
2800}
2801
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002802static int cache_end_io(struct dm_target *ti, struct bio *bio, int error)
2803{
2804 struct cache *cache = ti->private;
2805 unsigned long flags;
Mike Snitzer19b00922013-04-05 15:36:34 +01002806 size_t pb_data_size = get_per_bio_data_size(cache);
2807 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002808
2809 if (pb->tick) {
2810 policy_tick(cache->policy);
2811
2812 spin_lock_irqsave(&cache->lock, flags);
2813 cache->need_tick_bio = true;
2814 spin_unlock_irqrestore(&cache->lock, flags);
2815 }
2816
2817 check_for_quiesced_migrations(cache, pb);
2818
2819 return 0;
2820}
2821
2822static int write_dirty_bitset(struct cache *cache)
2823{
2824 unsigned i, r;
2825
2826 for (i = 0; i < from_cblock(cache->cache_size); i++) {
2827 r = dm_cache_set_dirty(cache->cmd, to_cblock(i),
2828 is_dirty(cache, to_cblock(i)));
2829 if (r)
2830 return r;
2831 }
2832
2833 return 0;
2834}
2835
2836static int write_discard_bitset(struct cache *cache)
2837{
2838 unsigned i, r;
2839
Joe Thornber1bad9bc2014-11-07 14:47:07 +00002840 r = dm_cache_discard_bitset_resize(cache->cmd, cache->discard_block_size,
2841 cache->discard_nr_blocks);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002842 if (r) {
2843 DMERR("could not resize on-disk discard bitset");
2844 return r;
2845 }
2846
Joe Thornber1bad9bc2014-11-07 14:47:07 +00002847 for (i = 0; i < from_dblock(cache->discard_nr_blocks); i++) {
2848 r = dm_cache_set_discard(cache->cmd, to_dblock(i),
2849 is_discarded(cache, to_dblock(i)));
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002850 if (r)
2851 return r;
2852 }
2853
2854 return 0;
2855}
2856
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002857/*
2858 * returns true on success
2859 */
2860static bool sync_metadata(struct cache *cache)
2861{
2862 int r1, r2, r3, r4;
2863
2864 r1 = write_dirty_bitset(cache);
2865 if (r1)
2866 DMERR("could not write dirty bitset");
2867
2868 r2 = write_discard_bitset(cache);
2869 if (r2)
2870 DMERR("could not write discard bitset");
2871
2872 save_stats(cache);
2873
Joe Thornber05966612014-04-03 16:16:44 +01002874 r3 = dm_cache_write_hints(cache->cmd, cache->policy);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002875 if (r3)
2876 DMERR("could not write hints");
2877
2878 /*
2879 * If writing the above metadata failed, we still commit, but don't
2880 * set the clean shutdown flag. This will effectively force every
2881 * dirty bit to be set on reload.
2882 */
2883 r4 = dm_cache_commit(cache->cmd, !r1 && !r2 && !r3);
2884 if (r4)
2885 DMERR("could not write cache metadata. Data loss may occur.");
2886
2887 return !r1 && !r2 && !r3 && !r4;
2888}
2889
2890static void cache_postsuspend(struct dm_target *ti)
2891{
2892 struct cache *cache = ti->private;
2893
2894 start_quiescing(cache);
2895 wait_for_migrations(cache);
2896 stop_worker(cache);
2897 requeue_deferred_io(cache);
2898 stop_quiescing(cache);
2899
2900 (void) sync_metadata(cache);
2901}
2902
2903static int load_mapping(void *context, dm_oblock_t oblock, dm_cblock_t cblock,
2904 bool dirty, uint32_t hint, bool hint_valid)
2905{
2906 int r;
2907 struct cache *cache = context;
2908
2909 r = policy_load_mapping(cache->policy, oblock, cblock, hint, hint_valid);
2910 if (r)
2911 return r;
2912
2913 if (dirty)
2914 set_dirty(cache, oblock, cblock);
2915 else
2916 clear_dirty(cache, oblock, cblock);
2917
2918 return 0;
2919}
2920
Joe Thornber3e2e1c32014-11-24 14:06:22 +00002921/*
2922 * The discard block size in the on disk metadata is not
 2923 * necessarily the same as the one we're currently using. So we have to
2924 * be careful to only set the discarded attribute if we know it
2925 * covers a complete block of the new size.
2926 */
2927struct discard_load_info {
2928 struct cache *cache;
2929
2930 /*
2931 * These blocks are sized using the on disk dblock size, rather
2932 * than the current one.
2933 */
2934 dm_block_t block_size;
2935 dm_block_t discard_begin, discard_end;
2936};
2937
2938static void discard_load_info_init(struct cache *cache,
2939 struct discard_load_info *li)
2940{
2941 li->cache = cache;
2942 li->discard_begin = li->discard_end = 0;
2943}
2944
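/*
 * Flush the accumulated [discard_begin, discard_end) range, converting
 * from the on-disk dblock size to the current one. E.g. a range of
 * blocks 4-8 at a 1024-sector dblock size covers sectors 4096-8192,
 * which becomes dblocks 2-4 at a current size of 2048 sectors.
 */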
2945static void set_discard_range(struct discard_load_info *li)
2946{
2947 sector_t b, e;
2948
2949 if (li->discard_begin == li->discard_end)
2950 return;
2951
2952 /*
2953 * Convert to sectors.
2954 */
2955 b = li->discard_begin * li->block_size;
2956 e = li->discard_end * li->block_size;
2957
2958 /*
2959 * Then convert back to the current dblock size.
2960 */
2961 b = dm_sector_div_up(b, li->cache->discard_block_size);
2962 sector_div(e, li->cache->discard_block_size);
2963
2964 /*
2965 * The origin may have shrunk, so we need to check we're still in
2966 * bounds.
2967 */
2968 if (e > from_dblock(li->cache->discard_nr_blocks))
2969 e = from_dblock(li->cache->discard_nr_blocks);
2970
2971 for (; b < e; b++)
2972 set_discard(li->cache, to_dblock(b));
2973}
2974
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002975static int load_discard(void *context, sector_t discard_block_size,
Joe Thornber1bad9bc2014-11-07 14:47:07 +00002976 dm_dblock_t dblock, bool discard)
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002977{
Joe Thornber3e2e1c32014-11-24 14:06:22 +00002978 struct discard_load_info *li = context;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002979
Joe Thornber3e2e1c32014-11-24 14:06:22 +00002980 li->block_size = discard_block_size;
Joe Thornber1bad9bc2014-11-07 14:47:07 +00002981
Joe Thornber3e2e1c32014-11-24 14:06:22 +00002982 if (discard) {
2983 if (from_dblock(dblock) == li->discard_end)
2984 /*
2985 * We're already in a discard range, just extend it.
2986 */
2987 li->discard_end = li->discard_end + 1ULL;
2988
2989 else {
2990 /*
2991 * Emit the old range and start a new one.
2992 */
2993 set_discard_range(li);
2994 li->discard_begin = from_dblock(dblock);
2995 li->discard_end = li->discard_begin + 1ULL;
2996 }
2997 } else {
2998 set_discard_range(li);
2999 li->discard_begin = li->discard_end = 0;
3000 }
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00003001
3002 return 0;
3003}
3004
Joe Thornberf494a9c2013-10-31 13:55:49 -04003005static dm_cblock_t get_cache_dev_size(struct cache *cache)
3006{
3007 sector_t size = get_dev_size(cache->cache_dev);
3008 (void) sector_div(size, cache->sectors_per_block);
3009 return to_cblock(size);
3010}
3011
3012static bool can_resize(struct cache *cache, dm_cblock_t new_size)
3013{
3014 if (from_cblock(new_size) > from_cblock(cache->cache_size))
3015 return true;
3016
3017 /*
3018 * We can't drop a dirty block when shrinking the cache.
3019 */
3020 while (from_cblock(new_size) < from_cblock(cache->cache_size)) {
3021 new_size = to_cblock(from_cblock(new_size) + 1);
3022 if (is_dirty(cache, new_size)) {
3023 DMERR("unable to shrink cache; cache block %llu is dirty",
3024 (unsigned long long) from_cblock(new_size));
3025 return false;
3026 }
3027 }
3028
3029 return true;
3030}
3031
3032static int resize_cache_dev(struct cache *cache, dm_cblock_t new_size)
3033{
3034 int r;
3035
Vincent Pelletier08844802013-11-30 12:58:42 +01003036 r = dm_cache_resize(cache->cmd, new_size);
Joe Thornberf494a9c2013-10-31 13:55:49 -04003037 if (r) {
3038 DMERR("could not resize cache metadata");
3039 return r;
3040 }
3041
Joe Thornberd1d92202014-11-11 11:58:32 +00003042 set_cache_size(cache, new_size);
Joe Thornberf494a9c2013-10-31 13:55:49 -04003043
3044 return 0;
3045}
3046
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00003047static int cache_preresume(struct dm_target *ti)
3048{
3049 int r = 0;
3050 struct cache *cache = ti->private;
Joe Thornberf494a9c2013-10-31 13:55:49 -04003051 dm_cblock_t csize = get_cache_dev_size(cache);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00003052
3053 /*
3054 * Check to see if the cache has resized.
3055 */
Joe Thornberf494a9c2013-10-31 13:55:49 -04003056 if (!cache->sized) {
3057 r = resize_cache_dev(cache, csize);
3058 if (r)
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00003059 return r;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00003060
3061 cache->sized = true;
Joe Thornberf494a9c2013-10-31 13:55:49 -04003062
3063 } else if (csize != cache->cache_size) {
3064 if (!can_resize(cache, csize))
3065 return -EINVAL;
3066
3067 r = resize_cache_dev(cache, csize);
3068 if (r)
3069 return r;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00003070 }
3071
3072 if (!cache->loaded_mappings) {
Mike Snitzerea2dd8c2013-03-20 17:21:28 +00003073 r = dm_cache_load_mappings(cache->cmd, cache->policy,
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00003074 load_mapping, cache);
3075 if (r) {
3076 DMERR("could not load cache mappings");
3077 return r;
3078 }
3079
3080 cache->loaded_mappings = true;
3081 }
3082
3083 if (!cache->loaded_discards) {
Joe Thornber3e2e1c32014-11-24 14:06:22 +00003084 struct discard_load_info li;
3085
3086 /*
3087 * The discard bitset could have been resized, or the
3088 * discard block size changed. To be safe we start by
3089 * setting every dblock to not discarded.
3090 */
3091 clear_bitset(cache->discard_bitset, from_dblock(cache->discard_nr_blocks));
3092
3093 discard_load_info_init(cache, &li);
3094 r = dm_cache_load_discards(cache->cmd, load_discard, &li);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00003095 if (r) {
3096 DMERR("could not load origin discards");
3097 return r;
3098 }
Joe Thornber3e2e1c32014-11-24 14:06:22 +00003099 set_discard_range(&li);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00003100
3101 cache->loaded_discards = true;
3102 }
3103
3104 return r;
3105}
3106
3107static void cache_resume(struct dm_target *ti)
3108{
3109 struct cache *cache = ti->private;
3110
3111 cache->need_tick_bio = true;
3112 do_waker(&cache->waker.work);
3113}
3114
3115/*
3116 * Status format:
3117 *
Mike Snitzer6a388612014-01-09 16:04:12 -05003118 * <metadata block size> <#used metadata blocks>/<#total metadata blocks>
3119 * <cache block size> <#used cache blocks>/<#total cache blocks>
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00003120 * <#read hits> <#read misses> <#write hits> <#write misses>
Mike Snitzer6a388612014-01-09 16:04:12 -05003121 * <#demotions> <#promotions> <#dirty>
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00003122 * <#features> <features>*
3123 * <#core args> <core args>
Mike Snitzer2e68c4e2014-01-15 21:06:55 -05003124 * <policy name> <#policy args> <policy args>*
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00003125 */
3126static void cache_status(struct dm_target *ti, status_type_t type,
3127 unsigned status_flags, char *result, unsigned maxlen)
3128{
3129 int r = 0;
3130 unsigned i;
3131 ssize_t sz = 0;
3132 dm_block_t nr_free_blocks_metadata = 0;
3133 dm_block_t nr_blocks_metadata = 0;
3134 char buf[BDEVNAME_SIZE];
3135 struct cache *cache = ti->private;
3136 dm_cblock_t residency;
3137
3138 switch (type) {
3139 case STATUSTYPE_INFO:
3140 /* Commit to ensure statistics aren't out-of-date */
3141 if (!(status_flags & DM_STATUS_NOFLUSH_FLAG) && !dm_suspended(ti)) {
3142 r = dm_cache_commit(cache->cmd, false);
3143 if (r)
3144 DMERR("could not commit metadata for accurate status");
3145 }
3146
3147 r = dm_cache_get_free_metadata_block_count(cache->cmd,
3148 &nr_free_blocks_metadata);
3149 if (r) {
3150 DMERR("could not get metadata free block count");
3151 goto err;
3152 }
3153
3154 r = dm_cache_get_metadata_dev_size(cache->cmd, &nr_blocks_metadata);
3155 if (r) {
3156 DMERR("could not get metadata device size");
3157 goto err;
3158 }
3159
3160 residency = policy_residency(cache->policy);
3161
Anssi Hannula44fa8162014-08-01 11:55:47 -04003162 DMEMIT("%u %llu/%llu %u %llu/%llu %u %u %u %u %u %u %lu ",
Mike Snitzer895b47d2014-07-14 15:37:18 -04003163 (unsigned)DM_CACHE_METADATA_BLOCK_SIZE,
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00003164 (unsigned long long)(nr_blocks_metadata - nr_free_blocks_metadata),
3165 (unsigned long long)nr_blocks_metadata,
Mike Snitzer6a388612014-01-09 16:04:12 -05003166 cache->sectors_per_block,
3167 (unsigned long long) from_cblock(residency),
3168 (unsigned long long) from_cblock(cache->cache_size),
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00003169 (unsigned) atomic_read(&cache->stats.read_hit),
3170 (unsigned) atomic_read(&cache->stats.read_miss),
3171 (unsigned) atomic_read(&cache->stats.write_hit),
3172 (unsigned) atomic_read(&cache->stats.write_miss),
3173 (unsigned) atomic_read(&cache->stats.demotion),
3174 (unsigned) atomic_read(&cache->stats.promotion),
Anssi Hannula44fa8162014-08-01 11:55:47 -04003175 (unsigned long) atomic_read(&cache->nr_dirty));
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00003176
Joe Thornber2ee57d52013-10-24 14:10:29 -04003177 if (writethrough_mode(&cache->features))
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00003178 DMEMIT("1 writethrough ");
Joe Thornber2ee57d52013-10-24 14:10:29 -04003179
3180 else if (passthrough_mode(&cache->features))
3181 DMEMIT("1 passthrough ");
3182
3183 else if (writeback_mode(&cache->features))
3184 DMEMIT("1 writeback ");
3185
3186 else {
3187 DMERR("internal error: unknown io mode: %d", (int) cache->features.io_mode);
3188 goto err;
3189 }
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00003190
3191 DMEMIT("2 migration_threshold %llu ", (unsigned long long) cache->migration_threshold);
Mike Snitzer2e68c4e2014-01-15 21:06:55 -05003192
3193 DMEMIT("%s ", dm_cache_policy_get_name(cache->policy));
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00003194 if (sz < maxlen) {
3195 r = policy_emit_config_values(cache->policy, result + sz, maxlen - sz);
3196 if (r)
3197 DMERR("policy_emit_config_values returned %d", r);
3198 }
3199
3200 break;
3201
3202 case STATUSTYPE_TABLE:
3203 format_dev_t(buf, cache->metadata_dev->bdev->bd_dev);
3204 DMEMIT("%s ", buf);
3205 format_dev_t(buf, cache->cache_dev->bdev->bd_dev);
3206 DMEMIT("%s ", buf);
3207 format_dev_t(buf, cache->origin_dev->bdev->bd_dev);
3208 DMEMIT("%s", buf);
3209
3210 for (i = 0; i < cache->nr_ctr_args - 1; i++)
3211 DMEMIT(" %s", cache->ctr_args[i]);
3212 if (cache->nr_ctr_args)
3213 DMEMIT(" %s", cache->ctr_args[cache->nr_ctr_args - 1]);
3214 }
3215
3216 return;
3217
3218err:
3219 DMEMIT("Error");
3220}
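
/*
 * Illustrative example (not from the original source): the INFO status
 * built above is what userspace sees from e.g. "dmsetup status <cache-dev>"
 * (device name hypothetical).  With purely invented numbers it might look
 * like:
 *
 *   8 72/65536 128 156/1310720 2387 541 1023 617 12 45 78
 *   1 writeback 2 migration_threshold 2048 mq <policy args>
 *
 * i.e. metadata block size, used/total metadata blocks, cache block size,
 * resident/total cache blocks, the read/write hit and miss counters,
 * demotions, promotions and dirty count, then one feature arg
 * ("writeback"), two core args ("migration_threshold 2048"), the policy
 * name and whatever config values the policy emits.  The numbers exist
 * only to show field order.
 */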

/*
 * A cache block range can take two forms:
 *
 * i) A single cblock, eg. '3456'
 * ii) A begin and end cblock with a dash between, eg. 123-234
 */
static int parse_cblock_range(struct cache *cache, const char *str,
			      struct cblock_range *result)
{
	char dummy;
	uint64_t b, e;
	int r;

	/*
	 * Try and parse form (ii) first.
	 */
	r = sscanf(str, "%llu-%llu%c", &b, &e, &dummy);
	if (r < 0)
		return r;

	if (r == 2) {
		result->begin = to_cblock(b);
		result->end = to_cblock(e);
		return 0;
	}

	/*
	 * That didn't work, try form (i).
	 */
	r = sscanf(str, "%llu%c", &b, &dummy);
	if (r < 0)
		return r;

	if (r == 1) {
		result->begin = to_cblock(b);
		result->end = to_cblock(from_cblock(result->begin) + 1u);
		return 0;
	}

	DMERR("invalid cblock range '%s'", str);
	return -EINVAL;
}
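
/*
 * Worked examples (illustrative): "3456" parses to begin = 3456 and
 * end = 3457, while "123-234" parses to begin = 123 and end = 234.
 * Judging by the single-cblock case above (end = begin + 1), the end of
 * a range is exclusive, so "123-234" describes cblocks 123..233.
 */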

static int validate_cblock_range(struct cache *cache, struct cblock_range *range)
{
	uint64_t b = from_cblock(range->begin);
	uint64_t e = from_cblock(range->end);
	uint64_t n = from_cblock(cache->cache_size);

	if (b >= n) {
		DMERR("begin cblock out of range: %llu >= %llu", b, n);
		return -EINVAL;
	}

	if (e > n) {
		DMERR("end cblock out of range: %llu > %llu", e, n);
		return -EINVAL;
	}

	if (b >= e) {
		DMERR("invalid cblock range: %llu >= %llu", b, e);
		return -EINVAL;
	}

	return 0;
}

static int request_invalidation(struct cache *cache, struct cblock_range *range)
{
	struct invalidation_request req;

	INIT_LIST_HEAD(&req.list);
	req.cblocks = range;
	atomic_set(&req.complete, 0);
	req.err = 0;
	init_waitqueue_head(&req.result_wait);

	spin_lock(&cache->invalidation_lock);
	list_add(&req.list, &cache->invalidation_requests);
	spin_unlock(&cache->invalidation_lock);
	wake_worker(cache);

	wait_event(req.result_wait, atomic_read(&req.complete));
	return req.err;
}
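
/*
 * Note (added for clarity): the invalidation_request above lives on the
 * caller's stack.  Whichever worker services cache->invalidation_requests
 * is expected to set req.complete and wake req.result_wait once it has
 * finished (or failed), which is what lets the wait_event() return before
 * the request goes out of scope.
 */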

static int process_invalidate_cblocks_message(struct cache *cache, unsigned count,
					      const char **cblock_ranges)
{
	int r = 0;
	unsigned i;
	struct cblock_range range;

	if (!passthrough_mode(&cache->features)) {
		DMERR("cache has to be in passthrough mode for invalidation");
		return -EPERM;
	}

	for (i = 0; i < count; i++) {
		r = parse_cblock_range(cache, cblock_ranges[i], &range);
		if (r)
			break;

		r = validate_cblock_range(cache, &range);
		if (r)
			break;

		/*
		 * Pass the begin and end cache blocks to the worker and wake it.
		 */
		r = request_invalidation(cache, &range);
		if (r)
			break;
	}

	return r;
}
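
/*
 * Illustrative invocation (device name and cblock numbers hypothetical):
 *
 *   dmsetup message <cache-dev> 0 invalidate_cblocks 2345 3456-4567
 *
 * which would invalidate cblock 2345 plus the range 3456-4567, provided
 * the cache was set up in passthrough mode.
 */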

/*
 * Supports
 *	"<key> <value>"
 * and
 *	"invalidate_cblocks [(<begin>)|(<begin>-<end>)]*"
 *
 * The key migration_threshold is supported by the cache target core.
 */
static int cache_message(struct dm_target *ti, unsigned argc, char **argv)
{
	struct cache *cache = ti->private;

	if (!argc)
		return -EINVAL;

	if (!strcasecmp(argv[0], "invalidate_cblocks"))
		return process_invalidate_cblocks_message(cache, argc - 1, (const char **) argv + 1);

	if (argc != 2)
		return -EINVAL;

	return set_config_value(cache, argv[0], argv[1]);
}
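
/*
 * The "<key> <value>" form is handed to set_config_value(); for example
 * (hypothetical device name, illustrative value):
 *
 *   dmsetup message <cache-dev> 0 migration_threshold 204800
 */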

static int cache_iterate_devices(struct dm_target *ti,
				 iterate_devices_callout_fn fn, void *data)
{
	int r = 0;
	struct cache *cache = ti->private;

	r = fn(ti, cache->cache_dev, 0, get_dev_size(cache->cache_dev), data);
	if (!r)
		r = fn(ti, cache->origin_dev, 0, ti->len, data);

	return r;
}

/*
 * We assume I/O is going to the origin (which is the volume
 * more likely to have restrictions e.g. by being striped).
 * (Looking up the exact location of the data would be expensive
 * and could always be out of date by the time the bio is submitted.)
 */
static int cache_bvec_merge(struct dm_target *ti,
			    struct bvec_merge_data *bvm,
			    struct bio_vec *biovec, int max_size)
{
	struct cache *cache = ti->private;
	struct request_queue *q = bdev_get_queue(cache->origin_dev->bdev);

	if (!q->merge_bvec_fn)
		return max_size;

	bvm->bi_bdev = cache->origin_dev->bdev;
	return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
}

static void set_discard_limits(struct cache *cache, struct queue_limits *limits)
{
	/*
	 * FIXME: these limits may be incompatible with the cache device
	 */
	limits->max_discard_sectors = min_t(sector_t, cache->discard_block_size * 1024,
					    cache->origin_sectors);
	limits->discard_granularity = cache->discard_block_size << SECTOR_SHIFT;
}
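
/*
 * Illustrative numbers: with a discard_block_size of 128 sectors and a
 * sufficiently large origin, max_discard_sectors becomes
 * min(128 * 1024, origin_sectors) = 131072 sectors, and
 * discard_granularity becomes 128 << SECTOR_SHIFT = 65536 bytes.
 */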

static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
{
	struct cache *cache = ti->private;
	uint64_t io_opt_sectors = limits->io_opt >> SECTOR_SHIFT;

	/*
	 * If the system-determined stacked limits are compatible with the
	 * cache's blocksize (i.e. the blocksize is a factor of io_opt),
	 * do not override them.
	 */
	if (io_opt_sectors < cache->sectors_per_block ||
	    do_div(io_opt_sectors, cache->sectors_per_block)) {
		blk_limits_io_min(limits, cache->sectors_per_block << SECTOR_SHIFT);
		blk_limits_io_opt(limits, cache->sectors_per_block << SECTOR_SHIFT);
	}
	set_discard_limits(cache, limits);
}
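
/*
 * Illustrative numbers: with 128-sector cache blocks, a stacked io_opt of
 * 512 sectors passes both tests (512 >= 128 and 512 % 128 == 0) and is
 * left alone, whereas an io_opt of 96 or 192 sectors would cause io_min
 * and io_opt to be overridden to 128 << SECTOR_SHIFT bytes.
 */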

/*----------------------------------------------------------------*/

static struct target_type cache_target = {
	.name = "cache",
	.version = {1, 6, 0},
	.module = THIS_MODULE,
	.ctr = cache_ctr,
	.dtr = cache_dtr,
	.map = cache_map,
	.end_io = cache_end_io,
	.postsuspend = cache_postsuspend,
	.preresume = cache_preresume,
	.resume = cache_resume,
	.status = cache_status,
	.message = cache_message,
	.iterate_devices = cache_iterate_devices,
	.merge = cache_bvec_merge,
	.io_hints = cache_io_hints,
};
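
/*
 * For context only (the constructor cache_ctr lives earlier in this file):
 * a cache table line, as described in Documentation/device-mapper/cache.txt,
 * has roughly the shape
 *
 *   cache <metadata dev> <cache dev> <origin dev> <block size>
 *         <#feature args> [<feature arg>]* <policy> <#policy args>
 *         [<policy arg>]*
 *
 * e.g. (device names and sizes hypothetical):
 *
 *   dmsetup create my_cache --table \
 *     '0 41943040 cache /dev/sdb1 /dev/sdb2 /dev/sdc 512 1 writeback default 0'
 */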

static int __init dm_cache_init(void)
{
	int r;

	r = dm_register_target(&cache_target);
	if (r) {
		DMERR("cache target registration failed: %d", r);
		return r;
	}

	migration_cache = KMEM_CACHE(dm_cache_migration, 0);
	if (!migration_cache) {
		dm_unregister_target(&cache_target);
		return -ENOMEM;
	}

	return 0;
}

static void __exit dm_cache_exit(void)
{
	dm_unregister_target(&cache_target);
	kmem_cache_destroy(migration_cache);
}

module_init(dm_cache_init);
module_exit(dm_cache_exit);

MODULE_DESCRIPTION(DM_NAME " cache target");
MODULE_AUTHOR("Joe Thornber <ejt@redhat.com>");
MODULE_LICENSE("GPL");