/*
 * Copyright (C) 2012 Red Hat. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include "dm.h"
#include "dm-bio-prison.h"
#include "dm-bio-record.h"
#include "dm-cache-metadata.h"

#include <linux/dm-io.h>
#include <linux/dm-kcopyd.h>
#include <linux/init.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

#define DM_MSG_PREFIX "cache"

DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(cache_copy_throttle,
	"A percentage of time allocated for copying to and/or from cache");

/*----------------------------------------------------------------*/

/*
 * Glossary:
 *
 * oblock: index of an origin block
 * cblock: index of a cache block
 * promotion: movement of a block from origin to cache
 * demotion: movement of a block from cache to origin
 * migration: movement of a block between the origin and cache device,
 *	      either direction
 */

/*----------------------------------------------------------------*/

static size_t bitset_size_in_bytes(unsigned nr_entries)
{
	return sizeof(unsigned long) * dm_div_up(nr_entries, BITS_PER_LONG);
}

static unsigned long *alloc_bitset(unsigned nr_entries)
{
	size_t s = bitset_size_in_bytes(nr_entries);
	return vzalloc(s);
}

static void clear_bitset(void *bitset, unsigned nr_entries)
{
	size_t s = bitset_size_in_bytes(nr_entries);
	memset(bitset, 0, s);
}

static void free_bitset(unsigned long *bits)
{
	vfree(bits);
}
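
/*
 * Illustrative example (not from the original source): on a 64-bit host
 * the dirty bitset for a cache with 10000 cache blocks needs
 * bitset_size_in_bytes(10000) = 8 * dm_div_up(10000, 64) = 8 * 157 = 1256
 * bytes.
 */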

/*----------------------------------------------------------------*/

/*
 * There are a couple of places where we let a bio run, but want to do some
 * work before calling its endio function.  We do this by temporarily
 * changing the endio fn.
 */
struct dm_hook_info {
	bio_end_io_t *bi_end_io;
	void *bi_private;
};

static void dm_hook_bio(struct dm_hook_info *h, struct bio *bio,
			bio_end_io_t *bi_end_io, void *bi_private)
{
	h->bi_end_io = bio->bi_end_io;
	h->bi_private = bio->bi_private;

	bio->bi_end_io = bi_end_io;
	bio->bi_private = bi_private;
}

static void dm_unhook_bio(struct dm_hook_info *h, struct bio *bio)
{
	bio->bi_end_io = h->bi_end_io;
	bio->bi_private = h->bi_private;

	/*
	 * Must bump bi_remaining to allow bio to complete with
	 * restored bi_end_io.
	 */
	atomic_inc(&bio->bi_remaining);
}
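
/*
 * Illustrative usage sketch (my_endio and my_context are hypothetical and
 * not part of this driver); this is the pattern writethrough_endio() and
 * overwrite_endio() below follow:
 *
 *	dm_hook_bio(&pb->hook_info, bio, my_endio, my_context);
 *	generic_make_request(bio);
 *
 *	static void my_endio(struct bio *bio, int err)
 *	{
 *		dm_unhook_bio(&pb->hook_info, bio);
 *		bio_endio(bio, err);
 *	}
 *
 * bio_endio() then completes the bio via the restored bi_end_io.
 */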

/*----------------------------------------------------------------*/

#define PRISON_CELLS 1024
#define MIGRATION_POOL_SIZE 128
#define COMMIT_PERIOD HZ
#define MIGRATION_COUNT_WINDOW 10

/*
 * The block size of the device holding cache data must be
 * between 32KB and 1GB.
 */
#define DATA_DEV_BLOCK_SIZE_MIN_SECTORS (32 * 1024 >> SECTOR_SHIFT)
#define DATA_DEV_BLOCK_SIZE_MAX_SECTORS (1024 * 1024 * 1024 >> SECTOR_SHIFT)

/*
 * FIXME: the cache is read/write for the time being.
 */
enum cache_metadata_mode {
	CM_WRITE,		/* metadata may be changed */
	CM_READ_ONLY,		/* metadata may not be changed */
};

enum cache_io_mode {
	/*
	 * Data is written to cached blocks only.  These blocks are marked
	 * dirty.  If you lose the cache device you will lose data.
	 * Potential performance increase for both reads and writes.
	 */
	CM_IO_WRITEBACK,

	/*
	 * Data is written to both cache and origin.  Blocks are never
	 * dirty.  Potential performance benefit for reads only.
	 */
	CM_IO_WRITETHROUGH,

	/*
	 * A degraded mode useful for various cache coherency situations
	 * (eg, rolling back snapshots).  Reads and writes always go to the
	 * origin.  If a write goes to a cached oblock, then the cache
	 * block is invalidated.
	 */
	CM_IO_PASSTHROUGH
};

struct cache_features {
	enum cache_metadata_mode mode;
	enum cache_io_mode io_mode;
};

struct cache_stats {
	atomic_t read_hit;
	atomic_t read_miss;
	atomic_t write_hit;
	atomic_t write_miss;
	atomic_t demotion;
	atomic_t promotion;
	atomic_t copies_avoided;
	atomic_t cache_cell_clash;
	atomic_t commit_count;
	atomic_t discard_count;
};

/*
 * Defines a range of cblocks, begin to (end - 1) are in the range.  end is
 * the one-past-the-end value.
 */
struct cblock_range {
	dm_cblock_t begin;
	dm_cblock_t end;
};
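
/*
 * For example (illustrative only): { .begin = 0, .end = 128 } covers
 * cblocks 0..127, and an empty range has begin == end.
 */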

struct invalidation_request {
	struct list_head list;
	struct cblock_range *cblocks;

	atomic_t complete;
	int err;

	wait_queue_head_t result_wait;
};

struct cache {
	struct dm_target *ti;
	struct dm_target_callbacks callbacks;

	struct dm_cache_metadata *cmd;

	/*
	 * Metadata is written to this device.
	 */
	struct dm_dev *metadata_dev;

	/*
	 * The slower of the two data devices.  Typically a spindle.
	 */
	struct dm_dev *origin_dev;

	/*
	 * The faster of the two data devices.  Typically an SSD.
	 */
	struct dm_dev *cache_dev;

	/*
	 * Size of the origin device in _complete_ blocks and native sectors.
	 */
	dm_oblock_t origin_blocks;
	sector_t origin_sectors;

	/*
	 * Size of the cache device in blocks.
	 */
	dm_cblock_t cache_size;

	/*
	 * Fields for converting from sectors to blocks.
	 */
	uint32_t sectors_per_block;
	int sectors_per_block_shift;

	spinlock_t lock;
	struct bio_list deferred_bios;
	struct bio_list deferred_flush_bios;
	struct bio_list deferred_writethrough_bios;
	struct list_head quiesced_migrations;
	struct list_head completed_migrations;
	struct list_head need_commit_migrations;
	sector_t migration_threshold;
	wait_queue_head_t migration_wait;
	atomic_t nr_migrations;

	wait_queue_head_t quiescing_wait;
	atomic_t quiescing;
	atomic_t quiescing_ack;

	/*
	 * cache_size entries, dirty if set
	 */
	dm_cblock_t nr_dirty;
	unsigned long *dirty_bitset;

	/*
	 * origin_blocks entries, discarded if set.
	 */
	dm_dblock_t discard_nr_blocks;
	unsigned long *discard_bitset;
	uint32_t discard_block_size; /* a power of 2 times sectors per block */

	/*
	 * Rather than reconstructing the table line for the status we just
	 * save it and regurgitate.
	 */
	unsigned nr_ctr_args;
	const char **ctr_args;

	struct dm_kcopyd_client *copier;
	struct workqueue_struct *wq;
	struct work_struct worker;

	struct delayed_work waker;
	unsigned long last_commit_jiffies;

	struct dm_bio_prison *prison;
	struct dm_deferred_set *all_io_ds;

	mempool_t *migration_pool;
	struct dm_cache_migration *next_migration;

	struct dm_cache_policy *policy;
	unsigned policy_nr_args;

	bool need_tick_bio:1;
	bool sized:1;
	bool invalidate:1;
	bool commit_requested:1;
	bool loaded_mappings:1;
	bool loaded_discards:1;

	/*
	 * Cache features such as write-through.
	 */
	struct cache_features features;

	struct cache_stats stats;

	/*
	 * Invalidation fields.
	 */
	spinlock_t invalidation_lock;
	struct list_head invalidation_requests;
};

struct per_bio_data {
	bool tick:1;
	unsigned req_nr:2;
	struct dm_deferred_entry *all_io_entry;
	struct dm_hook_info hook_info;

	/*
	 * writethrough fields.  These MUST remain at the end of this
	 * structure and the 'cache' member must be the first as it
	 * is used to determine the offset of the writethrough fields.
	 */
	struct cache *cache;
	dm_cblock_t cblock;
	struct dm_bio_details bio_details;
};

struct dm_cache_migration {
	struct list_head list;
	struct cache *cache;

	unsigned long start_jiffies;
	dm_oblock_t old_oblock;
	dm_oblock_t new_oblock;
	dm_cblock_t cblock;

	bool err:1;
	bool writeback:1;
	bool demote:1;
	bool promote:1;
	bool requeue_holder:1;
	bool invalidate:1;

	struct dm_bio_prison_cell *old_ocell;
	struct dm_bio_prison_cell *new_ocell;
};

/*
 * Processing a bio in the worker thread may require these memory
 * allocations.  We prealloc to avoid deadlocks (the same worker thread
 * frees them back to the mempool).
 */
struct prealloc {
	struct dm_cache_migration *mg;
	struct dm_bio_prison_cell *cell1;
	struct dm_bio_prison_cell *cell2;
};
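
/*
 * Illustrative usage, mirroring process_deferred_bios() below:
 *
 *	struct prealloc structs;
 *
 *	memset(&structs, 0, sizeof(structs));
 *	if (prealloc_data_structs(cache, &structs))
 *		break;			(no memory just now; retry later)
 *	process_bio(cache, &structs, bio);
 *	...
 *	prealloc_free_structs(cache, &structs);
 */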

static void wake_worker(struct cache *cache)
{
	queue_work(cache->wq, &cache->worker);
}

/*----------------------------------------------------------------*/

static struct dm_bio_prison_cell *alloc_prison_cell(struct cache *cache)
{
	/* FIXME: change to use a local slab. */
	return dm_bio_prison_alloc_cell(cache->prison, GFP_NOWAIT);
}

static void free_prison_cell(struct cache *cache, struct dm_bio_prison_cell *cell)
{
	dm_bio_prison_free_cell(cache->prison, cell);
}

static int prealloc_data_structs(struct cache *cache, struct prealloc *p)
{
	if (!p->mg) {
		p->mg = mempool_alloc(cache->migration_pool, GFP_NOWAIT);
		if (!p->mg)
			return -ENOMEM;
	}

	if (!p->cell1) {
		p->cell1 = alloc_prison_cell(cache);
		if (!p->cell1)
			return -ENOMEM;
	}

	if (!p->cell2) {
		p->cell2 = alloc_prison_cell(cache);
		if (!p->cell2)
			return -ENOMEM;
	}

	return 0;
}

static void prealloc_free_structs(struct cache *cache, struct prealloc *p)
{
	if (p->cell2)
		free_prison_cell(cache, p->cell2);

	if (p->cell1)
		free_prison_cell(cache, p->cell1);

	if (p->mg)
		mempool_free(p->mg, cache->migration_pool);
}

static struct dm_cache_migration *prealloc_get_migration(struct prealloc *p)
{
	struct dm_cache_migration *mg = p->mg;

	BUG_ON(!mg);
	p->mg = NULL;

	return mg;
}

/*
 * You must have a cell within the prealloc struct to return.  If not this
 * function will BUG() rather than returning NULL.
 */
static struct dm_bio_prison_cell *prealloc_get_cell(struct prealloc *p)
{
	struct dm_bio_prison_cell *r = NULL;

	if (p->cell1) {
		r = p->cell1;
		p->cell1 = NULL;

	} else if (p->cell2) {
		r = p->cell2;
		p->cell2 = NULL;
	} else
		BUG();

	return r;
}

/*
 * You can't have more than two cells in a prealloc struct.  BUG() will be
 * called if you try and overfill.
 */
static void prealloc_put_cell(struct prealloc *p, struct dm_bio_prison_cell *cell)
{
	if (!p->cell2)
		p->cell2 = cell;

	else if (!p->cell1)
		p->cell1 = cell;

	else
		BUG();
}

/*----------------------------------------------------------------*/

static void build_key(dm_oblock_t oblock, struct dm_cell_key *key)
{
	key->virtual = 0;
	key->dev = 0;
	key->block = from_oblock(oblock);
}

/*
 * The caller hands in a preallocated cell, and a free function for it.
 * The cell will be freed if there's an error, or if it wasn't used because
 * a cell with that key already exists.
 */
typedef void (*cell_free_fn)(void *context, struct dm_bio_prison_cell *cell);

static int bio_detain(struct cache *cache, dm_oblock_t oblock,
		      struct bio *bio, struct dm_bio_prison_cell *cell_prealloc,
		      cell_free_fn free_fn, void *free_context,
		      struct dm_bio_prison_cell **cell_result)
{
	int r;
	struct dm_cell_key key;

	build_key(oblock, &key);
	r = dm_bio_detain(cache->prison, &key, bio, cell_prealloc, cell_result);
	if (r)
		free_fn(free_context, cell_prealloc);

	return r;
}

static int get_cell(struct cache *cache,
		    dm_oblock_t oblock,
		    struct prealloc *structs,
		    struct dm_bio_prison_cell **cell_result)
{
	int r;
	struct dm_cell_key key;
	struct dm_bio_prison_cell *cell_prealloc;

	cell_prealloc = prealloc_get_cell(structs);

	build_key(oblock, &key);
	r = dm_get_cell(cache->prison, &key, cell_prealloc, cell_result);
	if (r)
		prealloc_put_cell(structs, cell_prealloc);

	return r;
}

/*----------------------------------------------------------------*/

static bool is_dirty(struct cache *cache, dm_cblock_t b)
{
	return test_bit(from_cblock(b), cache->dirty_bitset);
}

static void set_dirty(struct cache *cache, dm_oblock_t oblock, dm_cblock_t cblock)
{
	if (!test_and_set_bit(from_cblock(cblock), cache->dirty_bitset)) {
		cache->nr_dirty = to_cblock(from_cblock(cache->nr_dirty) + 1);
		policy_set_dirty(cache->policy, oblock);
	}
}

static void clear_dirty(struct cache *cache, dm_oblock_t oblock, dm_cblock_t cblock)
{
	if (test_and_clear_bit(from_cblock(cblock), cache->dirty_bitset)) {
		policy_clear_dirty(cache->policy, oblock);
		cache->nr_dirty = to_cblock(from_cblock(cache->nr_dirty) - 1);
		if (!from_cblock(cache->nr_dirty))
			dm_table_event(cache->ti->table);
	}
}

/*----------------------------------------------------------------*/

static bool block_size_is_power_of_two(struct cache *cache)
{
	return cache->sectors_per_block_shift >= 0;
}

/* gcc on ARM generates spurious references to __udivdi3 and __umoddi3 */
#if defined(CONFIG_ARM) && __GNUC__ == 4 && __GNUC_MINOR__ <= 6
__always_inline
#endif
static dm_block_t block_div(dm_block_t b, uint32_t n)
{
	do_div(b, n);

	return b;
}

static dm_dblock_t oblock_to_dblock(struct cache *cache, dm_oblock_t oblock)
{
	uint32_t discard_blocks = cache->discard_block_size;
	dm_block_t b = from_oblock(oblock);

	if (!block_size_is_power_of_two(cache))
		discard_blocks = discard_blocks / cache->sectors_per_block;
	else
		discard_blocks >>= cache->sectors_per_block_shift;

	b = block_div(b, discard_blocks);

	return to_dblock(b);
}
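
/*
 * Illustrative example (not from the original source): with 128-sector
 * cache blocks and a 1024-sector discard block, discard_blocks above
 * becomes 1024 >> 7 = 8, so oblock 100 maps to dblock 100 / 8 = 12.
 */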

static void set_discard(struct cache *cache, dm_dblock_t b)
{
	unsigned long flags;

	atomic_inc(&cache->stats.discard_count);

	spin_lock_irqsave(&cache->lock, flags);
	set_bit(from_dblock(b), cache->discard_bitset);
	spin_unlock_irqrestore(&cache->lock, flags);
}

static void clear_discard(struct cache *cache, dm_dblock_t b)
{
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	clear_bit(from_dblock(b), cache->discard_bitset);
	spin_unlock_irqrestore(&cache->lock, flags);
}

static bool is_discarded(struct cache *cache, dm_dblock_t b)
{
	int r;
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	r = test_bit(from_dblock(b), cache->discard_bitset);
	spin_unlock_irqrestore(&cache->lock, flags);

	return r;
}

static bool is_discarded_oblock(struct cache *cache, dm_oblock_t b)
{
	int r;
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	r = test_bit(from_dblock(oblock_to_dblock(cache, b)),
		     cache->discard_bitset);
	spin_unlock_irqrestore(&cache->lock, flags);

	return r;
}

/*----------------------------------------------------------------*/

static void load_stats(struct cache *cache)
{
	struct dm_cache_statistics stats;

	dm_cache_metadata_get_stats(cache->cmd, &stats);
	atomic_set(&cache->stats.read_hit, stats.read_hits);
	atomic_set(&cache->stats.read_miss, stats.read_misses);
	atomic_set(&cache->stats.write_hit, stats.write_hits);
	atomic_set(&cache->stats.write_miss, stats.write_misses);
}

static void save_stats(struct cache *cache)
{
	struct dm_cache_statistics stats;

	stats.read_hits = atomic_read(&cache->stats.read_hit);
	stats.read_misses = atomic_read(&cache->stats.read_miss);
	stats.write_hits = atomic_read(&cache->stats.write_hit);
	stats.write_misses = atomic_read(&cache->stats.write_miss);

	dm_cache_metadata_set_stats(cache->cmd, &stats);
}

/*----------------------------------------------------------------
 * Per bio data
 *--------------------------------------------------------------*/

/*
 * If using writeback, leave out struct per_bio_data's writethrough fields.
 */
#define PB_DATA_SIZE_WB (offsetof(struct per_bio_data, cache))
#define PB_DATA_SIZE_WT (sizeof(struct per_bio_data))
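
/*
 * Since 'cache' is the first of the writethrough fields, PB_DATA_SIZE_WB
 * covers only the common fields while PB_DATA_SIZE_WT also covers cache,
 * cblock and bio_details; writeback and passthrough modes therefore
 * reserve the smaller per-bio area (see get_per_bio_data_size() below).
 */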

static bool writethrough_mode(struct cache_features *f)
{
	return f->io_mode == CM_IO_WRITETHROUGH;
}

static bool writeback_mode(struct cache_features *f)
{
	return f->io_mode == CM_IO_WRITEBACK;
}

static bool passthrough_mode(struct cache_features *f)
{
	return f->io_mode == CM_IO_PASSTHROUGH;
}

static size_t get_per_bio_data_size(struct cache *cache)
{
	return writethrough_mode(&cache->features) ? PB_DATA_SIZE_WT : PB_DATA_SIZE_WB;
}

static struct per_bio_data *get_per_bio_data(struct bio *bio, size_t data_size)
{
	struct per_bio_data *pb = dm_per_bio_data(bio, data_size);
	BUG_ON(!pb);
	return pb;
}

static struct per_bio_data *init_per_bio_data(struct bio *bio, size_t data_size)
{
	struct per_bio_data *pb = get_per_bio_data(bio, data_size);

	pb->tick = false;
	pb->req_nr = dm_bio_get_target_bio_nr(bio);
	pb->all_io_entry = NULL;

	return pb;
}

/*----------------------------------------------------------------
 * Remapping
 *--------------------------------------------------------------*/
static void remap_to_origin(struct cache *cache, struct bio *bio)
{
	bio->bi_bdev = cache->origin_dev->bdev;
}

static void remap_to_cache(struct cache *cache, struct bio *bio,
			   dm_cblock_t cblock)
{
	sector_t bi_sector = bio->bi_iter.bi_sector;

	bio->bi_bdev = cache->cache_dev->bdev;
	if (!block_size_is_power_of_two(cache))
		bio->bi_iter.bi_sector =
			(from_cblock(cblock) * cache->sectors_per_block) +
			sector_div(bi_sector, cache->sectors_per_block);
	else
		bio->bi_iter.bi_sector =
			(from_cblock(cblock) << cache->sectors_per_block_shift) |
			(bi_sector & (cache->sectors_per_block - 1));
}
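
/*
 * Illustrative example (not from the original source): with 128-sector
 * cache blocks (sectors_per_block_shift == 7), a bio at sector 1000
 * remapped to cblock 5 lands on the cache device at sector
 * (5 << 7) | (1000 & 127) = 640 + 104 = 744, i.e. at the same offset
 * within the block.
 */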

static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio)
{
	unsigned long flags;
	size_t pb_data_size = get_per_bio_data_size(cache);
	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);

	spin_lock_irqsave(&cache->lock, flags);
	if (cache->need_tick_bio &&
	    !(bio->bi_rw & (REQ_FUA | REQ_FLUSH | REQ_DISCARD))) {
		pb->tick = true;
		cache->need_tick_bio = false;
	}
	spin_unlock_irqrestore(&cache->lock, flags);
}

static void remap_to_origin_clear_discard(struct cache *cache, struct bio *bio,
					  dm_oblock_t oblock)
{
	check_if_tick_bio_needed(cache, bio);
	remap_to_origin(cache, bio);
	if (bio_data_dir(bio) == WRITE)
		clear_discard(cache, oblock_to_dblock(cache, oblock));
}

static void remap_to_cache_dirty(struct cache *cache, struct bio *bio,
				 dm_oblock_t oblock, dm_cblock_t cblock)
{
	check_if_tick_bio_needed(cache, bio);
	remap_to_cache(cache, bio, cblock);
	if (bio_data_dir(bio) == WRITE) {
		set_dirty(cache, oblock, cblock);
		clear_discard(cache, oblock_to_dblock(cache, oblock));
	}
}

static dm_oblock_t get_bio_block(struct cache *cache, struct bio *bio)
{
	sector_t block_nr = bio->bi_iter.bi_sector;

	if (!block_size_is_power_of_two(cache))
		(void) sector_div(block_nr, cache->sectors_per_block);
	else
		block_nr >>= cache->sectors_per_block_shift;

	return to_oblock(block_nr);
}

static int bio_triggers_commit(struct cache *cache, struct bio *bio)
{
	return bio->bi_rw & (REQ_FLUSH | REQ_FUA);
}

static void issue(struct cache *cache, struct bio *bio)
{
	unsigned long flags;

	if (!bio_triggers_commit(cache, bio)) {
		generic_make_request(bio);
		return;
	}

	/*
	 * Batch together any bios that trigger commits and then issue a
	 * single commit for them in do_worker().
	 */
	spin_lock_irqsave(&cache->lock, flags);
	cache->commit_requested = true;
	bio_list_add(&cache->deferred_flush_bios, bio);
	spin_unlock_irqrestore(&cache->lock, flags);
}

static void defer_writethrough_bio(struct cache *cache, struct bio *bio)
{
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	bio_list_add(&cache->deferred_writethrough_bios, bio);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_worker(cache);
}

static void writethrough_endio(struct bio *bio, int err)
{
	struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT);

	dm_unhook_bio(&pb->hook_info, bio);

	if (err) {
		bio_endio(bio, err);
		return;
	}

	dm_bio_restore(&pb->bio_details, bio);
	remap_to_cache(pb->cache, bio, pb->cblock);

	/*
	 * We can't issue this bio directly, since we're in interrupt
	 * context.  So it gets put on a bio list for processing by the
	 * worker thread.
	 */
	defer_writethrough_bio(pb->cache, bio);
}

/*
 * When running in writethrough mode we need to send writes to clean blocks
 * to both the cache and origin devices.  In future we'd like to clone the
 * bio and send them in parallel, but for now we're doing them in
 * series as this is easier.
 */
static void remap_to_origin_then_cache(struct cache *cache, struct bio *bio,
				       dm_oblock_t oblock, dm_cblock_t cblock)
{
	struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT);

	pb->cache = cache;
	pb->cblock = cblock;
	dm_hook_bio(&pb->hook_info, bio, writethrough_endio, NULL);
	dm_bio_record(&pb->bio_details, bio);

	remap_to_origin_clear_discard(pb->cache, bio, oblock);
}
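
/*
 * Summary of the writethrough path above (describing the code, not adding
 * behaviour): the bio is hooked and recorded, issued to the origin first;
 * writethrough_endio() then restores it, remaps it to the cache device and
 * defers it to the worker thread, which re-issues it.  The write therefore
 * reaches origin and cache in series.
 */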

/*----------------------------------------------------------------
 * Migration processing
 *
 * Migration covers moving data from the origin device to the cache, or
 * vice versa.
 *--------------------------------------------------------------*/
static void free_migration(struct dm_cache_migration *mg)
{
	mempool_free(mg, mg->cache->migration_pool);
}

static void inc_nr_migrations(struct cache *cache)
{
	atomic_inc(&cache->nr_migrations);
}

static void dec_nr_migrations(struct cache *cache)
{
	atomic_dec(&cache->nr_migrations);

	/*
	 * Wake the worker in case we're suspending the target.
	 */
	wake_up(&cache->migration_wait);
}

static void __cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell,
			 bool holder)
{
	(holder ? dm_cell_release : dm_cell_release_no_holder)
		(cache->prison, cell, &cache->deferred_bios);
	free_prison_cell(cache, cell);
}

static void cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell,
		       bool holder)
{
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	__cell_defer(cache, cell, holder);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_worker(cache);
}

static void cleanup_migration(struct dm_cache_migration *mg)
{
	struct cache *cache = mg->cache;
	free_migration(mg);
	dec_nr_migrations(cache);
}

static void migration_failure(struct dm_cache_migration *mg)
{
	struct cache *cache = mg->cache;

	if (mg->writeback) {
		DMWARN_LIMIT("writeback failed; couldn't copy block");
		set_dirty(cache, mg->old_oblock, mg->cblock);
		cell_defer(cache, mg->old_ocell, false);

	} else if (mg->demote) {
		DMWARN_LIMIT("demotion failed; couldn't copy block");
		policy_force_mapping(cache->policy, mg->new_oblock, mg->old_oblock);

		cell_defer(cache, mg->old_ocell, mg->promote ? false : true);
		if (mg->promote)
			cell_defer(cache, mg->new_ocell, true);
	} else {
		DMWARN_LIMIT("promotion failed; couldn't copy block");
		policy_remove_mapping(cache->policy, mg->new_oblock);
		cell_defer(cache, mg->new_ocell, true);
	}

	cleanup_migration(mg);
}

static void migration_success_pre_commit(struct dm_cache_migration *mg)
{
	unsigned long flags;
	struct cache *cache = mg->cache;

	if (mg->writeback) {
		cell_defer(cache, mg->old_ocell, false);
		clear_dirty(cache, mg->old_oblock, mg->cblock);
		cleanup_migration(mg);
		return;

	} else if (mg->demote) {
		if (dm_cache_remove_mapping(cache->cmd, mg->cblock)) {
			DMWARN_LIMIT("demotion failed; couldn't update on disk metadata");
			policy_force_mapping(cache->policy, mg->new_oblock,
					     mg->old_oblock);
			if (mg->promote)
				cell_defer(cache, mg->new_ocell, true);
			cleanup_migration(mg);
			return;
		}
	} else {
		if (dm_cache_insert_mapping(cache->cmd, mg->cblock, mg->new_oblock)) {
			DMWARN_LIMIT("promotion failed; couldn't update on disk metadata");
			policy_remove_mapping(cache->policy, mg->new_oblock);
			cleanup_migration(mg);
			return;
		}
	}

	spin_lock_irqsave(&cache->lock, flags);
	list_add_tail(&mg->list, &cache->need_commit_migrations);
	cache->commit_requested = true;
	spin_unlock_irqrestore(&cache->lock, flags);
}

static void migration_success_post_commit(struct dm_cache_migration *mg)
{
	unsigned long flags;
	struct cache *cache = mg->cache;

	if (mg->writeback) {
		DMWARN("writeback unexpectedly triggered commit");
		return;

	} else if (mg->demote) {
		cell_defer(cache, mg->old_ocell, mg->promote ? false : true);

		if (mg->promote) {
			mg->demote = false;

			spin_lock_irqsave(&cache->lock, flags);
			list_add_tail(&mg->list, &cache->quiesced_migrations);
			spin_unlock_irqrestore(&cache->lock, flags);

		} else {
			if (mg->invalidate)
				policy_remove_mapping(cache->policy, mg->old_oblock);
			cleanup_migration(mg);
		}

	} else {
		if (mg->requeue_holder)
			cell_defer(cache, mg->new_ocell, true);
		else {
			bio_endio(mg->new_ocell->holder, 0);
			cell_defer(cache, mg->new_ocell, false);
		}
		clear_dirty(cache, mg->new_oblock, mg->cblock);
		cleanup_migration(mg);
	}
}

static void copy_complete(int read_err, unsigned long write_err, void *context)
{
	unsigned long flags;
	struct dm_cache_migration *mg = (struct dm_cache_migration *) context;
	struct cache *cache = mg->cache;

	if (read_err || write_err)
		mg->err = true;

	spin_lock_irqsave(&cache->lock, flags);
	list_add_tail(&mg->list, &cache->completed_migrations);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_worker(cache);
}

static void issue_copy_real(struct dm_cache_migration *mg)
{
	int r;
	struct dm_io_region o_region, c_region;
	struct cache *cache = mg->cache;

	o_region.bdev = cache->origin_dev->bdev;
	o_region.count = cache->sectors_per_block;

	c_region.bdev = cache->cache_dev->bdev;
	c_region.sector = from_cblock(mg->cblock) * cache->sectors_per_block;
	c_region.count = cache->sectors_per_block;

	if (mg->writeback || mg->demote) {
		/* demote */
		o_region.sector = from_oblock(mg->old_oblock) * cache->sectors_per_block;
		r = dm_kcopyd_copy(cache->copier, &c_region, 1, &o_region, 0, copy_complete, mg);
	} else {
		/* promote */
		o_region.sector = from_oblock(mg->new_oblock) * cache->sectors_per_block;
		r = dm_kcopyd_copy(cache->copier, &o_region, 1, &c_region, 0, copy_complete, mg);
	}

	if (r < 0) {
		DMERR_LIMIT("issuing migration failed");
		migration_failure(mg);
	}
}

static void overwrite_endio(struct bio *bio, int err)
{
	struct dm_cache_migration *mg = bio->bi_private;
	struct cache *cache = mg->cache;
	size_t pb_data_size = get_per_bio_data_size(cache);
	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
	unsigned long flags;

	dm_unhook_bio(&pb->hook_info, bio);

	if (err)
		mg->err = true;

	mg->requeue_holder = false;

	spin_lock_irqsave(&cache->lock, flags);
	list_add_tail(&mg->list, &cache->completed_migrations);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_worker(cache);
}

static void issue_overwrite(struct dm_cache_migration *mg, struct bio *bio)
{
	size_t pb_data_size = get_per_bio_data_size(mg->cache);
	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);

	dm_hook_bio(&pb->hook_info, bio, overwrite_endio, mg);
	remap_to_cache_dirty(mg->cache, bio, mg->new_oblock, mg->cblock);
	generic_make_request(bio);
}

static bool bio_writes_complete_block(struct cache *cache, struct bio *bio)
{
	return (bio_data_dir(bio) == WRITE) &&
		(bio->bi_iter.bi_size == (cache->sectors_per_block << SECTOR_SHIFT));
}

static void avoid_copy(struct dm_cache_migration *mg)
{
	atomic_inc(&mg->cache->stats.copies_avoided);
	migration_success_pre_commit(mg);
}

static void issue_copy(struct dm_cache_migration *mg)
{
	bool avoid;
	struct cache *cache = mg->cache;

	if (mg->writeback || mg->demote)
		avoid = !is_dirty(cache, mg->cblock) ||
			is_discarded_oblock(cache, mg->old_oblock);
	else {
		struct bio *bio = mg->new_ocell->holder;

		avoid = is_discarded_oblock(cache, mg->new_oblock);

		if (!avoid && bio_writes_complete_block(cache, bio)) {
			issue_overwrite(mg, bio);
			return;
		}
	}

	avoid ? avoid_copy(mg) : issue_copy_real(mg);
}

static void complete_migration(struct dm_cache_migration *mg)
{
	if (mg->err)
		migration_failure(mg);
	else
		migration_success_pre_commit(mg);
}

static void process_migrations(struct cache *cache, struct list_head *head,
			       void (*fn)(struct dm_cache_migration *))
{
	unsigned long flags;
	struct list_head list;
	struct dm_cache_migration *mg, *tmp;

	INIT_LIST_HEAD(&list);
	spin_lock_irqsave(&cache->lock, flags);
	list_splice_init(head, &list);
	spin_unlock_irqrestore(&cache->lock, flags);

	list_for_each_entry_safe(mg, tmp, &list, list)
		fn(mg);
}

static void __queue_quiesced_migration(struct dm_cache_migration *mg)
{
	list_add_tail(&mg->list, &mg->cache->quiesced_migrations);
}

static void queue_quiesced_migration(struct dm_cache_migration *mg)
{
	unsigned long flags;
	struct cache *cache = mg->cache;

	spin_lock_irqsave(&cache->lock, flags);
	__queue_quiesced_migration(mg);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_worker(cache);
}

static void queue_quiesced_migrations(struct cache *cache, struct list_head *work)
{
	unsigned long flags;
	struct dm_cache_migration *mg, *tmp;

	spin_lock_irqsave(&cache->lock, flags);
	list_for_each_entry_safe(mg, tmp, work, list)
		__queue_quiesced_migration(mg);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_worker(cache);
}

static void check_for_quiesced_migrations(struct cache *cache,
					  struct per_bio_data *pb)
{
	struct list_head work;

	if (!pb->all_io_entry)
		return;

	INIT_LIST_HEAD(&work);
	if (pb->all_io_entry)
		dm_deferred_entry_dec(pb->all_io_entry, &work);

	if (!list_empty(&work))
		queue_quiesced_migrations(cache, &work);
}

static void quiesce_migration(struct dm_cache_migration *mg)
{
	if (!dm_deferred_set_add_work(mg->cache->all_io_ds, &mg->list))
		queue_quiesced_migration(mg);
}

static void promote(struct cache *cache, struct prealloc *structs,
		    dm_oblock_t oblock, dm_cblock_t cblock,
		    struct dm_bio_prison_cell *cell)
{
	struct dm_cache_migration *mg = prealloc_get_migration(structs);

	mg->err = false;
	mg->writeback = false;
	mg->demote = false;
	mg->promote = true;
	mg->requeue_holder = true;
	mg->invalidate = false;
	mg->cache = cache;
	mg->new_oblock = oblock;
	mg->cblock = cblock;
	mg->old_ocell = NULL;
	mg->new_ocell = cell;
	mg->start_jiffies = jiffies;

	inc_nr_migrations(cache);
	quiesce_migration(mg);
}

static void writeback(struct cache *cache, struct prealloc *structs,
		      dm_oblock_t oblock, dm_cblock_t cblock,
		      struct dm_bio_prison_cell *cell)
{
	struct dm_cache_migration *mg = prealloc_get_migration(structs);

	mg->err = false;
	mg->writeback = true;
	mg->demote = false;
	mg->promote = false;
	mg->requeue_holder = true;
	mg->invalidate = false;
	mg->cache = cache;
	mg->old_oblock = oblock;
	mg->cblock = cblock;
	mg->old_ocell = cell;
	mg->new_ocell = NULL;
	mg->start_jiffies = jiffies;

	inc_nr_migrations(cache);
	quiesce_migration(mg);
}

static void demote_then_promote(struct cache *cache, struct prealloc *structs,
				dm_oblock_t old_oblock, dm_oblock_t new_oblock,
				dm_cblock_t cblock,
				struct dm_bio_prison_cell *old_ocell,
				struct dm_bio_prison_cell *new_ocell)
{
	struct dm_cache_migration *mg = prealloc_get_migration(structs);

	mg->err = false;
	mg->writeback = false;
	mg->demote = true;
	mg->promote = true;
	mg->requeue_holder = true;
	mg->invalidate = false;
	mg->cache = cache;
	mg->old_oblock = old_oblock;
	mg->new_oblock = new_oblock;
	mg->cblock = cblock;
	mg->old_ocell = old_ocell;
	mg->new_ocell = new_ocell;
	mg->start_jiffies = jiffies;

	inc_nr_migrations(cache);
	quiesce_migration(mg);
}

/*
 * Invalidate a cache entry.  No writeback occurs; any changes in the cache
 * block are thrown away.
 */
static void invalidate(struct cache *cache, struct prealloc *structs,
		       dm_oblock_t oblock, dm_cblock_t cblock,
		       struct dm_bio_prison_cell *cell)
{
	struct dm_cache_migration *mg = prealloc_get_migration(structs);

	mg->err = false;
	mg->writeback = false;
	mg->demote = true;
	mg->promote = false;
	mg->requeue_holder = true;
	mg->invalidate = true;
	mg->cache = cache;
	mg->old_oblock = oblock;
	mg->cblock = cblock;
	mg->old_ocell = cell;
	mg->new_ocell = NULL;
	mg->start_jiffies = jiffies;

	inc_nr_migrations(cache);
	quiesce_migration(mg);
}

/*----------------------------------------------------------------
 * bio processing
 *--------------------------------------------------------------*/
static void defer_bio(struct cache *cache, struct bio *bio)
{
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	bio_list_add(&cache->deferred_bios, bio);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_worker(cache);
}

static void process_flush_bio(struct cache *cache, struct bio *bio)
{
	size_t pb_data_size = get_per_bio_data_size(cache);
	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);

	BUG_ON(bio->bi_iter.bi_size);
	if (!pb->req_nr)
		remap_to_origin(cache, bio);
	else
		remap_to_cache(cache, bio, 0);

	issue(cache, bio);
}

/*
 * People generally discard large parts of a device, eg, the whole device
 * when formatting.  Splitting these large discards up into cache block
 * sized ios and then quiescing (always necessary for discard) takes too
 * long.
 *
 * We keep it simple, and allow any size of discard to come in, and just
 * mark off blocks on the discard bitset.  No passdown occurs!
 *
 * To implement passdown we need to change the bio_prison such that a cell
 * can have a key that spans many blocks.
 */
static void process_discard_bio(struct cache *cache, struct bio *bio)
{
	dm_block_t start_block = dm_sector_div_up(bio->bi_iter.bi_sector,
						  cache->discard_block_size);
	dm_block_t end_block = bio_end_sector(bio);
	dm_block_t b;

	end_block = block_div(end_block, cache->discard_block_size);

	for (b = start_block; b < end_block; b++)
		set_discard(cache, to_dblock(b));

	bio_endio(bio, 0);
}
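
/*
 * Illustrative example (not from the original source): with a 1024-sector
 * discard block, a discard bio covering sectors 1000-8999
 * (bio_end_sector(bio) == 9000) gives
 * start_block = dm_sector_div_up(1000, 1024) = 1 and
 * end_block = 9000 / 1024 = 8, so only the fully covered discard blocks
 * 1..7 are marked; the partially covered blocks at either end are not.
 */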

static bool spare_migration_bandwidth(struct cache *cache)
{
	sector_t current_volume = (atomic_read(&cache->nr_migrations) + 1) *
		cache->sectors_per_block;
	return current_volume < cache->migration_threshold;
}
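
/*
 * Illustrative example (not from the original source): with 128-sector
 * cache blocks, 7 migrations in flight and migration_threshold = 2048
 * sectors, current_volume = (7 + 1) * 128 = 1024 < 2048, so another
 * migration may be started.
 */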

static void inc_hit_counter(struct cache *cache, struct bio *bio)
{
	atomic_inc(bio_data_dir(bio) == READ ?
		   &cache->stats.read_hit : &cache->stats.write_hit);
}

static void inc_miss_counter(struct cache *cache, struct bio *bio)
{
	atomic_inc(bio_data_dir(bio) == READ ?
		   &cache->stats.read_miss : &cache->stats.write_miss);
}

static void issue_cache_bio(struct cache *cache, struct bio *bio,
			    struct per_bio_data *pb,
			    dm_oblock_t oblock, dm_cblock_t cblock)
{
	pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
	remap_to_cache_dirty(cache, bio, oblock, cblock);
	issue(cache, bio);
}

static void process_bio(struct cache *cache, struct prealloc *structs,
			struct bio *bio)
{
	int r;
	bool release_cell = true;
	dm_oblock_t block = get_bio_block(cache, bio);
	struct dm_bio_prison_cell *cell_prealloc, *old_ocell, *new_ocell;
	struct policy_result lookup_result;
	size_t pb_data_size = get_per_bio_data_size(cache);
	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
	bool discarded_block = is_discarded_oblock(cache, block);
	bool passthrough = passthrough_mode(&cache->features);
	bool can_migrate = !passthrough && (discarded_block || spare_migration_bandwidth(cache));

	/*
	 * Check to see if that block is currently migrating.
	 */
	cell_prealloc = prealloc_get_cell(structs);
	r = bio_detain(cache, block, bio, cell_prealloc,
		       (cell_free_fn) prealloc_put_cell,
		       structs, &new_ocell);
	if (r > 0)
		return;

	r = policy_map(cache->policy, block, true, can_migrate, discarded_block,
		       bio, &lookup_result);

	if (r == -EWOULDBLOCK)
		/* migration has been denied */
		lookup_result.op = POLICY_MISS;

	switch (lookup_result.op) {
	case POLICY_HIT:
		if (passthrough) {
			inc_miss_counter(cache, bio);

			/*
			 * Passthrough always maps to the origin,
			 * invalidating any cache blocks that are written
			 * to.
			 */

			if (bio_data_dir(bio) == WRITE) {
				atomic_inc(&cache->stats.demotion);
				invalidate(cache, structs, block, lookup_result.cblock, new_ocell);
				release_cell = false;

			} else {
				/* FIXME: factor out issue_origin() */
				pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
				remap_to_origin_clear_discard(cache, bio, block);
				issue(cache, bio);
			}
		} else {
			inc_hit_counter(cache, bio);

			if (bio_data_dir(bio) == WRITE &&
			    writethrough_mode(&cache->features) &&
			    !is_dirty(cache, lookup_result.cblock)) {
				pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
				remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
				issue(cache, bio);
			} else
				issue_cache_bio(cache, bio, pb, block, lookup_result.cblock);
		}

		break;

	case POLICY_MISS:
		inc_miss_counter(cache, bio);
		pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
		remap_to_origin_clear_discard(cache, bio, block);
		issue(cache, bio);
		break;

	case POLICY_NEW:
		atomic_inc(&cache->stats.promotion);
		promote(cache, structs, block, lookup_result.cblock, new_ocell);
		release_cell = false;
		break;

	case POLICY_REPLACE:
		cell_prealloc = prealloc_get_cell(structs);
		r = bio_detain(cache, lookup_result.old_oblock, bio, cell_prealloc,
			       (cell_free_fn) prealloc_put_cell,
			       structs, &old_ocell);
		if (r > 0) {
			/*
			 * We have to be careful to avoid lock inversion of
			 * the cells.  So we back off, and wait for the
			 * old_ocell to become free.
			 */
			policy_force_mapping(cache->policy, block,
					     lookup_result.old_oblock);
			atomic_inc(&cache->stats.cache_cell_clash);
			break;
		}
		atomic_inc(&cache->stats.demotion);
		atomic_inc(&cache->stats.promotion);

		demote_then_promote(cache, structs, lookup_result.old_oblock,
				    block, lookup_result.cblock,
				    old_ocell, new_ocell);
		release_cell = false;
		break;

	default:
		DMERR_LIMIT("%s: erroring bio, unknown policy op: %u", __func__,
			    (unsigned) lookup_result.op);
		bio_io_error(bio);
	}

	if (release_cell)
		cell_defer(cache, new_ocell, false);
}

static int need_commit_due_to_time(struct cache *cache)
{
	return jiffies < cache->last_commit_jiffies ||
	       jiffies > cache->last_commit_jiffies + COMMIT_PERIOD;
}
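
/*
 * The first comparison above handles jiffies wrapping past
 * last_commit_jiffies; either way a commit becomes due once more than
 * COMMIT_PERIOD (HZ jiffies, i.e. roughly one second) has elapsed since
 * the last commit.
 */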

static int commit_if_needed(struct cache *cache)
{
	int r = 0;

	if ((cache->commit_requested || need_commit_due_to_time(cache)) &&
	    dm_cache_changed_this_transaction(cache->cmd)) {
		atomic_inc(&cache->stats.commit_count);
		cache->commit_requested = false;
		r = dm_cache_commit(cache->cmd, false);
		cache->last_commit_jiffies = jiffies;
	}

	return r;
}

static void process_deferred_bios(struct cache *cache)
{
	unsigned long flags;
	struct bio_list bios;
	struct bio *bio;
	struct prealloc structs;

	memset(&structs, 0, sizeof(structs));
	bio_list_init(&bios);

	spin_lock_irqsave(&cache->lock, flags);
	bio_list_merge(&bios, &cache->deferred_bios);
	bio_list_init(&cache->deferred_bios);
	spin_unlock_irqrestore(&cache->lock, flags);

	while (!bio_list_empty(&bios)) {
		/*
		 * If we've got no free migration structs, and processing
		 * this bio might require one, we pause until there are some
		 * prepared mappings to process.
		 */
		if (prealloc_data_structs(cache, &structs)) {
			spin_lock_irqsave(&cache->lock, flags);
			bio_list_merge(&cache->deferred_bios, &bios);
			spin_unlock_irqrestore(&cache->lock, flags);
			break;
		}

		bio = bio_list_pop(&bios);

		if (bio->bi_rw & REQ_FLUSH)
			process_flush_bio(cache, bio);
		else if (bio->bi_rw & REQ_DISCARD)
			process_discard_bio(cache, bio);
		else
			process_bio(cache, &structs, bio);
	}

	prealloc_free_structs(cache, &structs);
}

static void process_deferred_flush_bios(struct cache *cache, bool submit_bios)
{
	unsigned long flags;
	struct bio_list bios;
	struct bio *bio;

	bio_list_init(&bios);

	spin_lock_irqsave(&cache->lock, flags);
	bio_list_merge(&bios, &cache->deferred_flush_bios);
	bio_list_init(&cache->deferred_flush_bios);
	spin_unlock_irqrestore(&cache->lock, flags);

	while ((bio = bio_list_pop(&bios)))
		submit_bios ? generic_make_request(bio) : bio_io_error(bio);
}

static void process_deferred_writethrough_bios(struct cache *cache)
{
	unsigned long flags;
	struct bio_list bios;
	struct bio *bio;

	bio_list_init(&bios);

	spin_lock_irqsave(&cache->lock, flags);
	bio_list_merge(&bios, &cache->deferred_writethrough_bios);
	bio_list_init(&cache->deferred_writethrough_bios);
	spin_unlock_irqrestore(&cache->lock, flags);

	while ((bio = bio_list_pop(&bios)))
		generic_make_request(bio);
}

static void writeback_some_dirty_blocks(struct cache *cache)
{
	int r = 0;
	dm_oblock_t oblock;
	dm_cblock_t cblock;
	struct prealloc structs;
	struct dm_bio_prison_cell *old_ocell;

	memset(&structs, 0, sizeof(structs));

	while (spare_migration_bandwidth(cache)) {
		if (prealloc_data_structs(cache, &structs))
			break;

		r = policy_writeback_work(cache->policy, &oblock, &cblock);
		if (r)
			break;

		r = get_cell(cache, oblock, &structs, &old_ocell);
		if (r) {
			policy_set_dirty(cache->policy, oblock);
			break;
		}

		writeback(cache, &structs, oblock, cblock, old_ocell);
	}

	prealloc_free_structs(cache, &structs);
}

/*----------------------------------------------------------------
 * Invalidations.
 * Dropping something from the cache *without* writing back.
 *--------------------------------------------------------------*/

static void process_invalidation_request(struct cache *cache, struct invalidation_request *req)
{
	int r = 0;
	uint64_t begin = from_cblock(req->cblocks->begin);
	uint64_t end = from_cblock(req->cblocks->end);

	while (begin != end) {
		r = policy_remove_cblock(cache->policy, to_cblock(begin));
		if (!r) {
			r = dm_cache_remove_mapping(cache->cmd, to_cblock(begin));
			if (r)
				break;

		} else if (r == -ENODATA) {
			/* harmless, already unmapped */
			r = 0;

		} else {
			DMERR("policy_remove_cblock failed");
			break;
		}

		begin++;
	}

	cache->commit_requested = true;

	req->err = r;
	atomic_set(&req->complete, 1);

	wake_up(&req->result_wait);
}

static void process_invalidation_requests(struct cache *cache)
{
	struct list_head list;
	struct invalidation_request *req, *tmp;

	INIT_LIST_HEAD(&list);
	spin_lock(&cache->invalidation_lock);
	list_splice_init(&cache->invalidation_requests, &list);
	spin_unlock(&cache->invalidation_lock);

	list_for_each_entry_safe (req, tmp, &list, list)
		process_invalidation_request(cache, req);
}

/*----------------------------------------------------------------
 * Main worker loop
 *--------------------------------------------------------------*/
static bool is_quiescing(struct cache *cache)
{
	return atomic_read(&cache->quiescing);
}

static void ack_quiescing(struct cache *cache)
{
	if (is_quiescing(cache)) {
		atomic_inc(&cache->quiescing_ack);
		wake_up(&cache->quiescing_wait);
	}
}

static void wait_for_quiescing_ack(struct cache *cache)
{
	wait_event(cache->quiescing_wait, atomic_read(&cache->quiescing_ack));
}

static void start_quiescing(struct cache *cache)
{
	atomic_inc(&cache->quiescing);
	wait_for_quiescing_ack(cache);
}

static void stop_quiescing(struct cache *cache)
{
	atomic_set(&cache->quiescing, 0);
	atomic_set(&cache->quiescing_ack, 0);
}

static void wait_for_migrations(struct cache *cache)
{
	wait_event(cache->migration_wait, !atomic_read(&cache->nr_migrations));
}

static void stop_worker(struct cache *cache)
{
	cancel_delayed_work(&cache->waker);
	flush_workqueue(cache->wq);
}

static void requeue_deferred_io(struct cache *cache)
{
	struct bio *bio;
	struct bio_list bios;

	bio_list_init(&bios);
	bio_list_merge(&bios, &cache->deferred_bios);
	bio_list_init(&cache->deferred_bios);

	while ((bio = bio_list_pop(&bios)))
		bio_endio(bio, DM_ENDIO_REQUEUE);
}

static int more_work(struct cache *cache)
{
	if (is_quiescing(cache))
		return !list_empty(&cache->quiesced_migrations) ||
			!list_empty(&cache->completed_migrations) ||
			!list_empty(&cache->need_commit_migrations);
	else
		return !bio_list_empty(&cache->deferred_bios) ||
			!bio_list_empty(&cache->deferred_flush_bios) ||
			!bio_list_empty(&cache->deferred_writethrough_bios) ||
			!list_empty(&cache->quiesced_migrations) ||
			!list_empty(&cache->completed_migrations) ||
			!list_empty(&cache->need_commit_migrations) ||
			cache->invalidate;
}

static void do_worker(struct work_struct *ws)
{
	struct cache *cache = container_of(ws, struct cache, worker);

	do {
		if (!is_quiescing(cache)) {
			writeback_some_dirty_blocks(cache);
			process_deferred_writethrough_bios(cache);
			process_deferred_bios(cache);
			process_invalidation_requests(cache);
		}

		process_migrations(cache, &cache->quiesced_migrations, issue_copy);
		process_migrations(cache, &cache->completed_migrations, complete_migration);

		if (commit_if_needed(cache)) {
			process_deferred_flush_bios(cache, false);

			/*
			 * FIXME: rollback metadata or just go into a
			 * failure mode and error everything
			 */
		} else {
			process_deferred_flush_bios(cache, true);
			process_migrations(cache, &cache->need_commit_migrations,
					   migration_success_post_commit);
		}

		ack_quiescing(cache);

	} while (more_work(cache));
}

/*
 * We want to commit periodically so that not too much
 * unwritten metadata builds up.
 */
static void do_waker(struct work_struct *ws)
{
	struct cache *cache = container_of(to_delayed_work(ws), struct cache, waker);
	policy_tick(cache->policy);
	wake_worker(cache);
	queue_delayed_work(cache->wq, &cache->waker, COMMIT_PERIOD);
}

/*----------------------------------------------------------------*/

static int is_congested(struct dm_dev *dev, int bdi_bits)
{
	struct request_queue *q = bdev_get_queue(dev->bdev);
	return bdi_congested(&q->backing_dev_info, bdi_bits);
}

static int cache_is_congested(struct dm_target_callbacks *cb, int bdi_bits)
{
	struct cache *cache = container_of(cb, struct cache, callbacks);

	return is_congested(cache->origin_dev, bdi_bits) ||
	       is_congested(cache->cache_dev, bdi_bits);
}

/*----------------------------------------------------------------
 * Target methods
 *--------------------------------------------------------------*/

/*
 * This function gets called on the error paths of the constructor, so we
 * have to cope with a partially initialised struct.
 */
static void destroy(struct cache *cache)
{
	unsigned i;

	if (cache->next_migration)
		mempool_free(cache->next_migration, cache->migration_pool);

	if (cache->migration_pool)
		mempool_destroy(cache->migration_pool);

	if (cache->all_io_ds)
		dm_deferred_set_destroy(cache->all_io_ds);

	if (cache->prison)
		dm_bio_prison_destroy(cache->prison);

	if (cache->wq)
		destroy_workqueue(cache->wq);

	if (cache->dirty_bitset)
		free_bitset(cache->dirty_bitset);

	if (cache->discard_bitset)
		free_bitset(cache->discard_bitset);

	if (cache->copier)
		dm_kcopyd_client_destroy(cache->copier);

	if (cache->cmd)
		dm_cache_metadata_close(cache->cmd);

	if (cache->metadata_dev)
		dm_put_device(cache->ti, cache->metadata_dev);
1798
1799 if (cache->origin_dev)
1800 dm_put_device(cache->ti, cache->origin_dev);
1801
1802 if (cache->cache_dev)
1803 dm_put_device(cache->ti, cache->cache_dev);
1804
1805 if (cache->policy)
1806 dm_cache_policy_destroy(cache->policy);
1807
 1808	for (i = 0; i < cache->nr_ctr_args; i++)
1809 kfree(cache->ctr_args[i]);
1810 kfree(cache->ctr_args);
1811
1812 kfree(cache);
1813}
1814
1815static void cache_dtr(struct dm_target *ti)
1816{
1817 struct cache *cache = ti->private;
1818
1819 destroy(cache);
1820}
1821
1822static sector_t get_dev_size(struct dm_dev *dev)
1823{
1824 return i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT;
1825}
1826
1827/*----------------------------------------------------------------*/
1828
1829/*
1830 * Construct a cache device mapping.
1831 *
1832 * cache <metadata dev> <cache dev> <origin dev> <block size>
1833 * <#feature args> [<feature arg>]*
1834 * <policy> <#policy args> [<policy arg>]*
1835 *
1836 * metadata dev : fast device holding the persistent metadata
1837 * cache dev : fast device holding cached data blocks
1838 * origin dev : slow device holding original data blocks
1839 * block size : cache unit size in sectors
1840 *
1841 * #feature args : number of feature arguments passed
 1842 * feature args    : writethrough or passthrough. (The default is writeback.)
1843 *
1844 * policy : the replacement policy to use
1845 * #policy args : an even number of policy arguments corresponding
1846 * to key/value pairs passed to the policy
1847 * policy args : key/value pairs passed to the policy
1848 * E.g. 'sequential_threshold 1024'
1849 * See cache-policies.txt for details.
1850 *
1851 * Optional feature arguments are:
1852 * writethrough : write through caching that prohibits cache block
1853 * content from being different from origin block content.
1854 * Without this argument, the default behaviour is to write
1855 * back cache block contents later for performance reasons,
1856 * so they may differ from the corresponding origin blocks.
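 *   passthrough	: a degraded mode in which all reads are served from
 *		the origin device.  The cache must contain only clean
 *		blocks when this mode is selected; a write invalidates
 *		the corresponding cache block, so the cache never ends
 *		up holding data that differs from the origin.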
1857 */
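/*
 * A minimal usage sketch (device names and sizes below are hypothetical,
 * not taken from this file): a 20 GiB origin fronted by an SSD cache using
 * 512-sector (256 KiB) cache blocks, writeback mode and the default policy
 * with one key/value pair:
 *
 *   dmsetup create cached --table "0 41943040 cache /dev/mapper/fast-meta \
 *	/dev/mapper/fast-data /dev/mapper/slow 512 1 writeback default \
 *	2 sequential_threshold 1024"
 */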
1858struct cache_args {
1859 struct dm_target *ti;
1860
1861 struct dm_dev *metadata_dev;
1862
1863 struct dm_dev *cache_dev;
1864 sector_t cache_sectors;
1865
1866 struct dm_dev *origin_dev;
1867 sector_t origin_sectors;
1868
1869 uint32_t block_size;
1870
1871 const char *policy_name;
1872 int policy_argc;
1873 const char **policy_argv;
1874
1875 struct cache_features features;
1876};
1877
1878static void destroy_cache_args(struct cache_args *ca)
1879{
1880 if (ca->metadata_dev)
1881 dm_put_device(ca->ti, ca->metadata_dev);
1882
1883 if (ca->cache_dev)
1884 dm_put_device(ca->ti, ca->cache_dev);
1885
1886 if (ca->origin_dev)
1887 dm_put_device(ca->ti, ca->origin_dev);
1888
1889 kfree(ca);
1890}
1891
1892static bool at_least_one_arg(struct dm_arg_set *as, char **error)
1893{
1894 if (!as->argc) {
1895 *error = "Insufficient args";
1896 return false;
1897 }
1898
1899 return true;
1900}
1901
1902static int parse_metadata_dev(struct cache_args *ca, struct dm_arg_set *as,
1903 char **error)
1904{
1905 int r;
1906 sector_t metadata_dev_size;
1907 char b[BDEVNAME_SIZE];
1908
1909 if (!at_least_one_arg(as, error))
1910 return -EINVAL;
1911
1912 r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
1913 &ca->metadata_dev);
1914 if (r) {
1915 *error = "Error opening metadata device";
1916 return r;
1917 }
1918
1919 metadata_dev_size = get_dev_size(ca->metadata_dev);
1920 if (metadata_dev_size > DM_CACHE_METADATA_MAX_SECTORS_WARNING)
1921 DMWARN("Metadata device %s is larger than %u sectors: excess space will not be used.",
 1922		       bdevname(ca->metadata_dev->bdev, b), DM_CACHE_METADATA_MAX_SECTORS_WARNING);
1923
1924 return 0;
1925}
1926
1927static int parse_cache_dev(struct cache_args *ca, struct dm_arg_set *as,
1928 char **error)
1929{
1930 int r;
1931
1932 if (!at_least_one_arg(as, error))
1933 return -EINVAL;
1934
1935 r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
1936 &ca->cache_dev);
1937 if (r) {
1938 *error = "Error opening cache device";
1939 return r;
1940 }
1941 ca->cache_sectors = get_dev_size(ca->cache_dev);
1942
1943 return 0;
1944}
1945
1946static int parse_origin_dev(struct cache_args *ca, struct dm_arg_set *as,
1947 char **error)
1948{
1949 int r;
1950
1951 if (!at_least_one_arg(as, error))
1952 return -EINVAL;
1953
1954 r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
1955 &ca->origin_dev);
1956 if (r) {
1957 *error = "Error opening origin device";
1958 return r;
1959 }
1960
1961 ca->origin_sectors = get_dev_size(ca->origin_dev);
1962 if (ca->ti->len > ca->origin_sectors) {
1963 *error = "Device size larger than cached device";
1964 return -EINVAL;
1965 }
1966
1967 return 0;
1968}
1969
1970static int parse_block_size(struct cache_args *ca, struct dm_arg_set *as,
1971 char **error)
1972{
Mike Snitzer05473042013-08-16 10:54:19 -04001973 unsigned long block_size;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001974
1975 if (!at_least_one_arg(as, error))
1976 return -EINVAL;
1977
Mike Snitzer05473042013-08-16 10:54:19 -04001978 if (kstrtoul(dm_shift_arg(as), 10, &block_size) || !block_size ||
1979 block_size < DATA_DEV_BLOCK_SIZE_MIN_SECTORS ||
1980 block_size > DATA_DEV_BLOCK_SIZE_MAX_SECTORS ||
1981 block_size & (DATA_DEV_BLOCK_SIZE_MIN_SECTORS - 1)) {
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001982 *error = "Invalid data block size";
1983 return -EINVAL;
1984 }
1985
Mike Snitzer05473042013-08-16 10:54:19 -04001986 if (block_size > ca->cache_sectors) {
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001987 *error = "Data block size is larger than the cache device";
1988 return -EINVAL;
1989 }
1990
Mike Snitzer05473042013-08-16 10:54:19 -04001991 ca->block_size = block_size;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001992
1993 return 0;
1994}
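/*
 * For illustration (values invented, not from this file): with the limits
 * above, a block size of 512 sectors (256 KiB) is accepted because it lies
 * between 64 and 2097152 sectors and is a multiple of 64; 96 sectors is
 * rejected since 96 & (64 - 1) != 0, and 32 sectors is rejected for being
 * below DATA_DEV_BLOCK_SIZE_MIN_SECTORS.
 */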
1995
1996static void init_features(struct cache_features *cf)
1997{
1998 cf->mode = CM_WRITE;
Joe Thornber2ee57d52013-10-24 14:10:29 -04001999 cf->io_mode = CM_IO_WRITEBACK;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002000}
2001
2002static int parse_features(struct cache_args *ca, struct dm_arg_set *as,
2003 char **error)
2004{
2005 static struct dm_arg _args[] = {
2006 {0, 1, "Invalid number of cache feature arguments"},
2007 };
2008
2009 int r;
2010 unsigned argc;
2011 const char *arg;
2012 struct cache_features *cf = &ca->features;
2013
2014 init_features(cf);
2015
2016 r = dm_read_arg_group(_args, as, &argc, error);
2017 if (r)
2018 return -EINVAL;
2019
2020 while (argc--) {
2021 arg = dm_shift_arg(as);
2022
2023 if (!strcasecmp(arg, "writeback"))
Joe Thornber2ee57d52013-10-24 14:10:29 -04002024 cf->io_mode = CM_IO_WRITEBACK;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002025
2026 else if (!strcasecmp(arg, "writethrough"))
Joe Thornber2ee57d52013-10-24 14:10:29 -04002027 cf->io_mode = CM_IO_WRITETHROUGH;
2028
2029 else if (!strcasecmp(arg, "passthrough"))
2030 cf->io_mode = CM_IO_PASSTHROUGH;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002031
2032 else {
2033 *error = "Unrecognised cache feature requested";
2034 return -EINVAL;
2035 }
2036 }
2037
2038 return 0;
2039}
2040
2041static int parse_policy(struct cache_args *ca, struct dm_arg_set *as,
2042 char **error)
2043{
2044 static struct dm_arg _args[] = {
2045 {0, 1024, "Invalid number of policy arguments"},
2046 };
2047
2048 int r;
2049
2050 if (!at_least_one_arg(as, error))
2051 return -EINVAL;
2052
2053 ca->policy_name = dm_shift_arg(as);
2054
2055 r = dm_read_arg_group(_args, as, &ca->policy_argc, error);
2056 if (r)
2057 return -EINVAL;
2058
2059 ca->policy_argv = (const char **)as->argv;
2060 dm_consume_args(as, ca->policy_argc);
2061
2062 return 0;
2063}
2064
2065static int parse_cache_args(struct cache_args *ca, int argc, char **argv,
2066 char **error)
2067{
2068 int r;
2069 struct dm_arg_set as;
2070
2071 as.argc = argc;
2072 as.argv = argv;
2073
2074 r = parse_metadata_dev(ca, &as, error);
2075 if (r)
2076 return r;
2077
2078 r = parse_cache_dev(ca, &as, error);
2079 if (r)
2080 return r;
2081
2082 r = parse_origin_dev(ca, &as, error);
2083 if (r)
2084 return r;
2085
2086 r = parse_block_size(ca, &as, error);
2087 if (r)
2088 return r;
2089
2090 r = parse_features(ca, &as, error);
2091 if (r)
2092 return r;
2093
2094 r = parse_policy(ca, &as, error);
2095 if (r)
2096 return r;
2097
2098 return 0;
2099}
2100
2101/*----------------------------------------------------------------*/
2102
2103static struct kmem_cache *migration_cache;
2104
Alasdair G Kergon2c73c472013-05-10 14:37:21 +01002105#define NOT_CORE_OPTION 1
2106
Joe Thornber2f14f4b2013-05-10 14:37:21 +01002107static int process_config_option(struct cache *cache, const char *key, const char *value)
Alasdair G Kergon2c73c472013-05-10 14:37:21 +01002108{
2109 unsigned long tmp;
2110
Joe Thornber2f14f4b2013-05-10 14:37:21 +01002111 if (!strcasecmp(key, "migration_threshold")) {
2112 if (kstrtoul(value, 10, &tmp))
Alasdair G Kergon2c73c472013-05-10 14:37:21 +01002113 return -EINVAL;
2114
2115 cache->migration_threshold = tmp;
2116 return 0;
2117 }
2118
2119 return NOT_CORE_OPTION;
2120}
2121
Joe Thornber2f14f4b2013-05-10 14:37:21 +01002122static int set_config_value(struct cache *cache, const char *key, const char *value)
2123{
2124 int r = process_config_option(cache, key, value);
2125
2126 if (r == NOT_CORE_OPTION)
2127 r = policy_set_config_value(cache->policy, key, value);
2128
2129 if (r)
2130 DMWARN("bad config value for %s: %s", key, value);
2131
2132 return r;
2133}
2134
2135static int set_config_values(struct cache *cache, int argc, const char **argv)
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002136{
2137 int r = 0;
2138
2139 if (argc & 1) {
2140 DMWARN("Odd number of policy arguments given but they should be <key> <value> pairs.");
2141 return -EINVAL;
2142 }
2143
2144 while (argc) {
Joe Thornber2f14f4b2013-05-10 14:37:21 +01002145 r = set_config_value(cache, argv[0], argv[1]);
2146 if (r)
2147 break;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002148
2149 argc -= 2;
2150 argv += 2;
2151 }
2152
2153 return r;
2154}
2155
2156static int create_cache_policy(struct cache *cache, struct cache_args *ca,
2157 char **error)
2158{
Mikulas Patocka4cb3e1d2013-10-01 18:35:39 -04002159 struct dm_cache_policy *p = dm_cache_policy_create(ca->policy_name,
2160 cache->cache_size,
2161 cache->origin_sectors,
2162 cache->sectors_per_block);
2163 if (IS_ERR(p)) {
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002164 *error = "Error creating cache's policy";
Mikulas Patocka4cb3e1d2013-10-01 18:35:39 -04002165 return PTR_ERR(p);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002166 }
Mikulas Patocka4cb3e1d2013-10-01 18:35:39 -04002167 cache->policy = p;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002168
Joe Thornber2f14f4b2013-05-10 14:37:21 +01002169 return 0;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002170}
2171
2172/*
2173 * We want the discard block size to be a power of two, at least the size
2174 * of the cache block size, and have no more than 2^14 discard blocks
2175 * across the origin.
2176 */
2177#define MAX_DISCARD_BLOCKS (1 << 14)
2178
2179static bool too_many_discard_blocks(sector_t discard_block_size,
2180 sector_t origin_size)
2181{
2182 (void) sector_div(origin_size, discard_block_size);
2183
2184 return origin_size > MAX_DISCARD_BLOCKS;
2185}
2186
2187static sector_t calculate_discard_block_size(sector_t cache_block_size,
2188 sector_t origin_size)
2189{
2190 sector_t discard_block_size;
2191
2192 discard_block_size = roundup_pow_of_two(cache_block_size);
2193
2194 if (origin_size)
2195 while (too_many_discard_blocks(discard_block_size, origin_size))
2196 discard_block_size *= 2;
2197
2198 return discard_block_size;
2199}
2200
Joe Thornberf8350da2013-05-10 14:37:16 +01002201#define DEFAULT_MIGRATION_THRESHOLD 2048
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002202
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002203static int cache_create(struct cache_args *ca, struct cache **result)
2204{
2205 int r = 0;
2206 char **error = &ca->ti->error;
2207 struct cache *cache;
2208 struct dm_target *ti = ca->ti;
2209 dm_block_t origin_blocks;
2210 struct dm_cache_metadata *cmd;
2211 bool may_format = ca->features.mode == CM_WRITE;
2212
2213 cache = kzalloc(sizeof(*cache), GFP_KERNEL);
2214 if (!cache)
2215 return -ENOMEM;
2216
2217 cache->ti = ca->ti;
2218 ti->private = cache;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002219 ti->num_flush_bios = 2;
2220 ti->flush_supported = true;
2221
2222 ti->num_discard_bios = 1;
2223 ti->discards_supported = true;
2224 ti->discard_zeroes_data_unsupported = true;
2225
Joe Thornber8c5008f2013-05-10 14:37:18 +01002226 cache->features = ca->features;
Mike Snitzer19b00922013-04-05 15:36:34 +01002227 ti->per_bio_data_size = get_per_bio_data_size(cache);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002228
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002229 cache->callbacks.congested_fn = cache_is_congested;
2230 dm_table_add_target_callbacks(ti->table, &cache->callbacks);
2231
2232 cache->metadata_dev = ca->metadata_dev;
2233 cache->origin_dev = ca->origin_dev;
2234 cache->cache_dev = ca->cache_dev;
2235
2236 ca->metadata_dev = ca->origin_dev = ca->cache_dev = NULL;
2237
2238 /* FIXME: factor out this whole section */
2239 origin_blocks = cache->origin_sectors = ca->origin_sectors;
Joe Thornber414dd672013-03-20 17:21:25 +00002240 origin_blocks = block_div(origin_blocks, ca->block_size);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002241 cache->origin_blocks = to_oblock(origin_blocks);
2242
2243 cache->sectors_per_block = ca->block_size;
2244 if (dm_set_target_max_io_len(ti, cache->sectors_per_block)) {
2245 r = -EINVAL;
2246 goto bad;
2247 }
2248
2249 if (ca->block_size & (ca->block_size - 1)) {
2250 dm_block_t cache_size = ca->cache_sectors;
2251
2252 cache->sectors_per_block_shift = -1;
Joe Thornber414dd672013-03-20 17:21:25 +00002253 cache_size = block_div(cache_size, ca->block_size);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002254 cache->cache_size = to_cblock(cache_size);
2255 } else {
2256 cache->sectors_per_block_shift = __ffs(ca->block_size);
2257 cache->cache_size = to_cblock(ca->cache_sectors >> cache->sectors_per_block_shift);
2258 }
2259
2260 r = create_cache_policy(cache, ca, error);
2261 if (r)
2262 goto bad;
Joe Thornber2f14f4b2013-05-10 14:37:21 +01002263
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002264 cache->policy_nr_args = ca->policy_argc;
Joe Thornber2f14f4b2013-05-10 14:37:21 +01002265 cache->migration_threshold = DEFAULT_MIGRATION_THRESHOLD;
2266
2267 r = set_config_values(cache, ca->policy_argc, ca->policy_argv);
2268 if (r) {
2269 *error = "Error setting cache policy's config values";
2270 goto bad;
2271 }
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002272
2273 cmd = dm_cache_metadata_open(cache->metadata_dev->bdev,
2274 ca->block_size, may_format,
2275 dm_cache_policy_get_hint_size(cache->policy));
2276 if (IS_ERR(cmd)) {
2277 *error = "Error creating metadata object";
2278 r = PTR_ERR(cmd);
2279 goto bad;
2280 }
2281 cache->cmd = cmd;
2282
Joe Thornber2ee57d52013-10-24 14:10:29 -04002283 if (passthrough_mode(&cache->features)) {
2284 bool all_clean;
2285
2286 r = dm_cache_metadata_all_clean(cache->cmd, &all_clean);
2287 if (r) {
2288 *error = "dm_cache_metadata_all_clean() failed";
2289 goto bad;
2290 }
2291
2292 if (!all_clean) {
2293 *error = "Cannot enter passthrough mode unless all blocks are clean";
2294 r = -EINVAL;
2295 goto bad;
2296 }
2297 }
2298
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002299 spin_lock_init(&cache->lock);
2300 bio_list_init(&cache->deferred_bios);
2301 bio_list_init(&cache->deferred_flush_bios);
Joe Thornbere2e74d62013-03-20 17:21:27 +00002302 bio_list_init(&cache->deferred_writethrough_bios);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002303 INIT_LIST_HEAD(&cache->quiesced_migrations);
2304 INIT_LIST_HEAD(&cache->completed_migrations);
2305 INIT_LIST_HEAD(&cache->need_commit_migrations);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002306 atomic_set(&cache->nr_migrations, 0);
2307 init_waitqueue_head(&cache->migration_wait);
2308
Joe Thornber66cb1912013-10-30 17:11:58 +00002309 init_waitqueue_head(&cache->quiescing_wait);
Joe Thornber238f8362013-10-30 17:29:30 +00002310 atomic_set(&cache->quiescing, 0);
Joe Thornber66cb1912013-10-30 17:11:58 +00002311 atomic_set(&cache->quiescing_ack, 0);
2312
Wei Yongjunfa4d6832013-05-10 14:37:14 +01002313 r = -ENOMEM;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002314 cache->nr_dirty = 0;
2315 cache->dirty_bitset = alloc_bitset(from_cblock(cache->cache_size));
2316 if (!cache->dirty_bitset) {
2317 *error = "could not allocate dirty bitset";
2318 goto bad;
2319 }
2320 clear_bitset(cache->dirty_bitset, from_cblock(cache->cache_size));
2321
2322 cache->discard_block_size =
2323 calculate_discard_block_size(cache->sectors_per_block,
2324 cache->origin_sectors);
2325 cache->discard_nr_blocks = oblock_to_dblock(cache, cache->origin_blocks);
2326 cache->discard_bitset = alloc_bitset(from_dblock(cache->discard_nr_blocks));
2327 if (!cache->discard_bitset) {
2328 *error = "could not allocate discard bitset";
2329 goto bad;
2330 }
2331 clear_bitset(cache->discard_bitset, from_dblock(cache->discard_nr_blocks));
2332
2333 cache->copier = dm_kcopyd_client_create(&dm_kcopyd_throttle);
2334 if (IS_ERR(cache->copier)) {
2335 *error = "could not create kcopyd client";
2336 r = PTR_ERR(cache->copier);
2337 goto bad;
2338 }
2339
2340 cache->wq = alloc_ordered_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM);
2341 if (!cache->wq) {
2342 *error = "could not create workqueue for metadata object";
2343 goto bad;
2344 }
2345 INIT_WORK(&cache->worker, do_worker);
2346 INIT_DELAYED_WORK(&cache->waker, do_waker);
2347 cache->last_commit_jiffies = jiffies;
2348
2349 cache->prison = dm_bio_prison_create(PRISON_CELLS);
2350 if (!cache->prison) {
2351 *error = "could not create bio prison";
2352 goto bad;
2353 }
2354
2355 cache->all_io_ds = dm_deferred_set_create();
2356 if (!cache->all_io_ds) {
2357 *error = "could not create all_io deferred set";
2358 goto bad;
2359 }
2360
2361 cache->migration_pool = mempool_create_slab_pool(MIGRATION_POOL_SIZE,
2362 migration_cache);
2363 if (!cache->migration_pool) {
2364 *error = "Error creating cache's migration mempool";
2365 goto bad;
2366 }
2367
2368 cache->next_migration = NULL;
2369
2370 cache->need_tick_bio = true;
2371 cache->sized = false;
Joe Thornber65790ff2013-11-08 16:39:50 +00002372 cache->invalidate = false;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002373 cache->commit_requested = false;
2374 cache->loaded_mappings = false;
2375 cache->loaded_discards = false;
2376
2377 load_stats(cache);
2378
2379 atomic_set(&cache->stats.demotion, 0);
2380 atomic_set(&cache->stats.promotion, 0);
2381 atomic_set(&cache->stats.copies_avoided, 0);
2382 atomic_set(&cache->stats.cache_cell_clash, 0);
2383 atomic_set(&cache->stats.commit_count, 0);
2384 atomic_set(&cache->stats.discard_count, 0);
2385
Joe Thornber65790ff2013-11-08 16:39:50 +00002386 spin_lock_init(&cache->invalidation_lock);
2387 INIT_LIST_HEAD(&cache->invalidation_requests);
2388
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002389 *result = cache;
2390 return 0;
2391
2392bad:
2393 destroy(cache);
2394 return r;
2395}
2396
2397static int copy_ctr_args(struct cache *cache, int argc, const char **argv)
2398{
2399 unsigned i;
2400 const char **copy;
2401
2402 copy = kcalloc(argc, sizeof(*copy), GFP_KERNEL);
2403 if (!copy)
2404 return -ENOMEM;
2405 for (i = 0; i < argc; i++) {
2406 copy[i] = kstrdup(argv[i], GFP_KERNEL);
2407 if (!copy[i]) {
2408 while (i--)
2409 kfree(copy[i]);
2410 kfree(copy);
2411 return -ENOMEM;
2412 }
2413 }
2414
2415 cache->nr_ctr_args = argc;
2416 cache->ctr_args = copy;
2417
2418 return 0;
2419}
2420
2421static int cache_ctr(struct dm_target *ti, unsigned argc, char **argv)
2422{
2423 int r = -EINVAL;
2424 struct cache_args *ca;
2425 struct cache *cache = NULL;
2426
2427 ca = kzalloc(sizeof(*ca), GFP_KERNEL);
2428 if (!ca) {
2429 ti->error = "Error allocating memory for cache";
2430 return -ENOMEM;
2431 }
2432 ca->ti = ti;
2433
2434 r = parse_cache_args(ca, argc, argv, &ti->error);
2435 if (r)
2436 goto out;
2437
2438 r = cache_create(ca, &cache);
Heinz Mauelshagen617a0b82013-03-20 17:21:26 +00002439 if (r)
2440 goto out;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002441
2442 r = copy_ctr_args(cache, argc - 3, (const char **)argv + 3);
2443 if (r) {
2444 destroy(cache);
2445 goto out;
2446 }
2447
2448 ti->private = cache;
2449
2450out:
2451 destroy_cache_args(ca);
2452 return r;
2453}
2454
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002455static int cache_map(struct dm_target *ti, struct bio *bio)
2456{
2457 struct cache *cache = ti->private;
2458
2459 int r;
2460 dm_oblock_t block = get_bio_block(cache, bio);
Mike Snitzer19b00922013-04-05 15:36:34 +01002461 size_t pb_data_size = get_per_bio_data_size(cache);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002462 bool can_migrate = false;
2463 bool discarded_block;
2464 struct dm_bio_prison_cell *cell;
2465 struct policy_result lookup_result;
2466 struct per_bio_data *pb;
2467
 2468	if (from_oblock(block) >= from_oblock(cache->origin_blocks)) {
2469 /*
2470 * This can only occur if the io goes to a partial block at
2471 * the end of the origin device. We don't cache these.
2472 * Just remap to the origin and carry on.
2473 */
2474 remap_to_origin_clear_discard(cache, bio, block);
2475 return DM_MAPIO_REMAPPED;
2476 }
2477
Mike Snitzer19b00922013-04-05 15:36:34 +01002478 pb = init_per_bio_data(bio, pb_data_size);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002479
2480 if (bio->bi_rw & (REQ_FLUSH | REQ_FUA | REQ_DISCARD)) {
2481 defer_bio(cache, bio);
2482 return DM_MAPIO_SUBMITTED;
2483 }
2484
2485 /*
2486 * Check to see if that block is currently migrating.
2487 */
2488 cell = alloc_prison_cell(cache);
2489 if (!cell) {
2490 defer_bio(cache, bio);
2491 return DM_MAPIO_SUBMITTED;
2492 }
2493
2494 r = bio_detain(cache, block, bio, cell,
2495 (cell_free_fn) free_prison_cell,
2496 cache, &cell);
2497 if (r) {
2498 if (r < 0)
2499 defer_bio(cache, bio);
2500
2501 return DM_MAPIO_SUBMITTED;
2502 }
2503
2504 discarded_block = is_discarded_oblock(cache, block);
2505
2506 r = policy_map(cache->policy, block, false, can_migrate, discarded_block,
2507 bio, &lookup_result);
2508 if (r == -EWOULDBLOCK) {
2509 cell_defer(cache, cell, true);
2510 return DM_MAPIO_SUBMITTED;
2511
2512 } else if (r) {
2513 DMERR_LIMIT("Unexpected return from cache replacement policy: %d", r);
2514 bio_io_error(bio);
2515 return DM_MAPIO_SUBMITTED;
2516 }
2517
Joe Thornber2ee57d52013-10-24 14:10:29 -04002518 r = DM_MAPIO_REMAPPED;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002519 switch (lookup_result.op) {
2520 case POLICY_HIT:
Joe Thornber2ee57d52013-10-24 14:10:29 -04002521 if (passthrough_mode(&cache->features)) {
2522 if (bio_data_dir(bio) == WRITE) {
2523 /*
2524 * We need to invalidate this block, so
2525 * defer for the worker thread.
2526 */
2527 cell_defer(cache, cell, true);
2528 r = DM_MAPIO_SUBMITTED;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002529
Joe Thornber2ee57d52013-10-24 14:10:29 -04002530 } else {
2531 pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
2532 inc_miss_counter(cache, bio);
2533 remap_to_origin_clear_discard(cache, bio, block);
Joe Thornbere2e74d62013-03-20 17:21:27 +00002534
Joe Thornber2ee57d52013-10-24 14:10:29 -04002535 cell_defer(cache, cell, false);
2536 }
2537
2538 } else {
2539 inc_hit_counter(cache, bio);
2540
2541 if (bio_data_dir(bio) == WRITE && writethrough_mode(&cache->features) &&
2542 !is_dirty(cache, lookup_result.cblock))
2543 remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
2544 else
2545 remap_to_cache_dirty(cache, bio, block, lookup_result.cblock);
2546
2547 cell_defer(cache, cell, false);
2548 }
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002549 break;
2550
2551 case POLICY_MISS:
2552 inc_miss_counter(cache, bio);
2553 pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
2554
2555 if (pb->req_nr != 0) {
2556 /*
2557 * This is a duplicate writethrough io that is no
2558 * longer needed because the block has been demoted.
2559 */
2560 bio_endio(bio, 0);
2561 cell_defer(cache, cell, false);
2562 return DM_MAPIO_SUBMITTED;
2563 } else {
2564 remap_to_origin_clear_discard(cache, bio, block);
2565 cell_defer(cache, cell, false);
2566 }
2567 break;
2568
2569 default:
2570 DMERR_LIMIT("%s: erroring bio: unknown policy op: %u", __func__,
2571 (unsigned) lookup_result.op);
2572 bio_io_error(bio);
Joe Thornber2ee57d52013-10-24 14:10:29 -04002573 r = DM_MAPIO_SUBMITTED;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002574 }
2575
Joe Thornber2ee57d52013-10-24 14:10:29 -04002576 return r;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002577}
2578
2579static int cache_end_io(struct dm_target *ti, struct bio *bio, int error)
2580{
2581 struct cache *cache = ti->private;
2582 unsigned long flags;
Mike Snitzer19b00922013-04-05 15:36:34 +01002583 size_t pb_data_size = get_per_bio_data_size(cache);
2584 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002585
2586 if (pb->tick) {
2587 policy_tick(cache->policy);
2588
2589 spin_lock_irqsave(&cache->lock, flags);
2590 cache->need_tick_bio = true;
2591 spin_unlock_irqrestore(&cache->lock, flags);
2592 }
2593
2594 check_for_quiesced_migrations(cache, pb);
2595
2596 return 0;
2597}
2598
2599static int write_dirty_bitset(struct cache *cache)
2600{
2601 unsigned i, r;
2602
2603 for (i = 0; i < from_cblock(cache->cache_size); i++) {
2604 r = dm_cache_set_dirty(cache->cmd, to_cblock(i),
2605 is_dirty(cache, to_cblock(i)));
2606 if (r)
2607 return r;
2608 }
2609
2610 return 0;
2611}
2612
2613static int write_discard_bitset(struct cache *cache)
2614{
2615 unsigned i, r;
2616
2617 r = dm_cache_discard_bitset_resize(cache->cmd, cache->discard_block_size,
2618 cache->discard_nr_blocks);
2619 if (r) {
2620 DMERR("could not resize on-disk discard bitset");
2621 return r;
2622 }
2623
2624 for (i = 0; i < from_dblock(cache->discard_nr_blocks); i++) {
2625 r = dm_cache_set_discard(cache->cmd, to_dblock(i),
2626 is_discarded(cache, to_dblock(i)));
2627 if (r)
2628 return r;
2629 }
2630
2631 return 0;
2632}
2633
2634static int save_hint(void *context, dm_cblock_t cblock, dm_oblock_t oblock,
2635 uint32_t hint)
2636{
2637 struct cache *cache = context;
2638 return dm_cache_save_hint(cache->cmd, cblock, hint);
2639}
2640
2641static int write_hints(struct cache *cache)
2642{
2643 int r;
2644
2645 r = dm_cache_begin_hints(cache->cmd, cache->policy);
2646 if (r) {
2647 DMERR("dm_cache_begin_hints failed");
2648 return r;
2649 }
2650
2651 r = policy_walk_mappings(cache->policy, save_hint, cache);
2652 if (r)
2653 DMERR("policy_walk_mappings failed");
2654
2655 return r;
2656}
2657
2658/*
2659 * returns true on success
2660 */
2661static bool sync_metadata(struct cache *cache)
2662{
2663 int r1, r2, r3, r4;
2664
2665 r1 = write_dirty_bitset(cache);
2666 if (r1)
2667 DMERR("could not write dirty bitset");
2668
2669 r2 = write_discard_bitset(cache);
2670 if (r2)
2671 DMERR("could not write discard bitset");
2672
2673 save_stats(cache);
2674
2675 r3 = write_hints(cache);
2676 if (r3)
2677 DMERR("could not write hints");
2678
2679 /*
2680 * If writing the above metadata failed, we still commit, but don't
2681 * set the clean shutdown flag. This will effectively force every
2682 * dirty bit to be set on reload.
2683 */
2684 r4 = dm_cache_commit(cache->cmd, !r1 && !r2 && !r3);
2685 if (r4)
2686 DMERR("could not write cache metadata. Data loss may occur.");
2687
2688 return !r1 && !r2 && !r3 && !r4;
2689}
2690
2691static void cache_postsuspend(struct dm_target *ti)
2692{
2693 struct cache *cache = ti->private;
2694
2695 start_quiescing(cache);
2696 wait_for_migrations(cache);
2697 stop_worker(cache);
2698 requeue_deferred_io(cache);
2699 stop_quiescing(cache);
2700
2701 (void) sync_metadata(cache);
2702}
2703
2704static int load_mapping(void *context, dm_oblock_t oblock, dm_cblock_t cblock,
2705 bool dirty, uint32_t hint, bool hint_valid)
2706{
2707 int r;
2708 struct cache *cache = context;
2709
2710 r = policy_load_mapping(cache->policy, oblock, cblock, hint, hint_valid);
2711 if (r)
2712 return r;
2713
2714 if (dirty)
2715 set_dirty(cache, oblock, cblock);
2716 else
2717 clear_dirty(cache, oblock, cblock);
2718
2719 return 0;
2720}
2721
2722static int load_discard(void *context, sector_t discard_block_size,
2723 dm_dblock_t dblock, bool discard)
2724{
2725 struct cache *cache = context;
2726
2727 /* FIXME: handle mis-matched block size */
2728
2729 if (discard)
2730 set_discard(cache, dblock);
2731 else
2732 clear_discard(cache, dblock);
2733
2734 return 0;
2735}
2736
Joe Thornberf494a9c2013-10-31 13:55:49 -04002737static dm_cblock_t get_cache_dev_size(struct cache *cache)
2738{
2739 sector_t size = get_dev_size(cache->cache_dev);
2740 (void) sector_div(size, cache->sectors_per_block);
2741 return to_cblock(size);
2742}
2743
2744static bool can_resize(struct cache *cache, dm_cblock_t new_size)
2745{
2746 if (from_cblock(new_size) > from_cblock(cache->cache_size))
2747 return true;
2748
2749 /*
2750 * We can't drop a dirty block when shrinking the cache.
2751 */
 2752	while (from_cblock(new_size) < from_cblock(cache->cache_size)) {
 2753		if (is_dirty(cache, new_size)) {
 2754			DMERR("unable to shrink cache; cache block %llu is dirty",
 2755			      (unsigned long long) from_cblock(new_size));
 2756			return false;
 2757		}
 2758		new_size = to_cblock(from_cblock(new_size) + 1);
 2759	}
2760
2761 return true;
2762}
2763
2764static int resize_cache_dev(struct cache *cache, dm_cblock_t new_size)
2765{
2766 int r;
2767
Vincent Pelletier08844802013-11-30 12:58:42 +01002768 r = dm_cache_resize(cache->cmd, new_size);
Joe Thornberf494a9c2013-10-31 13:55:49 -04002769 if (r) {
2770 DMERR("could not resize cache metadata");
2771 return r;
2772 }
2773
2774 cache->cache_size = new_size;
2775
2776 return 0;
2777}
2778
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002779static int cache_preresume(struct dm_target *ti)
2780{
2781 int r = 0;
2782 struct cache *cache = ti->private;
Joe Thornberf494a9c2013-10-31 13:55:49 -04002783 dm_cblock_t csize = get_cache_dev_size(cache);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002784
2785 /*
2786 * Check to see if the cache has resized.
2787 */
Joe Thornberf494a9c2013-10-31 13:55:49 -04002788 if (!cache->sized) {
2789 r = resize_cache_dev(cache, csize);
2790 if (r)
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002791 return r;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002792
2793 cache->sized = true;
Joe Thornberf494a9c2013-10-31 13:55:49 -04002794
2795 } else if (csize != cache->cache_size) {
2796 if (!can_resize(cache, csize))
2797 return -EINVAL;
2798
2799 r = resize_cache_dev(cache, csize);
2800 if (r)
2801 return r;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002802 }
2803
2804 if (!cache->loaded_mappings) {
Mike Snitzerea2dd8c2013-03-20 17:21:28 +00002805 r = dm_cache_load_mappings(cache->cmd, cache->policy,
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002806 load_mapping, cache);
2807 if (r) {
2808 DMERR("could not load cache mappings");
2809 return r;
2810 }
2811
2812 cache->loaded_mappings = true;
2813 }
2814
2815 if (!cache->loaded_discards) {
2816 r = dm_cache_load_discards(cache->cmd, load_discard, cache);
2817 if (r) {
2818 DMERR("could not load origin discards");
2819 return r;
2820 }
2821
2822 cache->loaded_discards = true;
2823 }
2824
2825 return r;
2826}
2827
2828static void cache_resume(struct dm_target *ti)
2829{
2830 struct cache *cache = ti->private;
2831
2832 cache->need_tick_bio = true;
2833 do_waker(&cache->waker.work);
2834}
2835
2836/*
2837 * Status format:
2838 *
Mike Snitzer6a388612014-01-09 16:04:12 -05002839 * <metadata block size> <#used metadata blocks>/<#total metadata blocks>
2840 * <cache block size> <#used cache blocks>/<#total cache blocks>
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002841 * <#read hits> <#read misses> <#write hits> <#write misses>
Mike Snitzer6a388612014-01-09 16:04:12 -05002842 * <#demotions> <#promotions> <#dirty>
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002843 * <#features> <features>*
2844 * <#core args> <core args>
Mike Snitzer2e68c4e2014-01-15 21:06:55 -05002845 * <policy name> <#policy args> <policy args>*
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002846 */
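/*
 * A hypothetical STATUSTYPE_INFO line, with every number invented purely
 * for illustration (the trailing policy key/value pairs come from
 * policy_emit_config_values() and depend on the policy in use):
 *
 *   8 27/65536 512 54/20480 1603 492 3241 770 3 12 4 1 writeback 2
 *   migration_threshold 2048 mq <#policy args> <policy arg>*
 */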
2847static void cache_status(struct dm_target *ti, status_type_t type,
2848 unsigned status_flags, char *result, unsigned maxlen)
2849{
2850 int r = 0;
2851 unsigned i;
2852 ssize_t sz = 0;
2853 dm_block_t nr_free_blocks_metadata = 0;
2854 dm_block_t nr_blocks_metadata = 0;
2855 char buf[BDEVNAME_SIZE];
2856 struct cache *cache = ti->private;
2857 dm_cblock_t residency;
2858
2859 switch (type) {
2860 case STATUSTYPE_INFO:
2861 /* Commit to ensure statistics aren't out-of-date */
2862 if (!(status_flags & DM_STATUS_NOFLUSH_FLAG) && !dm_suspended(ti)) {
2863 r = dm_cache_commit(cache->cmd, false);
2864 if (r)
2865 DMERR("could not commit metadata for accurate status");
2866 }
2867
2868 r = dm_cache_get_free_metadata_block_count(cache->cmd,
2869 &nr_free_blocks_metadata);
2870 if (r) {
2871 DMERR("could not get metadata free block count");
2872 goto err;
2873 }
2874
2875 r = dm_cache_get_metadata_dev_size(cache->cmd, &nr_blocks_metadata);
2876 if (r) {
2877 DMERR("could not get metadata device size");
2878 goto err;
2879 }
2880
2881 residency = policy_residency(cache->policy);
2882
Mike Snitzer6a388612014-01-09 16:04:12 -05002883 DMEMIT("%u %llu/%llu %u %llu/%llu %u %u %u %u %u %u %llu ",
2884 (unsigned)(DM_CACHE_METADATA_BLOCK_SIZE >> SECTOR_SHIFT),
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002885 (unsigned long long)(nr_blocks_metadata - nr_free_blocks_metadata),
2886 (unsigned long long)nr_blocks_metadata,
Mike Snitzer6a388612014-01-09 16:04:12 -05002887 cache->sectors_per_block,
2888 (unsigned long long) from_cblock(residency),
2889 (unsigned long long) from_cblock(cache->cache_size),
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002890 (unsigned) atomic_read(&cache->stats.read_hit),
2891 (unsigned) atomic_read(&cache->stats.read_miss),
2892 (unsigned) atomic_read(&cache->stats.write_hit),
2893 (unsigned) atomic_read(&cache->stats.write_miss),
2894 (unsigned) atomic_read(&cache->stats.demotion),
2895 (unsigned) atomic_read(&cache->stats.promotion),
Mike Snitzer6a388612014-01-09 16:04:12 -05002896 (unsigned long long) from_cblock(cache->nr_dirty));
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002897
Joe Thornber2ee57d52013-10-24 14:10:29 -04002898 if (writethrough_mode(&cache->features))
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002899 DMEMIT("1 writethrough ");
Joe Thornber2ee57d52013-10-24 14:10:29 -04002900
2901 else if (passthrough_mode(&cache->features))
2902 DMEMIT("1 passthrough ");
2903
2904 else if (writeback_mode(&cache->features))
2905 DMEMIT("1 writeback ");
2906
2907 else {
2908 DMERR("internal error: unknown io mode: %d", (int) cache->features.io_mode);
2909 goto err;
2910 }
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002911
2912 DMEMIT("2 migration_threshold %llu ", (unsigned long long) cache->migration_threshold);
Mike Snitzer2e68c4e2014-01-15 21:06:55 -05002913
2914 DMEMIT("%s ", dm_cache_policy_get_name(cache->policy));
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002915 if (sz < maxlen) {
2916 r = policy_emit_config_values(cache->policy, result + sz, maxlen - sz);
2917 if (r)
2918 DMERR("policy_emit_config_values returned %d", r);
2919 }
2920
2921 break;
2922
2923 case STATUSTYPE_TABLE:
2924 format_dev_t(buf, cache->metadata_dev->bdev->bd_dev);
2925 DMEMIT("%s ", buf);
2926 format_dev_t(buf, cache->cache_dev->bdev->bd_dev);
2927 DMEMIT("%s ", buf);
2928 format_dev_t(buf, cache->origin_dev->bdev->bd_dev);
2929 DMEMIT("%s", buf);
2930
2931 for (i = 0; i < cache->nr_ctr_args - 1; i++)
2932 DMEMIT(" %s", cache->ctr_args[i]);
2933 if (cache->nr_ctr_args)
2934 DMEMIT(" %s", cache->ctr_args[cache->nr_ctr_args - 1]);
2935 }
2936
2937 return;
2938
2939err:
2940 DMEMIT("Error");
2941}
2942
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002943/*
Joe Thornber65790ff2013-11-08 16:39:50 +00002944 * A cache block range can take two forms:
2945 *
2946 * i) A single cblock, eg. '3456'
 2947 * ii) A begin and end cblock with a hyphen between, eg. 123-234
2948 */
2949static int parse_cblock_range(struct cache *cache, const char *str,
2950 struct cblock_range *result)
2951{
2952 char dummy;
2953 uint64_t b, e;
2954 int r;
2955
2956 /*
2957 * Try and parse form (ii) first.
2958 */
2959 r = sscanf(str, "%llu-%llu%c", &b, &e, &dummy);
2960 if (r < 0)
2961 return r;
2962
2963 if (r == 2) {
2964 result->begin = to_cblock(b);
2965 result->end = to_cblock(e);
2966 return 0;
2967 }
2968
2969 /*
2970 * That didn't work, try form (i).
2971 */
2972 r = sscanf(str, "%llu%c", &b, &dummy);
2973 if (r < 0)
2974 return r;
2975
2976 if (r == 1) {
2977 result->begin = to_cblock(b);
2978 result->end = to_cblock(from_cblock(result->begin) + 1u);
2979 return 0;
2980 }
2981
2982 DMERR("invalid cblock range '%s'", str);
2983 return -EINVAL;
2984}
2985
2986static int validate_cblock_range(struct cache *cache, struct cblock_range *range)
2987{
2988 uint64_t b = from_cblock(range->begin);
2989 uint64_t e = from_cblock(range->end);
2990 uint64_t n = from_cblock(cache->cache_size);
2991
2992 if (b >= n) {
2993 DMERR("begin cblock out of range: %llu >= %llu", b, n);
2994 return -EINVAL;
2995 }
2996
2997 if (e > n) {
2998 DMERR("end cblock out of range: %llu > %llu", e, n);
2999 return -EINVAL;
3000 }
3001
3002 if (b >= e) {
3003 DMERR("invalid cblock range: %llu >= %llu", b, e);
3004 return -EINVAL;
3005 }
3006
3007 return 0;
3008}
3009
3010static int request_invalidation(struct cache *cache, struct cblock_range *range)
3011{
3012 struct invalidation_request req;
3013
3014 INIT_LIST_HEAD(&req.list);
3015 req.cblocks = range;
3016 atomic_set(&req.complete, 0);
3017 req.err = 0;
3018 init_waitqueue_head(&req.result_wait);
3019
3020 spin_lock(&cache->invalidation_lock);
3021 list_add(&req.list, &cache->invalidation_requests);
3022 spin_unlock(&cache->invalidation_lock);
3023 wake_worker(cache);
3024
3025 wait_event(req.result_wait, atomic_read(&req.complete));
3026 return req.err;
3027}
3028
3029static int process_invalidate_cblocks_message(struct cache *cache, unsigned count,
3030 const char **cblock_ranges)
3031{
3032 int r = 0;
3033 unsigned i;
3034 struct cblock_range range;
3035
3036 if (!passthrough_mode(&cache->features)) {
3037 DMERR("cache has to be in passthrough mode for invalidation");
3038 return -EPERM;
3039 }
3040
3041 for (i = 0; i < count; i++) {
3042 r = parse_cblock_range(cache, cblock_ranges[i], &range);
3043 if (r)
3044 break;
3045
3046 r = validate_cblock_range(cache, &range);
3047 if (r)
3048 break;
3049
3050 /*
3051 * Pass begin and end origin blocks to the worker and wake it.
3052 */
3053 r = request_invalidation(cache, &range);
3054 if (r)
3055 break;
3056 }
3057
3058 return r;
3059}
3060
3061/*
3062 * Supports
3063 * "<key> <value>"
3064 * and
3065 * "invalidate_cblocks [(<begin>)|(<begin>-<end>)]*
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00003066 *
3067 * The key migration_threshold is supported by the cache target core.
3068 */
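/*
 * Hypothetical invocations of both forms via dmsetup (device name and
 * values invented for illustration):
 *
 *   dmsetup message cached 0 migration_threshold 4096
 *   dmsetup message cached 0 invalidate_cblocks 3456 700-800
 *
 * The second form is only accepted while the cache is in passthrough mode.
 */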
3069static int cache_message(struct dm_target *ti, unsigned argc, char **argv)
3070{
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00003071 struct cache *cache = ti->private;
3072
Joe Thornber65790ff2013-11-08 16:39:50 +00003073 if (!argc)
3074 return -EINVAL;
3075
Mike Snitzer7b6b2bc2013-11-12 12:17:43 -05003076 if (!strcasecmp(argv[0], "invalidate_cblocks"))
Joe Thornber65790ff2013-11-08 16:39:50 +00003077 return process_invalidate_cblocks_message(cache, argc - 1, (const char **) argv + 1);
3078
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00003079 if (argc != 2)
3080 return -EINVAL;
3081
Joe Thornber2f14f4b2013-05-10 14:37:21 +01003082 return set_config_value(cache, argv[0], argv[1]);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00003083}
3084
3085static int cache_iterate_devices(struct dm_target *ti,
3086 iterate_devices_callout_fn fn, void *data)
3087{
3088 int r = 0;
3089 struct cache *cache = ti->private;
3090
3091 r = fn(ti, cache->cache_dev, 0, get_dev_size(cache->cache_dev), data);
3092 if (!r)
3093 r = fn(ti, cache->origin_dev, 0, ti->len, data);
3094
3095 return r;
3096}
3097
3098/*
3099 * We assume I/O is going to the origin (which is the volume
3100 * more likely to have restrictions e.g. by being striped).
3101 * (Looking up the exact location of the data would be expensive
3102 * and could always be out of date by the time the bio is submitted.)
3103 */
3104static int cache_bvec_merge(struct dm_target *ti,
3105 struct bvec_merge_data *bvm,
3106 struct bio_vec *biovec, int max_size)
3107{
3108 struct cache *cache = ti->private;
3109 struct request_queue *q = bdev_get_queue(cache->origin_dev->bdev);
3110
3111 if (!q->merge_bvec_fn)
3112 return max_size;
3113
3114 bvm->bi_bdev = cache->origin_dev->bdev;
3115 return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
3116}
3117
3118static void set_discard_limits(struct cache *cache, struct queue_limits *limits)
3119{
3120 /*
3121 * FIXME: these limits may be incompatible with the cache device
3122 */
3123 limits->max_discard_sectors = cache->discard_block_size * 1024;
3124 limits->discard_granularity = cache->discard_block_size << SECTOR_SHIFT;
3125}
3126
3127static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
3128{
3129 struct cache *cache = ti->private;
Mike Snitzerf6109372013-08-20 15:02:41 -04003130 uint64_t io_opt_sectors = limits->io_opt >> SECTOR_SHIFT;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00003131
Mike Snitzerf6109372013-08-20 15:02:41 -04003132 /*
3133 * If the system-determined stacked limits are compatible with the
3134 * cache's blocksize (io_opt is a factor) do not override them.
3135 */
3136 if (io_opt_sectors < cache->sectors_per_block ||
3137 do_div(io_opt_sectors, cache->sectors_per_block)) {
3138 blk_limits_io_min(limits, 0);
3139 blk_limits_io_opt(limits, cache->sectors_per_block << SECTOR_SHIFT);
3140 }
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00003141 set_discard_limits(cache, limits);
3142}
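/*
 * For illustration (numbers invented): with 512-sector cache blocks, a
 * stacked io_opt of 2048 sectors (1 MiB) is left alone because it is a
 * multiple of the block size, whereas an io_opt of 1280 sectors would be
 * overridden: io_min is reset to 0 and io_opt set to the 256 KiB cache
 * block size.
 */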
3143
3144/*----------------------------------------------------------------*/
3145
3146static struct target_type cache_target = {
3147 .name = "cache",
Mike Snitzer6a388612014-01-09 16:04:12 -05003148 .version = {1, 3, 0},
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00003149 .module = THIS_MODULE,
3150 .ctr = cache_ctr,
3151 .dtr = cache_dtr,
3152 .map = cache_map,
3153 .end_io = cache_end_io,
3154 .postsuspend = cache_postsuspend,
3155 .preresume = cache_preresume,
3156 .resume = cache_resume,
3157 .status = cache_status,
3158 .message = cache_message,
3159 .iterate_devices = cache_iterate_devices,
3160 .merge = cache_bvec_merge,
3161 .io_hints = cache_io_hints,
3162};
3163
3164static int __init dm_cache_init(void)
3165{
3166 int r;
3167
3168 r = dm_register_target(&cache_target);
3169 if (r) {
3170 DMERR("cache target registration failed: %d", r);
3171 return r;
3172 }
3173
3174 migration_cache = KMEM_CACHE(dm_cache_migration, 0);
3175 if (!migration_cache) {
3176 dm_unregister_target(&cache_target);
3177 return -ENOMEM;
3178 }
3179
3180 return 0;
3181}
3182
3183static void __exit dm_cache_exit(void)
3184{
3185 dm_unregister_target(&cache_target);
3186 kmem_cache_destroy(migration_cache);
3187}
3188
3189module_init(dm_cache_init);
3190module_exit(dm_cache_exit);
3191
3192MODULE_DESCRIPTION(DM_NAME " cache target");
3193MODULE_AUTHOR("Joe Thornber <ejt@redhat.com>");
3194MODULE_LICENSE("GPL");