/*
 * Copyright (C) 2011-2012 Red Hat, Inc.
 *
 * This file is released under the GPL.
 */

#include "dm-thin-metadata.h"
#include "persistent-data/dm-btree.h"
#include "persistent-data/dm-space-map.h"
#include "persistent-data/dm-space-map-disk.h"
#include "persistent-data/dm-transaction-manager.h"

#include <linux/list.h>
#include <linux/device-mapper.h>
#include <linux/workqueue.h>

/*--------------------------------------------------------------------------
 * As far as the metadata goes, there is:
 *
 * - A superblock in block zero, taking up fewer than 512 bytes for
 *   atomic writes.
 *
 * - A space map managing the metadata blocks.
 *
 * - A space map managing the data blocks.
 *
 * - A btree mapping our internal thin dev ids onto struct disk_device_details.
 *
 * - A hierarchical btree with 2 levels, which effectively maps (thin
 *   dev id, virtual block) -> block_time.  Block time is a 64-bit
 *   field holding the time in the low 24 bits and the block in the
 *   upper 40 bits.
 *
 * BTrees consist solely of btree_nodes, each of which fills a block.
 * Some are internal nodes; their values are __le64s pointing to other
 * nodes.  Leaf nodes can store data of any reasonable size (i.e. much
 * smaller than the block size).  The nodes consist of the header,
 * followed by an array of keys, followed by an array of values.  We
 * binary search on the keys, so they're all held together to help the
 * CPU cache.
 *
 * Space maps have 2 btrees:
 *
 * - One maps a uint64_t onto a struct index_entry, which points to a
 *   bitmap block and records details such as how many free entries
 *   there are.
 *
 * - The bitmap blocks have a header (for the checksum); the rest of
 *   the block is pairs of bits with the following meaning:
 *
 *   0 - ref count is 0
 *   1 - ref count is 1
 *   2 - ref count is 2
 *   3 - ref count is higher than 2
 *
 * - If the count is higher than 2 then the ref count is entered in a
 *   second btree that directly maps the block_address to a uint32_t ref
 *   count.
 *
 * The metadata variant of the space map doesn't have a bitmaps btree.
 * Instead it has a single block's worth of index_entries.  This avoids
 * recursive issues with the bitmap btree needing to allocate space in
 * order to insert.  With a small data block size such as 64k the
 * metadata can support data devices that are hundreds of terabytes.
 *
 * The space maps allocate space linearly from front to back.  Space that
 * is freed in a transaction is never recycled within that transaction.
 * To try to avoid fragmenting _free_ space the allocator always goes
 * back and fills in gaps.
 *
 * All metadata io is in THIN_METADATA_BLOCK_SIZE sized/aligned chunks
 * from the block manager.
 *--------------------------------------------------------------------------*/
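
/*
 * Illustrative example (not used by the code below): with the packing
 * implemented by pack_block_time()/unpack_block_time(), a mapping to data
 * block 0x1234 made at time 5 is stored as the 64-bit value
 * (0x1234 << 24) | 5 == 0x1234000005.  Unpacking shifts right by 24 bits
 * to recover the block and masks with ((1 << 24) - 1) to recover the time.
 */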

#define DM_MSG_PREFIX "thin metadata"

#define THIN_SUPERBLOCK_MAGIC 27022010
#define THIN_SUPERBLOCK_LOCATION 0
#define THIN_VERSION 2
#define THIN_METADATA_CACHE_SIZE 64
#define SECTOR_TO_BLOCK_SHIFT 3

/*
 * For btree insert:
 *  3 for btree insert +
 *  2 for btree lookup used within space map
 * For btree remove:
 *  2 for shadow spine +
 *  4 to rebalance 3 child nodes
 */
#define THIN_MAX_CONCURRENT_LOCKS 6

/* This should be plenty */
#define SPACE_MAP_ROOT_SIZE 128

/*
 * Little endian on-disk superblock and device details.
 */
struct thin_disk_superblock {
	__le32 csum;	/* Checksum of superblock except for this field. */
	__le32 flags;
	__le64 blocknr;	/* This block number, dm_block_t. */

	__u8 uuid[16];
	__le64 magic;
	__le32 version;
	__le32 time;

	__le64 trans_id;

	/*
	 * Root held by userspace transactions.
	 */
	__le64 held_root;

	__u8 data_space_map_root[SPACE_MAP_ROOT_SIZE];
	__u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE];

	/*
	 * 2-level btree mapping (dev_id, (dev block, time)) -> data block
	 */
	__le64 data_mapping_root;

	/*
	 * Device detail root mapping dev_id -> device_details
	 */
	__le64 device_details_root;

	__le32 data_block_size;		/* In 512-byte sectors. */

	__le32 metadata_block_size;	/* In 512-byte sectors. */
	__le64 metadata_nr_blocks;

	__le32 compat_flags;
	__le32 compat_ro_flags;
	__le32 incompat_flags;
} __packed;

struct disk_device_details {
	__le64 mapped_blocks;
	__le64 transaction_id;		/* When created. */
	__le32 creation_time;
	__le32 snapshotted_time;
} __packed;

struct dm_pool_metadata {
	struct hlist_node hash;

	struct block_device *bdev;
	struct dm_block_manager *bm;
	struct dm_space_map *metadata_sm;
	struct dm_space_map *data_sm;
	struct dm_transaction_manager *tm;
	struct dm_transaction_manager *nb_tm;

	/*
	 * Two-level btree.
	 * First level holds thin_dev_t.
	 * Second level holds mappings.
	 */
	struct dm_btree_info info;

	/*
	 * Non-blocking version of the above.
	 */
	struct dm_btree_info nb_info;

	/*
	 * Just the top level for deleting whole devices.
	 */
	struct dm_btree_info tl_info;

	/*
	 * Just the bottom level for creating new devices.
	 */
	struct dm_btree_info bl_info;

	/*
	 * Describes the device details btree.
	 */
	struct dm_btree_info details_info;

	struct rw_semaphore root_lock;
	uint32_t time;
	dm_block_t root;
	dm_block_t details_root;
	struct list_head thin_devices;
	uint64_t trans_id;
	unsigned long flags;
	sector_t data_block_size;

	/*
	 * We reserve a section of the metadata for commit overhead.
	 * All reported space does *not* include this.
	 */
	dm_block_t metadata_reserve;

	/*
	 * Set if a transaction has to be aborted but the attempt to roll back
	 * to the previous (good) transaction failed.  The only pool metadata
	 * operation possible in this state is the closing of the device.
	 */
	bool fail_io:1;

	/*
	 * Reading the space map roots can fail, so we read them into these
	 * buffers before the superblock is locked and updated.
	 */
	__u8 data_space_map_root[SPACE_MAP_ROOT_SIZE];
	__u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE];
};

struct dm_thin_device {
	struct list_head list;
	struct dm_pool_metadata *pmd;
	dm_thin_id id;

	int open_count;
	bool changed:1;
	bool aborted_with_changes:1;
	uint64_t mapped_blocks;
	uint64_t transaction_id;
	uint32_t creation_time;
	uint32_t snapshotted_time;
};

227/*----------------------------------------------------------------
228 * superblock validator
229 *--------------------------------------------------------------*/
230
231#define SUPERBLOCK_CSUM_XOR 160774
232
233static void sb_prepare_for_write(struct dm_block_validator *v,
234 struct dm_block *b,
235 size_t block_size)
236{
237 struct thin_disk_superblock *disk_super = dm_block_data(b);
238
239 disk_super->blocknr = cpu_to_le64(dm_block_location(b));
240 disk_super->csum = cpu_to_le32(dm_bm_checksum(&disk_super->flags,
241 block_size - sizeof(__le32),
242 SUPERBLOCK_CSUM_XOR));
243}
244
245static int sb_check(struct dm_block_validator *v,
246 struct dm_block *b,
247 size_t block_size)
248{
249 struct thin_disk_superblock *disk_super = dm_block_data(b);
250 __le32 csum_le;
251
252 if (dm_block_location(b) != le64_to_cpu(disk_super->blocknr)) {
253 DMERR("sb_check failed: blocknr %llu: "
254 "wanted %llu", le64_to_cpu(disk_super->blocknr),
255 (unsigned long long)dm_block_location(b));
256 return -ENOTBLK;
257 }
258
259 if (le64_to_cpu(disk_super->magic) != THIN_SUPERBLOCK_MAGIC) {
260 DMERR("sb_check failed: magic %llu: "
261 "wanted %llu", le64_to_cpu(disk_super->magic),
262 (unsigned long long)THIN_SUPERBLOCK_MAGIC);
263 return -EILSEQ;
264 }
265
266 csum_le = cpu_to_le32(dm_bm_checksum(&disk_super->flags,
267 block_size - sizeof(__le32),
268 SUPERBLOCK_CSUM_XOR));
269 if (csum_le != disk_super->csum) {
270 DMERR("sb_check failed: csum %u: wanted %u",
271 le32_to_cpu(csum_le), le32_to_cpu(disk_super->csum));
272 return -EILSEQ;
273 }
274
275 return 0;
276}
277
278static struct dm_block_validator sb_validator = {
279 .name = "superblock",
280 .prepare_for_write = sb_prepare_for_write,
281 .check = sb_check
282};
283
284/*----------------------------------------------------------------
285 * Methods for the btree value types
286 *--------------------------------------------------------------*/
287
288static uint64_t pack_block_time(dm_block_t b, uint32_t t)
289{
290 return (b << 24) | t;
291}
292
293static void unpack_block_time(uint64_t v, dm_block_t *b, uint32_t *t)
294{
295 *b = v >> 24;
296 *t = v & ((1 << 24) - 1);
297}
298
Mike Snitzer018cede2013-03-01 22:45:47 +0000299static void data_block_inc(void *context, const void *value_le)
Joe Thornber991d9fa2011-10-31 20:21:18 +0000300{
301 struct dm_space_map *sm = context;
302 __le64 v_le;
303 uint64_t b;
304 uint32_t t;
305
306 memcpy(&v_le, value_le, sizeof(v_le));
307 unpack_block_time(le64_to_cpu(v_le), &b, &t);
308 dm_sm_inc_block(sm, b);
309}
310
Mike Snitzer018cede2013-03-01 22:45:47 +0000311static void data_block_dec(void *context, const void *value_le)
Joe Thornber991d9fa2011-10-31 20:21:18 +0000312{
313 struct dm_space_map *sm = context;
314 __le64 v_le;
315 uint64_t b;
316 uint32_t t;
317
318 memcpy(&v_le, value_le, sizeof(v_le));
319 unpack_block_time(le64_to_cpu(v_le), &b, &t);
320 dm_sm_dec_block(sm, b);
321}
322
Mike Snitzer018cede2013-03-01 22:45:47 +0000323static int data_block_equal(void *context, const void *value1_le, const void *value2_le)
Joe Thornber991d9fa2011-10-31 20:21:18 +0000324{
325 __le64 v1_le, v2_le;
326 uint64_t b1, b2;
327 uint32_t t;
328
329 memcpy(&v1_le, value1_le, sizeof(v1_le));
330 memcpy(&v2_le, value2_le, sizeof(v2_le));
331 unpack_block_time(le64_to_cpu(v1_le), &b1, &t);
332 unpack_block_time(le64_to_cpu(v2_le), &b2, &t);
333
334 return b1 == b2;
335}
336
Mike Snitzer018cede2013-03-01 22:45:47 +0000337static void subtree_inc(void *context, const void *value)
Joe Thornber991d9fa2011-10-31 20:21:18 +0000338{
339 struct dm_btree_info *info = context;
340 __le64 root_le;
341 uint64_t root;
342
343 memcpy(&root_le, value, sizeof(root_le));
344 root = le64_to_cpu(root_le);
345 dm_tm_inc(info->tm, root);
346}
347
Mike Snitzer018cede2013-03-01 22:45:47 +0000348static void subtree_dec(void *context, const void *value)
Joe Thornber991d9fa2011-10-31 20:21:18 +0000349{
350 struct dm_btree_info *info = context;
351 __le64 root_le;
352 uint64_t root;
353
354 memcpy(&root_le, value, sizeof(root_le));
355 root = le64_to_cpu(root_le);
356 if (dm_btree_del(info, root))
Mike Snitzer29f929b2016-01-21 14:04:04 -0500357 DMERR("btree delete failed");
Joe Thornber991d9fa2011-10-31 20:21:18 +0000358}
359
Mike Snitzer018cede2013-03-01 22:45:47 +0000360static int subtree_equal(void *context, const void *value1_le, const void *value2_le)
Joe Thornber991d9fa2011-10-31 20:21:18 +0000361{
362 __le64 v1_le, v2_le;
363 memcpy(&v1_le, value1_le, sizeof(v1_le));
364 memcpy(&v2_le, value2_le, sizeof(v2_le));
365
366 return v1_le == v2_le;
367}
368
369/*----------------------------------------------------------------*/
370
Joe Thornber25971192012-07-27 15:08:09 +0100371static int superblock_lock_zero(struct dm_pool_metadata *pmd,
372 struct dm_block **sblock)
373{
374 return dm_bm_write_lock_zero(pmd->bm, THIN_SUPERBLOCK_LOCATION,
375 &sb_validator, sblock);
376}
377
378static int superblock_lock(struct dm_pool_metadata *pmd,
379 struct dm_block **sblock)
380{
381 return dm_bm_write_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION,
382 &sb_validator, sblock);
383}
384
Joe Thornber332627d2012-07-27 15:08:10 +0100385static int __superblock_all_zeroes(struct dm_block_manager *bm, int *result)
Joe Thornber991d9fa2011-10-31 20:21:18 +0000386{
387 int r;
388 unsigned i;
389 struct dm_block *b;
390 __le64 *data_le, zero = cpu_to_le64(0);
391 unsigned block_size = dm_bm_block_size(bm) / sizeof(__le64);
392
393 /*
394 * We can't use a validator here - it may be all zeroes.
395 */
396 r = dm_bm_read_lock(bm, THIN_SUPERBLOCK_LOCATION, NULL, &b);
397 if (r)
398 return r;
399
400 data_le = dm_block_data(b);
401 *result = 1;
402 for (i = 0; i < block_size; i++) {
403 if (data_le[i] != zero) {
404 *result = 0;
405 break;
406 }
407 }
408
Mikulas Patocka4c7da062015-10-22 16:46:59 -0400409 dm_bm_unlock(b);
410
411 return 0;
Joe Thornber991d9fa2011-10-31 20:21:18 +0000412}
413
Joe Thornber41675ae2012-07-27 15:08:08 +0100414static void __setup_btree_details(struct dm_pool_metadata *pmd)
415{
416 pmd->info.tm = pmd->tm;
417 pmd->info.levels = 2;
418 pmd->info.value_type.context = pmd->data_sm;
419 pmd->info.value_type.size = sizeof(__le64);
420 pmd->info.value_type.inc = data_block_inc;
421 pmd->info.value_type.dec = data_block_dec;
422 pmd->info.value_type.equal = data_block_equal;
423
424 memcpy(&pmd->nb_info, &pmd->info, sizeof(pmd->nb_info));
425 pmd->nb_info.tm = pmd->nb_tm;
426
427 pmd->tl_info.tm = pmd->tm;
428 pmd->tl_info.levels = 1;
Joe Thornbere3cbf942012-12-21 20:23:32 +0000429 pmd->tl_info.value_type.context = &pmd->bl_info;
Joe Thornber41675ae2012-07-27 15:08:08 +0100430 pmd->tl_info.value_type.size = sizeof(__le64);
431 pmd->tl_info.value_type.inc = subtree_inc;
432 pmd->tl_info.value_type.dec = subtree_dec;
433 pmd->tl_info.value_type.equal = subtree_equal;
434
435 pmd->bl_info.tm = pmd->tm;
436 pmd->bl_info.levels = 1;
437 pmd->bl_info.value_type.context = pmd->data_sm;
438 pmd->bl_info.value_type.size = sizeof(__le64);
439 pmd->bl_info.value_type.inc = data_block_inc;
440 pmd->bl_info.value_type.dec = data_block_dec;
441 pmd->bl_info.value_type.equal = data_block_equal;
442
443 pmd->details_info.tm = pmd->tm;
444 pmd->details_info.levels = 1;
445 pmd->details_info.value_type.context = NULL;
446 pmd->details_info.value_type.size = sizeof(struct disk_device_details);
447 pmd->details_info.value_type.inc = NULL;
448 pmd->details_info.value_type.dec = NULL;
449 pmd->details_info.value_type.equal = NULL;
450}
451
Joe Thornber5a320832014-03-27 14:13:23 +0000452static int save_sm_roots(struct dm_pool_metadata *pmd)
453{
454 int r;
455 size_t len;
456
457 r = dm_sm_root_size(pmd->metadata_sm, &len);
458 if (r < 0)
459 return r;
460
461 r = dm_sm_copy_root(pmd->metadata_sm, &pmd->metadata_space_map_root, len);
462 if (r < 0)
463 return r;
464
465 r = dm_sm_root_size(pmd->data_sm, &len);
466 if (r < 0)
467 return r;
468
469 return dm_sm_copy_root(pmd->data_sm, &pmd->data_space_map_root, len);
470}
471
472static void copy_sm_roots(struct dm_pool_metadata *pmd,
473 struct thin_disk_superblock *disk)
474{
475 memcpy(&disk->metadata_space_map_root,
476 &pmd->metadata_space_map_root,
477 sizeof(pmd->metadata_space_map_root));
478
479 memcpy(&disk->data_space_map_root,
480 &pmd->data_space_map_root,
481 sizeof(pmd->data_space_map_root));
482}
483
Joe Thornber9cb66532012-07-27 15:08:10 +0100484static int __write_initial_superblock(struct dm_pool_metadata *pmd)
485{
486 int r;
487 struct dm_block *sblock;
488 struct thin_disk_superblock *disk_super;
489 sector_t bdev_size = i_size_read(pmd->bdev->bd_inode) >> SECTOR_SHIFT;
490
491 if (bdev_size > THIN_METADATA_MAX_SECTORS)
492 bdev_size = THIN_METADATA_MAX_SECTORS;
493
Joe Thornber10d2a9f2012-07-27 15:08:11 +0100494 r = dm_sm_commit(pmd->data_sm);
495 if (r < 0)
496 return r;
497
Joe Thornbercc681812017-05-15 09:43:05 -0400498 r = dm_tm_pre_commit(pmd->tm);
Joe Thornber5a320832014-03-27 14:13:23 +0000499 if (r < 0)
500 return r;
501
Joe Thornbercc681812017-05-15 09:43:05 -0400502 r = save_sm_roots(pmd);
Joe Thornber10d2a9f2012-07-27 15:08:11 +0100503 if (r < 0)
504 return r;
505
Joe Thornber9cb66532012-07-27 15:08:10 +0100506 r = superblock_lock_zero(pmd, &sblock);
507 if (r)
508 return r;
509
510 disk_super = dm_block_data(sblock);
Joe Thornber10d2a9f2012-07-27 15:08:11 +0100511 disk_super->flags = 0;
Joe Thornber583ceee2012-07-27 15:08:11 +0100512 memset(disk_super->uuid, 0, sizeof(disk_super->uuid));
Joe Thornber9cb66532012-07-27 15:08:10 +0100513 disk_super->magic = cpu_to_le64(THIN_SUPERBLOCK_MAGIC);
514 disk_super->version = cpu_to_le32(THIN_VERSION);
515 disk_super->time = 0;
Joe Thornber10d2a9f2012-07-27 15:08:11 +0100516 disk_super->trans_id = 0;
517 disk_super->held_root = 0;
518
Joe Thornber5a320832014-03-27 14:13:23 +0000519 copy_sm_roots(pmd, disk_super);
Joe Thornber10d2a9f2012-07-27 15:08:11 +0100520
521 disk_super->data_mapping_root = cpu_to_le64(pmd->root);
522 disk_super->device_details_root = cpu_to_le64(pmd->details_root);
Mike Snitzer7d489352014-02-12 23:58:15 -0500523 disk_super->metadata_block_size = cpu_to_le32(THIN_METADATA_BLOCK_SIZE);
Joe Thornber9cb66532012-07-27 15:08:10 +0100524 disk_super->metadata_nr_blocks = cpu_to_le64(bdev_size >> SECTOR_TO_BLOCK_SHIFT);
525 disk_super->data_block_size = cpu_to_le32(pmd->data_block_size);
526
Joe Thornber270938b2012-07-27 15:08:11 +0100527 return dm_tm_commit(pmd->tm, sblock);
Joe Thornber9cb66532012-07-27 15:08:10 +0100528}
529
Joe Thornbera97e5e62012-07-27 15:08:12 +0100530static int __format_metadata(struct dm_pool_metadata *pmd)
Joe Thornber991d9fa2011-10-31 20:21:18 +0000531{
532 int r;
Joe Thornber991d9fa2011-10-31 20:21:18 +0000533
Joe Thornbere4d22052012-07-27 15:08:12 +0100534 r = dm_tm_create_with_sm(pmd->bm, THIN_SUPERBLOCK_LOCATION,
535 &pmd->tm, &pmd->metadata_sm);
536 if (r < 0) {
537 DMERR("tm_create_with_sm failed");
538 return r;
539 }
Joe Thornber991d9fa2011-10-31 20:21:18 +0000540
Joe Thornbera97e5e62012-07-27 15:08:12 +0100541 pmd->data_sm = dm_sm_disk_create(pmd->tm, 0);
Joe Thornbere4d22052012-07-27 15:08:12 +0100542 if (IS_ERR(pmd->data_sm)) {
543 DMERR("sm_disk_create failed");
544 r = PTR_ERR(pmd->data_sm);
Joe Thornber0fa5b172012-07-27 15:08:14 +0100545 goto bad_cleanup_tm;
Joe Thornber991d9fa2011-10-31 20:21:18 +0000546 }
547
Joe Thornberd6332812012-07-27 15:08:12 +0100548 pmd->nb_tm = dm_tm_create_non_blocking_clone(pmd->tm);
Joe Thornber991d9fa2011-10-31 20:21:18 +0000549 if (!pmd->nb_tm) {
Joe Thornber0fa5b172012-07-27 15:08:14 +0100550 DMERR("could not create non-blocking clone tm");
Joe Thornber991d9fa2011-10-31 20:21:18 +0000551 r = -ENOMEM;
Joe Thornber0fa5b172012-07-27 15:08:14 +0100552 goto bad_cleanup_data_sm;
Joe Thornber991d9fa2011-10-31 20:21:18 +0000553 }
554
Joe Thornber41675ae2012-07-27 15:08:08 +0100555 __setup_btree_details(pmd);
Joe Thornber991d9fa2011-10-31 20:21:18 +0000556
Joe Thornber9cb66532012-07-27 15:08:10 +0100557 r = dm_btree_empty(&pmd->info, &pmd->root);
558 if (r < 0)
Joe Thornber0fa5b172012-07-27 15:08:14 +0100559 goto bad_cleanup_nb_tm;
Joe Thornber9cb66532012-07-27 15:08:10 +0100560
561 r = dm_btree_empty(&pmd->details_info, &pmd->details_root);
562 if (r < 0) {
563 DMERR("couldn't create devices root");
Joe Thornber0fa5b172012-07-27 15:08:14 +0100564 goto bad_cleanup_nb_tm;
Joe Thornber9cb66532012-07-27 15:08:10 +0100565 }
566
567 r = __write_initial_superblock(pmd);
568 if (r)
Joe Thornber0fa5b172012-07-27 15:08:14 +0100569 goto bad_cleanup_nb_tm;
Joe Thornber9cb66532012-07-27 15:08:10 +0100570
Joe Thornber991d9fa2011-10-31 20:21:18 +0000571 return 0;
572
Joe Thornber0fa5b172012-07-27 15:08:14 +0100573bad_cleanup_nb_tm:
574 dm_tm_destroy(pmd->nb_tm);
575bad_cleanup_data_sm:
Joe Thornberd6332812012-07-27 15:08:12 +0100576 dm_sm_destroy(pmd->data_sm);
Joe Thornber0fa5b172012-07-27 15:08:14 +0100577bad_cleanup_tm:
Joe Thornberd6332812012-07-27 15:08:12 +0100578 dm_tm_destroy(pmd->tm);
579 dm_sm_destroy(pmd->metadata_sm);
Joe Thornber991d9fa2011-10-31 20:21:18 +0000580
581 return r;
582}
583
Mike Snitzerd73ec522012-07-27 15:08:13 +0100584static int __check_incompat_features(struct thin_disk_superblock *disk_super,
585 struct dm_pool_metadata *pmd)
586{
587 uint32_t features;
588
589 features = le32_to_cpu(disk_super->incompat_flags) & ~THIN_FEATURE_INCOMPAT_SUPP;
590 if (features) {
591 DMERR("could not access metadata due to unsupported optional features (%lx).",
592 (unsigned long)features);
593 return -EINVAL;
594 }
595
596 /*
597 * Check for read-only metadata to skip the following RDWR checks.
598 */
599 if (get_disk_ro(pmd->bdev->bd_disk))
600 return 0;
601
602 features = le32_to_cpu(disk_super->compat_ro_flags) & ~THIN_FEATURE_COMPAT_RO_SUPP;
603 if (features) {
604 DMERR("could not access metadata RDWR due to unsupported optional features (%lx).",
605 (unsigned long)features);
606 return -EINVAL;
607 }
608
609 return 0;
610}
611
Joe Thornbere4d22052012-07-27 15:08:12 +0100612static int __open_metadata(struct dm_pool_metadata *pmd)
613{
614 int r;
615 struct dm_block *sblock;
616 struct thin_disk_superblock *disk_super;
617
618 r = dm_bm_read_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION,
619 &sb_validator, &sblock);
620 if (r < 0) {
621 DMERR("couldn't read superblock");
622 return r;
623 }
624
625 disk_super = dm_block_data(sblock);
Mike Snitzerd73ec522012-07-27 15:08:13 +0100626
Mike Snitzer9aec8622014-07-14 16:35:54 -0400627 /* Verify the data block size hasn't changed */
628 if (le32_to_cpu(disk_super->data_block_size) != pmd->data_block_size) {
629 DMERR("changing the data block size (from %u to %llu) is not supported",
630 le32_to_cpu(disk_super->data_block_size),
631 (unsigned long long)pmd->data_block_size);
632 r = -EINVAL;
633 goto bad_unlock_sblock;
634 }
635
Mike Snitzerd73ec522012-07-27 15:08:13 +0100636 r = __check_incompat_features(disk_super, pmd);
Joe Thornber0fa5b172012-07-27 15:08:14 +0100637 if (r < 0)
638 goto bad_unlock_sblock;
Mike Snitzerd73ec522012-07-27 15:08:13 +0100639
Joe Thornbere4d22052012-07-27 15:08:12 +0100640 r = dm_tm_open_with_sm(pmd->bm, THIN_SUPERBLOCK_LOCATION,
641 disk_super->metadata_space_map_root,
642 sizeof(disk_super->metadata_space_map_root),
643 &pmd->tm, &pmd->metadata_sm);
644 if (r < 0) {
645 DMERR("tm_open_with_sm failed");
Joe Thornber0fa5b172012-07-27 15:08:14 +0100646 goto bad_unlock_sblock;
Joe Thornbere4d22052012-07-27 15:08:12 +0100647 }
648
649 pmd->data_sm = dm_sm_disk_open(pmd->tm, disk_super->data_space_map_root,
650 sizeof(disk_super->data_space_map_root));
651 if (IS_ERR(pmd->data_sm)) {
652 DMERR("sm_disk_open failed");
Joe Thornbere4d22052012-07-27 15:08:12 +0100653 r = PTR_ERR(pmd->data_sm);
Joe Thornber0fa5b172012-07-27 15:08:14 +0100654 goto bad_cleanup_tm;
Joe Thornbere4d22052012-07-27 15:08:12 +0100655 }
656
Joe Thornbere4d22052012-07-27 15:08:12 +0100657 pmd->nb_tm = dm_tm_create_non_blocking_clone(pmd->tm);
658 if (!pmd->nb_tm) {
Joe Thornber0fa5b172012-07-27 15:08:14 +0100659 DMERR("could not create non-blocking clone tm");
Joe Thornbere4d22052012-07-27 15:08:12 +0100660 r = -ENOMEM;
Joe Thornber0fa5b172012-07-27 15:08:14 +0100661 goto bad_cleanup_data_sm;
Joe Thornbere4d22052012-07-27 15:08:12 +0100662 }
663
664 __setup_btree_details(pmd);
Mikulas Patocka4c7da062015-10-22 16:46:59 -0400665 dm_bm_unlock(sblock);
666
667 return 0;
Joe Thornbere4d22052012-07-27 15:08:12 +0100668
Joe Thornber0fa5b172012-07-27 15:08:14 +0100669bad_cleanup_data_sm:
Joe Thornbere4d22052012-07-27 15:08:12 +0100670 dm_sm_destroy(pmd->data_sm);
Joe Thornber0fa5b172012-07-27 15:08:14 +0100671bad_cleanup_tm:
Joe Thornbere4d22052012-07-27 15:08:12 +0100672 dm_tm_destroy(pmd->tm);
673 dm_sm_destroy(pmd->metadata_sm);
Joe Thornber0fa5b172012-07-27 15:08:14 +0100674bad_unlock_sblock:
675 dm_bm_unlock(sblock);
Joe Thornbere4d22052012-07-27 15:08:12 +0100676
677 return r;
678}
679
Joe Thornber66b1edc2012-07-27 15:08:14 +0100680static int __open_or_format_metadata(struct dm_pool_metadata *pmd, bool format_device)
Joe Thornbere4d22052012-07-27 15:08:12 +0100681{
Joe Thornber8801e062012-07-27 15:08:13 +0100682 int r, unformatted;
Joe Thornber237074c2012-07-27 15:08:13 +0100683
Joe Thornber8801e062012-07-27 15:08:13 +0100684 r = __superblock_all_zeroes(pmd->bm, &unformatted);
Joe Thornber237074c2012-07-27 15:08:13 +0100685 if (r)
686 return r;
687
Joe Thornber8801e062012-07-27 15:08:13 +0100688 if (unformatted)
Joe Thornber66b1edc2012-07-27 15:08:14 +0100689 return format_device ? __format_metadata(pmd) : -EPERM;
690
691 return __open_metadata(pmd);
Joe Thornbere4d22052012-07-27 15:08:12 +0100692}
693
Joe Thornber66b1edc2012-07-27 15:08:14 +0100694static int __create_persistent_data_objects(struct dm_pool_metadata *pmd, bool format_device)
Joe Thornber332627d2012-07-27 15:08:10 +0100695{
696 int r;
697
Mike Snitzer7d489352014-02-12 23:58:15 -0500698 pmd->bm = dm_block_manager_create(pmd->bdev, THIN_METADATA_BLOCK_SIZE << SECTOR_SHIFT,
Joe Thornber332627d2012-07-27 15:08:10 +0100699 THIN_METADATA_CACHE_SIZE,
700 THIN_MAX_CONCURRENT_LOCKS);
701 if (IS_ERR(pmd->bm)) {
702 DMERR("could not create block manager");
703 return PTR_ERR(pmd->bm);
704 }
705
Joe Thornber66b1edc2012-07-27 15:08:14 +0100706 r = __open_or_format_metadata(pmd, format_device);
Joe Thornber332627d2012-07-27 15:08:10 +0100707 if (r)
708 dm_block_manager_destroy(pmd->bm);
709
710 return r;
711}
712
Joe Thornberf9dd9352012-07-27 15:08:10 +0100713static void __destroy_persistent_data_objects(struct dm_pool_metadata *pmd)
714{
715 dm_sm_destroy(pmd->data_sm);
716 dm_sm_destroy(pmd->metadata_sm);
717 dm_tm_destroy(pmd->nb_tm);
718 dm_tm_destroy(pmd->tm);
719 dm_block_manager_destroy(pmd->bm);
720}
721
Joe Thornber991d9fa2011-10-31 20:21:18 +0000722static int __begin_transaction(struct dm_pool_metadata *pmd)
723{
724 int r;
Joe Thornber991d9fa2011-10-31 20:21:18 +0000725 struct thin_disk_superblock *disk_super;
726 struct dm_block *sblock;
727
728 /*
Joe Thornber991d9fa2011-10-31 20:21:18 +0000729 * We re-read the superblock every time. Shouldn't need to do this
730 * really.
731 */
732 r = dm_bm_read_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION,
733 &sb_validator, &sblock);
734 if (r)
735 return r;
736
737 disk_super = dm_block_data(sblock);
738 pmd->time = le32_to_cpu(disk_super->time);
739 pmd->root = le64_to_cpu(disk_super->data_mapping_root);
740 pmd->details_root = le64_to_cpu(disk_super->device_details_root);
741 pmd->trans_id = le64_to_cpu(disk_super->trans_id);
742 pmd->flags = le32_to_cpu(disk_super->flags);
743 pmd->data_block_size = le32_to_cpu(disk_super->data_block_size);
744
Joe Thornber991d9fa2011-10-31 20:21:18 +0000745 dm_bm_unlock(sblock);
Mike Snitzerd73ec522012-07-27 15:08:13 +0100746 return 0;
Joe Thornber991d9fa2011-10-31 20:21:18 +0000747}
748
749static int __write_changed_details(struct dm_pool_metadata *pmd)
750{
751 int r;
752 struct dm_thin_device *td, *tmp;
753 struct disk_device_details details;
754 uint64_t key;
755
756 list_for_each_entry_safe(td, tmp, &pmd->thin_devices, list) {
757 if (!td->changed)
758 continue;
759
760 key = td->id;
761
762 details.mapped_blocks = cpu_to_le64(td->mapped_blocks);
763 details.transaction_id = cpu_to_le64(td->transaction_id);
764 details.creation_time = cpu_to_le32(td->creation_time);
765 details.snapshotted_time = cpu_to_le32(td->snapshotted_time);
766 __dm_bless_for_disk(&details);
767
768 r = dm_btree_insert(&pmd->details_info, pmd->details_root,
769 &key, &details, &pmd->details_root);
770 if (r)
771 return r;
772
773 if (td->open_count)
774 td->changed = 0;
775 else {
776 list_del(&td->list);
777 kfree(td);
778 }
Joe Thornber991d9fa2011-10-31 20:21:18 +0000779 }
780
781 return 0;
782}
783
784static int __commit_transaction(struct dm_pool_metadata *pmd)
785{
Joe Thornber991d9fa2011-10-31 20:21:18 +0000786 int r;
787 size_t metadata_len, data_len;
788 struct thin_disk_superblock *disk_super;
789 struct dm_block *sblock;
790
	/*
	 * The on-disk superblock must fit within a single 512-byte sector
	 * so that it can be written atomically.
	 */
794 BUILD_BUG_ON(sizeof(struct thin_disk_superblock) > 512);
795
796 r = __write_changed_details(pmd);
797 if (r < 0)
Joe Thornberd973ac12012-07-27 15:07:58 +0100798 return r;
Joe Thornber991d9fa2011-10-31 20:21:18 +0000799
Joe Thornber991d9fa2011-10-31 20:21:18 +0000800 r = dm_sm_commit(pmd->data_sm);
801 if (r < 0)
Joe Thornberd973ac12012-07-27 15:07:58 +0100802 return r;
Joe Thornber991d9fa2011-10-31 20:21:18 +0000803
804 r = dm_tm_pre_commit(pmd->tm);
805 if (r < 0)
Joe Thornberd973ac12012-07-27 15:07:58 +0100806 return r;
Joe Thornber991d9fa2011-10-31 20:21:18 +0000807
808 r = dm_sm_root_size(pmd->metadata_sm, &metadata_len);
809 if (r < 0)
Joe Thornberd973ac12012-07-27 15:07:58 +0100810 return r;
Joe Thornber991d9fa2011-10-31 20:21:18 +0000811
Joe Thornberfef838c2012-03-28 18:41:25 +0100812 r = dm_sm_root_size(pmd->data_sm, &data_len);
Joe Thornber991d9fa2011-10-31 20:21:18 +0000813 if (r < 0)
Joe Thornberd973ac12012-07-27 15:07:58 +0100814 return r;
Joe Thornber991d9fa2011-10-31 20:21:18 +0000815
Joe Thornber5a320832014-03-27 14:13:23 +0000816 r = save_sm_roots(pmd);
817 if (r < 0)
818 return r;
819
Joe Thornber25971192012-07-27 15:08:09 +0100820 r = superblock_lock(pmd, &sblock);
Joe Thornber991d9fa2011-10-31 20:21:18 +0000821 if (r)
Joe Thornberd973ac12012-07-27 15:07:58 +0100822 return r;
Joe Thornber991d9fa2011-10-31 20:21:18 +0000823
824 disk_super = dm_block_data(sblock);
825 disk_super->time = cpu_to_le32(pmd->time);
826 disk_super->data_mapping_root = cpu_to_le64(pmd->root);
827 disk_super->device_details_root = cpu_to_le64(pmd->details_root);
828 disk_super->trans_id = cpu_to_le64(pmd->trans_id);
829 disk_super->flags = cpu_to_le32(pmd->flags);
830
Joe Thornber5a320832014-03-27 14:13:23 +0000831 copy_sm_roots(pmd, disk_super);
Joe Thornber991d9fa2011-10-31 20:21:18 +0000832
Joe Thornbereb04cf62012-07-27 15:08:08 +0100833 return dm_tm_commit(pmd->tm, sblock);
Joe Thornber991d9fa2011-10-31 20:21:18 +0000834}
835
Joe Thornbera9537db2018-09-10 16:50:09 +0100836static void __set_metadata_reserve(struct dm_pool_metadata *pmd)
837{
838 int r;
839 dm_block_t total;
840 dm_block_t max_blocks = 4096; /* 16M */
841
842 r = dm_sm_get_nr_blocks(pmd->metadata_sm, &total);
843 if (r) {
844 DMERR("could not get size of metadata device");
845 pmd->metadata_reserve = max_blocks;
Mike Snitzer28e68912018-09-13 21:16:20 -0400846 } else
847 pmd->metadata_reserve = min(max_blocks, div_u64(total, 10));
Joe Thornbera9537db2018-09-10 16:50:09 +0100848}
849
Joe Thornber991d9fa2011-10-31 20:21:18 +0000850struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev,
Joe Thornber66b1edc2012-07-27 15:08:14 +0100851 sector_t data_block_size,
852 bool format_device)
Joe Thornber991d9fa2011-10-31 20:21:18 +0000853{
854 int r;
Joe Thornber991d9fa2011-10-31 20:21:18 +0000855 struct dm_pool_metadata *pmd;
Joe Thornber991d9fa2011-10-31 20:21:18 +0000856
857 pmd = kmalloc(sizeof(*pmd), GFP_KERNEL);
858 if (!pmd) {
859 DMERR("could not allocate metadata struct");
860 return ERR_PTR(-ENOMEM);
861 }
862
Joe Thornber6a0ebd32012-07-27 15:08:10 +0100863 init_rwsem(&pmd->root_lock);
864 pmd->time = 0;
865 INIT_LIST_HEAD(&pmd->thin_devices);
Joe Thornberda105ed2012-07-27 15:08:15 +0100866 pmd->fail_io = false;
Joe Thornber991d9fa2011-10-31 20:21:18 +0000867 pmd->bdev = bdev;
Joe Thornber9cb66532012-07-27 15:08:10 +0100868 pmd->data_block_size = data_block_size;
Joe Thornber991d9fa2011-10-31 20:21:18 +0000869
Joe Thornber66b1edc2012-07-27 15:08:14 +0100870 r = __create_persistent_data_objects(pmd, format_device);
Joe Thornber332627d2012-07-27 15:08:10 +0100871 if (r) {
872 kfree(pmd);
873 return ERR_PTR(r);
874 }
875
Joe Thornber270938b2012-07-27 15:08:11 +0100876 r = __begin_transaction(pmd);
877 if (r < 0) {
878 if (dm_pool_metadata_close(pmd) < 0)
879 DMWARN("%s: dm_pool_metadata_close() failed.", __func__);
880 return ERR_PTR(r);
Joe Thornber991d9fa2011-10-31 20:21:18 +0000881 }
882
Joe Thornbera9537db2018-09-10 16:50:09 +0100883 __set_metadata_reserve(pmd);
884
Joe Thornber991d9fa2011-10-31 20:21:18 +0000885 return pmd;
Joe Thornber991d9fa2011-10-31 20:21:18 +0000886}
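
/*
 * Minimal usage sketch (not taken from this file; the pool target in
 * dm-thin.c is the real caller and its details differ): open the metadata
 * on a block device with a hypothetical 128-sector (64k) data block size,
 * formatting it if it is unformatted, and close it again.
 *
 *	struct dm_pool_metadata *pmd;
 *
 *	pmd = dm_pool_metadata_open(bdev, 128, true);
 *	if (IS_ERR(pmd))
 *		return PTR_ERR(pmd);
 *
 *	... create devices, insert mappings, commit ...
 *
 *	if (dm_pool_metadata_close(pmd) < 0)
 *		DMWARN("metadata close failed");
 */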
887
888int dm_pool_metadata_close(struct dm_pool_metadata *pmd)
889{
890 int r;
891 unsigned open_devices = 0;
892 struct dm_thin_device *td, *tmp;
893
894 down_read(&pmd->root_lock);
895 list_for_each_entry_safe(td, tmp, &pmd->thin_devices, list) {
896 if (td->open_count)
897 open_devices++;
898 else {
899 list_del(&td->list);
900 kfree(td);
901 }
902 }
903 up_read(&pmd->root_lock);
904
905 if (open_devices) {
906 DMERR("attempt to close pmd when %u device(s) are still open",
907 open_devices);
908 return -EBUSY;
909 }
910
Mike Snitzer49f154c2015-04-23 15:06:27 -0400911 if (!dm_bm_is_read_only(pmd->bm) && !pmd->fail_io) {
Joe Thornber12ba58a2012-07-27 15:08:15 +0100912 r = __commit_transaction(pmd);
913 if (r < 0)
914 DMWARN("%s: __commit_transaction() failed, error = %d",
915 __func__, r);
916 }
Joe Thornber991d9fa2011-10-31 20:21:18 +0000917
Joe Thornberda105ed2012-07-27 15:08:15 +0100918 if (!pmd->fail_io)
919 __destroy_persistent_data_objects(pmd);
Joe Thornber991d9fa2011-10-31 20:21:18 +0000920
Joe Thornberda105ed2012-07-27 15:08:15 +0100921 kfree(pmd);
Joe Thornber991d9fa2011-10-31 20:21:18 +0000922 return 0;
923}
924
Mike Snitzer1f3db252012-03-07 19:09:41 +0000925/*
926 * __open_device: Returns @td corresponding to device with id @dev,
927 * creating it if @create is set and incrementing @td->open_count.
928 * On failure, @td is undefined.
929 */
Joe Thornber991d9fa2011-10-31 20:21:18 +0000930static int __open_device(struct dm_pool_metadata *pmd,
931 dm_thin_id dev, int create,
932 struct dm_thin_device **td)
933{
934 int r, changed = 0;
935 struct dm_thin_device *td2;
936 uint64_t key = dev;
937 struct disk_device_details details_le;
938
939 /*
Mike Snitzer1f3db252012-03-07 19:09:41 +0000940 * If the device is already open, return it.
Joe Thornber991d9fa2011-10-31 20:21:18 +0000941 */
942 list_for_each_entry(td2, &pmd->thin_devices, list)
943 if (td2->id == dev) {
Mike Snitzer1f3db252012-03-07 19:09:41 +0000944 /*
945 * May not create an already-open device.
946 */
947 if (create)
948 return -EEXIST;
949
Joe Thornber991d9fa2011-10-31 20:21:18 +0000950 td2->open_count++;
951 *td = td2;
952 return 0;
953 }
954
955 /*
956 * Check the device exists.
957 */
958 r = dm_btree_lookup(&pmd->details_info, pmd->details_root,
959 &key, &details_le);
960 if (r) {
961 if (r != -ENODATA || !create)
962 return r;
963
Mike Snitzer1f3db252012-03-07 19:09:41 +0000964 /*
965 * Create new device.
966 */
Joe Thornber991d9fa2011-10-31 20:21:18 +0000967 changed = 1;
968 details_le.mapped_blocks = 0;
969 details_le.transaction_id = cpu_to_le64(pmd->trans_id);
970 details_le.creation_time = cpu_to_le32(pmd->time);
971 details_le.snapshotted_time = cpu_to_le32(pmd->time);
972 }
973
974 *td = kmalloc(sizeof(**td), GFP_NOIO);
975 if (!*td)
976 return -ENOMEM;
977
978 (*td)->pmd = pmd;
979 (*td)->id = dev;
980 (*td)->open_count = 1;
981 (*td)->changed = changed;
Joe Thornberda105ed2012-07-27 15:08:15 +0100982 (*td)->aborted_with_changes = false;
Joe Thornber991d9fa2011-10-31 20:21:18 +0000983 (*td)->mapped_blocks = le64_to_cpu(details_le.mapped_blocks);
984 (*td)->transaction_id = le64_to_cpu(details_le.transaction_id);
985 (*td)->creation_time = le32_to_cpu(details_le.creation_time);
986 (*td)->snapshotted_time = le32_to_cpu(details_le.snapshotted_time);
987
988 list_add(&(*td)->list, &pmd->thin_devices);
989
990 return 0;
991}
992
993static void __close_device(struct dm_thin_device *td)
994{
995 --td->open_count;
996}
997
998static int __create_thin(struct dm_pool_metadata *pmd,
999 dm_thin_id dev)
1000{
1001 int r;
1002 dm_block_t dev_root;
1003 uint64_t key = dev;
1004 struct disk_device_details details_le;
1005 struct dm_thin_device *td;
1006 __le64 value;
1007
1008 r = dm_btree_lookup(&pmd->details_info, pmd->details_root,
1009 &key, &details_le);
1010 if (!r)
1011 return -EEXIST;
1012
1013 /*
1014 * Create an empty btree for the mappings.
1015 */
1016 r = dm_btree_empty(&pmd->bl_info, &dev_root);
1017 if (r)
1018 return r;
1019
1020 /*
1021 * Insert it into the main mapping tree.
1022 */
1023 value = cpu_to_le64(dev_root);
1024 __dm_bless_for_disk(&value);
1025 r = dm_btree_insert(&pmd->tl_info, pmd->root, &key, &value, &pmd->root);
1026 if (r) {
1027 dm_btree_del(&pmd->bl_info, dev_root);
1028 return r;
1029 }
1030
1031 r = __open_device(pmd, dev, 1, &td);
1032 if (r) {
Joe Thornber991d9fa2011-10-31 20:21:18 +00001033 dm_btree_remove(&pmd->tl_info, pmd->root, &key, &pmd->root);
1034 dm_btree_del(&pmd->bl_info, dev_root);
1035 return r;
1036 }
Joe Thornber991d9fa2011-10-31 20:21:18 +00001037 __close_device(td);
1038
1039 return r;
1040}
1041
1042int dm_pool_create_thin(struct dm_pool_metadata *pmd, dm_thin_id dev)
1043{
Joe Thornberda105ed2012-07-27 15:08:15 +01001044 int r = -EINVAL;
Joe Thornber991d9fa2011-10-31 20:21:18 +00001045
1046 down_write(&pmd->root_lock);
Joe Thornberda105ed2012-07-27 15:08:15 +01001047 if (!pmd->fail_io)
1048 r = __create_thin(pmd, dev);
Joe Thornber991d9fa2011-10-31 20:21:18 +00001049 up_write(&pmd->root_lock);
1050
1051 return r;
1052}
1053
1054static int __set_snapshot_details(struct dm_pool_metadata *pmd,
1055 struct dm_thin_device *snap,
1056 dm_thin_id origin, uint32_t time)
1057{
1058 int r;
1059 struct dm_thin_device *td;
1060
1061 r = __open_device(pmd, origin, 0, &td);
1062 if (r)
1063 return r;
1064
1065 td->changed = 1;
1066 td->snapshotted_time = time;
1067
1068 snap->mapped_blocks = td->mapped_blocks;
1069 snap->snapshotted_time = time;
1070 __close_device(td);
1071
1072 return 0;
1073}
1074
1075static int __create_snap(struct dm_pool_metadata *pmd,
1076 dm_thin_id dev, dm_thin_id origin)
1077{
1078 int r;
1079 dm_block_t origin_root;
1080 uint64_t key = origin, dev_key = dev;
1081 struct dm_thin_device *td;
1082 struct disk_device_details details_le;
1083 __le64 value;
1084
1085 /* check this device is unused */
1086 r = dm_btree_lookup(&pmd->details_info, pmd->details_root,
1087 &dev_key, &details_le);
1088 if (!r)
1089 return -EEXIST;
1090
1091 /* find the mapping tree for the origin */
1092 r = dm_btree_lookup(&pmd->tl_info, pmd->root, &key, &value);
1093 if (r)
1094 return r;
1095 origin_root = le64_to_cpu(value);
1096
1097 /* clone the origin, an inc will do */
1098 dm_tm_inc(pmd->tm, origin_root);
1099
1100 /* insert into the main mapping tree */
1101 value = cpu_to_le64(origin_root);
1102 __dm_bless_for_disk(&value);
1103 key = dev;
1104 r = dm_btree_insert(&pmd->tl_info, pmd->root, &key, &value, &pmd->root);
1105 if (r) {
1106 dm_tm_dec(pmd->tm, origin_root);
1107 return r;
1108 }
1109
1110 pmd->time++;
1111
1112 r = __open_device(pmd, dev, 1, &td);
1113 if (r)
1114 goto bad;
1115
1116 r = __set_snapshot_details(pmd, td, origin, pmd->time);
Mike Snitzer1f3db252012-03-07 19:09:41 +00001117 __close_device(td);
1118
Joe Thornber991d9fa2011-10-31 20:21:18 +00001119 if (r)
1120 goto bad;
1121
Joe Thornber991d9fa2011-10-31 20:21:18 +00001122 return 0;
1123
1124bad:
Joe Thornber991d9fa2011-10-31 20:21:18 +00001125 dm_btree_remove(&pmd->tl_info, pmd->root, &key, &pmd->root);
1126 dm_btree_remove(&pmd->details_info, pmd->details_root,
1127 &key, &pmd->details_root);
1128 return r;
1129}
1130
1131int dm_pool_create_snap(struct dm_pool_metadata *pmd,
1132 dm_thin_id dev,
1133 dm_thin_id origin)
1134{
Joe Thornberda105ed2012-07-27 15:08:15 +01001135 int r = -EINVAL;
Joe Thornber991d9fa2011-10-31 20:21:18 +00001136
1137 down_write(&pmd->root_lock);
Joe Thornberda105ed2012-07-27 15:08:15 +01001138 if (!pmd->fail_io)
1139 r = __create_snap(pmd, dev, origin);
Joe Thornber991d9fa2011-10-31 20:21:18 +00001140 up_write(&pmd->root_lock);
1141
1142 return r;
1143}
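
/*
 * Illustrative sequence (hypothetical device ids, not code from this
 * driver): the externally visible ids are chosen by userspace, so taking
 * a snapshot of thin device 0 as device 1 amounts to:
 *
 *	r = dm_pool_create_thin(pmd, 0);
 *	...
 *	r = dm_pool_create_snap(pmd, 1, 0);
 *
 * __create_snap() then shares the origin's mapping tree by incrementing
 * its reference count and bumps pmd->time so that blocks written after
 * the snapshot are seen as unshared.
 */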
1144
1145static int __delete_device(struct dm_pool_metadata *pmd, dm_thin_id dev)
1146{
1147 int r;
1148 uint64_t key = dev;
1149 struct dm_thin_device *td;
1150
1151 /* TODO: failure should mark the transaction invalid */
1152 r = __open_device(pmd, dev, 0, &td);
1153 if (r)
1154 return r;
1155
1156 if (td->open_count > 1) {
1157 __close_device(td);
1158 return -EBUSY;
1159 }
1160
1161 list_del(&td->list);
1162 kfree(td);
1163 r = dm_btree_remove(&pmd->details_info, pmd->details_root,
1164 &key, &pmd->details_root);
1165 if (r)
1166 return r;
1167
1168 r = dm_btree_remove(&pmd->tl_info, pmd->root, &key, &pmd->root);
1169 if (r)
1170 return r;
1171
Joe Thornber991d9fa2011-10-31 20:21:18 +00001172 return 0;
1173}
1174
1175int dm_pool_delete_thin_device(struct dm_pool_metadata *pmd,
1176 dm_thin_id dev)
1177{
Joe Thornberda105ed2012-07-27 15:08:15 +01001178 int r = -EINVAL;
Joe Thornber991d9fa2011-10-31 20:21:18 +00001179
1180 down_write(&pmd->root_lock);
Joe Thornberda105ed2012-07-27 15:08:15 +01001181 if (!pmd->fail_io)
1182 r = __delete_device(pmd, dev);
Joe Thornber991d9fa2011-10-31 20:21:18 +00001183 up_write(&pmd->root_lock);
1184
1185 return r;
1186}
1187
1188int dm_pool_set_metadata_transaction_id(struct dm_pool_metadata *pmd,
1189 uint64_t current_id,
1190 uint64_t new_id)
1191{
Joe Thornberda105ed2012-07-27 15:08:15 +01001192 int r = -EINVAL;
1193
Joe Thornber991d9fa2011-10-31 20:21:18 +00001194 down_write(&pmd->root_lock);
Joe Thornberda105ed2012-07-27 15:08:15 +01001195
1196 if (pmd->fail_io)
1197 goto out;
1198
Joe Thornber991d9fa2011-10-31 20:21:18 +00001199 if (pmd->trans_id != current_id) {
Joe Thornber991d9fa2011-10-31 20:21:18 +00001200 DMERR("mismatched transaction id");
Joe Thornberda105ed2012-07-27 15:08:15 +01001201 goto out;
Joe Thornber991d9fa2011-10-31 20:21:18 +00001202 }
1203
1204 pmd->trans_id = new_id;
Joe Thornberda105ed2012-07-27 15:08:15 +01001205 r = 0;
1206
1207out:
Joe Thornber991d9fa2011-10-31 20:21:18 +00001208 up_write(&pmd->root_lock);
1209
Joe Thornberda105ed2012-07-27 15:08:15 +01001210 return r;
Joe Thornber991d9fa2011-10-31 20:21:18 +00001211}
1212
1213int dm_pool_get_metadata_transaction_id(struct dm_pool_metadata *pmd,
1214 uint64_t *result)
1215{
Joe Thornberda105ed2012-07-27 15:08:15 +01001216 int r = -EINVAL;
1217
Joe Thornber991d9fa2011-10-31 20:21:18 +00001218 down_read(&pmd->root_lock);
Joe Thornberda105ed2012-07-27 15:08:15 +01001219 if (!pmd->fail_io) {
1220 *result = pmd->trans_id;
1221 r = 0;
1222 }
Joe Thornber991d9fa2011-10-31 20:21:18 +00001223 up_read(&pmd->root_lock);
1224
Joe Thornberda105ed2012-07-27 15:08:15 +01001225 return r;
Joe Thornber991d9fa2011-10-31 20:21:18 +00001226}
1227
Joe Thornbercc8394d2012-06-03 00:30:01 +01001228static int __reserve_metadata_snap(struct dm_pool_metadata *pmd)
1229{
1230 int r, inc;
1231 struct thin_disk_superblock *disk_super;
1232 struct dm_block *copy, *sblock;
1233 dm_block_t held_root;
1234
1235 /*
Joe Thornber49e99fc2015-12-09 16:23:24 +00001236 * We commit to ensure the btree roots which we increment in a
1237 * moment are up to date.
1238 */
1239 __commit_transaction(pmd);
1240
1241 /*
Joe Thornbercc8394d2012-06-03 00:30:01 +01001242 * Copy the superblock.
1243 */
1244 dm_sm_inc_block(pmd->metadata_sm, THIN_SUPERBLOCK_LOCATION);
1245 r = dm_tm_shadow_block(pmd->tm, THIN_SUPERBLOCK_LOCATION,
1246 &sb_validator, &copy, &inc);
1247 if (r)
1248 return r;
1249
1250 BUG_ON(!inc);
1251
1252 held_root = dm_block_location(copy);
1253 disk_super = dm_block_data(copy);
1254
1255 if (le64_to_cpu(disk_super->held_root)) {
1256 DMWARN("Pool metadata snapshot already exists: release this before taking another.");
1257
1258 dm_tm_dec(pmd->tm, held_root);
1259 dm_tm_unlock(pmd->tm, copy);
Joe Thornbercc8394d2012-06-03 00:30:01 +01001260 return -EBUSY;
1261 }
1262
1263 /*
1264 * Wipe the spacemap since we're not publishing this.
1265 */
1266 memset(&disk_super->data_space_map_root, 0,
1267 sizeof(disk_super->data_space_map_root));
1268 memset(&disk_super->metadata_space_map_root, 0,
1269 sizeof(disk_super->metadata_space_map_root));
1270
1271 /*
1272 * Increment the data structures that need to be preserved.
1273 */
1274 dm_tm_inc(pmd->tm, le64_to_cpu(disk_super->data_mapping_root));
1275 dm_tm_inc(pmd->tm, le64_to_cpu(disk_super->device_details_root));
1276 dm_tm_unlock(pmd->tm, copy);
1277
1278 /*
1279 * Write the held root into the superblock.
1280 */
Joe Thornber25971192012-07-27 15:08:09 +01001281 r = superblock_lock(pmd, &sblock);
Joe Thornbercc8394d2012-06-03 00:30:01 +01001282 if (r) {
1283 dm_tm_dec(pmd->tm, held_root);
Joe Thornbercc8394d2012-06-03 00:30:01 +01001284 return r;
1285 }
1286
1287 disk_super = dm_block_data(sblock);
1288 disk_super->held_root = cpu_to_le64(held_root);
1289 dm_bm_unlock(sblock);
Joe Thornbercc8394d2012-06-03 00:30:01 +01001290 return 0;
1291}
1292
1293int dm_pool_reserve_metadata_snap(struct dm_pool_metadata *pmd)
1294{
Joe Thornberda105ed2012-07-27 15:08:15 +01001295 int r = -EINVAL;
Joe Thornbercc8394d2012-06-03 00:30:01 +01001296
1297 down_write(&pmd->root_lock);
Joe Thornberda105ed2012-07-27 15:08:15 +01001298 if (!pmd->fail_io)
1299 r = __reserve_metadata_snap(pmd);
Joe Thornbercc8394d2012-06-03 00:30:01 +01001300 up_write(&pmd->root_lock);
1301
1302 return r;
1303}
1304
1305static int __release_metadata_snap(struct dm_pool_metadata *pmd)
1306{
1307 int r;
1308 struct thin_disk_superblock *disk_super;
1309 struct dm_block *sblock, *copy;
1310 dm_block_t held_root;
1311
Joe Thornber25971192012-07-27 15:08:09 +01001312 r = superblock_lock(pmd, &sblock);
Joe Thornbercc8394d2012-06-03 00:30:01 +01001313 if (r)
1314 return r;
1315
1316 disk_super = dm_block_data(sblock);
1317 held_root = le64_to_cpu(disk_super->held_root);
1318 disk_super->held_root = cpu_to_le64(0);
Joe Thornbercc8394d2012-06-03 00:30:01 +01001319
1320 dm_bm_unlock(sblock);
1321
1322 if (!held_root) {
1323 DMWARN("No pool metadata snapshot found: nothing to release.");
1324 return -EINVAL;
1325 }
1326
1327 r = dm_tm_read_lock(pmd->tm, held_root, &sb_validator, &copy);
1328 if (r)
1329 return r;
1330
1331 disk_super = dm_block_data(copy);
Joe Thornber7f518ad2015-08-12 15:10:21 +01001332 dm_btree_del(&pmd->info, le64_to_cpu(disk_super->data_mapping_root));
1333 dm_btree_del(&pmd->details_info, le64_to_cpu(disk_super->device_details_root));
Joe Thornbercc8394d2012-06-03 00:30:01 +01001334 dm_sm_dec_block(pmd->metadata_sm, held_root);
1335
Mikulas Patocka4c7da062015-10-22 16:46:59 -04001336 dm_tm_unlock(pmd->tm, copy);
1337
1338 return 0;
Joe Thornbercc8394d2012-06-03 00:30:01 +01001339}
1340
1341int dm_pool_release_metadata_snap(struct dm_pool_metadata *pmd)
1342{
Joe Thornberda105ed2012-07-27 15:08:15 +01001343 int r = -EINVAL;
Joe Thornbercc8394d2012-06-03 00:30:01 +01001344
1345 down_write(&pmd->root_lock);
Joe Thornberda105ed2012-07-27 15:08:15 +01001346 if (!pmd->fail_io)
1347 r = __release_metadata_snap(pmd);
Joe Thornbercc8394d2012-06-03 00:30:01 +01001348 up_write(&pmd->root_lock);
1349
1350 return r;
1351}
1352
1353static int __get_metadata_snap(struct dm_pool_metadata *pmd,
1354 dm_block_t *result)
Joe Thornber991d9fa2011-10-31 20:21:18 +00001355{
1356 int r;
1357 struct thin_disk_superblock *disk_super;
1358 struct dm_block *sblock;
1359
Joe Thornbercc8394d2012-06-03 00:30:01 +01001360 r = dm_bm_read_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION,
1361 &sb_validator, &sblock);
Joe Thornber991d9fa2011-10-31 20:21:18 +00001362 if (r)
1363 return r;
1364
1365 disk_super = dm_block_data(sblock);
1366 *result = le64_to_cpu(disk_super->held_root);
1367
Mikulas Patocka4c7da062015-10-22 16:46:59 -04001368 dm_bm_unlock(sblock);
1369
1370 return 0;
Joe Thornber991d9fa2011-10-31 20:21:18 +00001371}
1372
Joe Thornbercc8394d2012-06-03 00:30:01 +01001373int dm_pool_get_metadata_snap(struct dm_pool_metadata *pmd,
1374 dm_block_t *result)
Joe Thornber991d9fa2011-10-31 20:21:18 +00001375{
Joe Thornberda105ed2012-07-27 15:08:15 +01001376 int r = -EINVAL;
Joe Thornber991d9fa2011-10-31 20:21:18 +00001377
1378 down_read(&pmd->root_lock);
Joe Thornberda105ed2012-07-27 15:08:15 +01001379 if (!pmd->fail_io)
1380 r = __get_metadata_snap(pmd, result);
Joe Thornber991d9fa2011-10-31 20:21:18 +00001381 up_read(&pmd->root_lock);
1382
1383 return r;
1384}
1385
1386int dm_pool_open_thin_device(struct dm_pool_metadata *pmd, dm_thin_id dev,
1387 struct dm_thin_device **td)
1388{
Joe Thornberda105ed2012-07-27 15:08:15 +01001389 int r = -EINVAL;
Joe Thornber991d9fa2011-10-31 20:21:18 +00001390
1391 down_write(&pmd->root_lock);
Joe Thornberda105ed2012-07-27 15:08:15 +01001392 if (!pmd->fail_io)
1393 r = __open_device(pmd, dev, 0, td);
Joe Thornber991d9fa2011-10-31 20:21:18 +00001394 up_write(&pmd->root_lock);
1395
1396 return r;
1397}
1398
1399int dm_pool_close_thin_device(struct dm_thin_device *td)
1400{
1401 down_write(&td->pmd->root_lock);
1402 __close_device(td);
1403 up_write(&td->pmd->root_lock);
1404
1405 return 0;
1406}
1407
1408dm_thin_id dm_thin_dev_id(struct dm_thin_device *td)
1409{
1410 return td->id;
1411}
1412
Joe Thornber19fa1a62013-12-17 12:09:40 -05001413/*
1414 * Check whether @time (of block creation) is older than @td's last snapshot.
1415 * If so then the associated block is shared with the last snapshot device.
1416 * Any block on a device created *after* the device last got snapshotted is
1417 * necessarily not shared.
1418 */
Mike Snitzer17b7d632012-07-27 15:07:57 +01001419static bool __snapshotted_since(struct dm_thin_device *td, uint32_t time)
Joe Thornber991d9fa2011-10-31 20:21:18 +00001420{
1421 return td->snapshotted_time > time;
1422}
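
/*
 * Worked example: if a device was last snapshotted at time 8, a block
 * created at time 5 predates the snapshot, so __snapshotted_since()
 * returns true and the block is reported as shared; a block created at
 * time 9 postdates it and is not shared.
 */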
1423
Joe Thornber3d5f6732015-11-05 15:10:11 +00001424static void unpack_lookup_result(struct dm_thin_device *td, __le64 value,
1425 struct dm_thin_lookup_result *result)
1426{
1427 uint64_t block_time = 0;
1428 dm_block_t exception_block;
1429 uint32_t exception_time;
1430
1431 block_time = le64_to_cpu(value);
1432 unpack_block_time(block_time, &exception_block, &exception_time);
1433 result->block = exception_block;
1434 result->shared = __snapshotted_since(td, exception_time);
1435}
1436
Joe Thornber086fbbb2015-12-07 14:48:04 +00001437static int __find_block(struct dm_thin_device *td, dm_block_t block,
1438 int can_issue_io, struct dm_thin_lookup_result *result)
Joe Thornber991d9fa2011-10-31 20:21:18 +00001439{
Joe Thornbere5cfc692014-10-06 15:24:55 +01001440 int r;
Joe Thornber991d9fa2011-10-31 20:21:18 +00001441 __le64 value;
1442 struct dm_pool_metadata *pmd = td->pmd;
1443 dm_block_t keys[2] = { td->id, block };
Joe Thornberda105ed2012-07-27 15:08:15 +01001444 struct dm_btree_info *info;
Joe Thornber991d9fa2011-10-31 20:21:18 +00001445
Joe Thornbere5cfc692014-10-06 15:24:55 +01001446 if (can_issue_io) {
1447 info = &pmd->info;
1448 } else
1449 info = &pmd->nb_info;
Joe Thornberda105ed2012-07-27 15:08:15 +01001450
1451 r = dm_btree_lookup(info, pmd->root, keys, &value);
Joe Thornber3d5f6732015-11-05 15:10:11 +00001452 if (!r)
1453 unpack_lookup_result(td, value, result);
Joe Thornber991d9fa2011-10-31 20:21:18 +00001454
Joe Thornber086fbbb2015-12-07 14:48:04 +00001455 return r;
1456}
1457
1458int dm_thin_find_block(struct dm_thin_device *td, dm_block_t block,
1459 int can_issue_io, struct dm_thin_lookup_result *result)
1460{
1461 int r;
1462 struct dm_pool_metadata *pmd = td->pmd;
1463
1464 down_read(&pmd->root_lock);
1465 if (pmd->fail_io) {
1466 up_read(&pmd->root_lock);
1467 return -EINVAL;
1468 }
1469
1470 r = __find_block(td, block, can_issue_io, result);
1471
Joe Thornbere5cfc692014-10-06 15:24:55 +01001472 up_read(&pmd->root_lock);
Joe Thornber991d9fa2011-10-31 20:21:18 +00001473 return r;
1474}
1475
Joe Thornber086fbbb2015-12-07 14:48:04 +00001476static int __find_next_mapped_block(struct dm_thin_device *td, dm_block_t block,
Joe Thornber3d5f6732015-11-05 15:10:11 +00001477 dm_block_t *vblock,
1478 struct dm_thin_lookup_result *result)
1479{
1480 int r;
1481 __le64 value;
1482 struct dm_pool_metadata *pmd = td->pmd;
1483 dm_block_t keys[2] = { td->id, block };
1484
Joe Thornber3d5f6732015-11-05 15:10:11 +00001485 r = dm_btree_lookup_next(&pmd->info, pmd->root, keys, vblock, &value);
1486 if (!r)
1487 unpack_lookup_result(td, value, result);
1488
Joe Thornber3d5f6732015-11-05 15:10:11 +00001489 return r;
1490}
1491
Joe Thornber086fbbb2015-12-07 14:48:04 +00001492static int __find_mapped_range(struct dm_thin_device *td,
1493 dm_block_t begin, dm_block_t end,
1494 dm_block_t *thin_begin, dm_block_t *thin_end,
1495 dm_block_t *pool_begin, bool *maybe_shared)
Joe Thornbera5d895a2015-04-16 12:47:21 +01001496{
1497 int r;
1498 dm_block_t pool_end;
1499 struct dm_thin_lookup_result lookup;
1500
1501 if (end < begin)
1502 return -ENODATA;
1503
Joe Thornber086fbbb2015-12-07 14:48:04 +00001504 r = __find_next_mapped_block(td, begin, &begin, &lookup);
Joe Thornber3d5f6732015-11-05 15:10:11 +00001505 if (r)
1506 return r;
Joe Thornbera5d895a2015-04-16 12:47:21 +01001507
Joe Thornber3d5f6732015-11-05 15:10:11 +00001508 if (begin >= end)
Joe Thornbera5d895a2015-04-16 12:47:21 +01001509 return -ENODATA;
1510
1511 *thin_begin = begin;
1512 *pool_begin = lookup.block;
1513 *maybe_shared = lookup.shared;
1514
1515 begin++;
1516 pool_end = *pool_begin + 1;
1517 while (begin != end) {
Joe Thornber086fbbb2015-12-07 14:48:04 +00001518 r = __find_block(td, begin, true, &lookup);
Joe Thornbera5d895a2015-04-16 12:47:21 +01001519 if (r) {
1520 if (r == -ENODATA)
1521 break;
1522 else
1523 return r;
1524 }
1525
1526 if ((lookup.block != pool_end) ||
1527 (lookup.shared != *maybe_shared))
1528 break;
1529
1530 pool_end++;
1531 begin++;
1532 }
1533
1534 *thin_end = begin;
1535 return 0;
1536}
1537
Joe Thornber086fbbb2015-12-07 14:48:04 +00001538int dm_thin_find_mapped_range(struct dm_thin_device *td,
1539 dm_block_t begin, dm_block_t end,
1540 dm_block_t *thin_begin, dm_block_t *thin_end,
1541 dm_block_t *pool_begin, bool *maybe_shared)
1542{
1543 int r = -EINVAL;
1544 struct dm_pool_metadata *pmd = td->pmd;
1545
1546 down_read(&pmd->root_lock);
1547 if (!pmd->fail_io) {
1548 r = __find_mapped_range(td, begin, end, thin_begin, thin_end,
1549 pool_begin, maybe_shared);
1550 }
1551 up_read(&pmd->root_lock);
1552
1553 return r;
1554}
1555
Joe Thornber991d9fa2011-10-31 20:21:18 +00001556static int __insert(struct dm_thin_device *td, dm_block_t block,
1557 dm_block_t data_block)
1558{
1559 int r, inserted;
1560 __le64 value;
1561 struct dm_pool_metadata *pmd = td->pmd;
1562 dm_block_t keys[2] = { td->id, block };
1563
Joe Thornber991d9fa2011-10-31 20:21:18 +00001564 value = cpu_to_le64(pack_block_time(data_block, pmd->time));
1565 __dm_bless_for_disk(&value);
1566
1567 r = dm_btree_insert_notify(&pmd->info, pmd->root, keys, &value,
1568 &pmd->root, &inserted);
1569 if (r)
1570 return r;
1571
Joe Thornber40db5a52012-07-27 15:08:14 +01001572 td->changed = 1;
1573 if (inserted)
Joe Thornber991d9fa2011-10-31 20:21:18 +00001574 td->mapped_blocks++;
Joe Thornber991d9fa2011-10-31 20:21:18 +00001575
1576 return 0;
1577}
1578
1579int dm_thin_insert_block(struct dm_thin_device *td, dm_block_t block,
1580 dm_block_t data_block)
1581{
Joe Thornberda105ed2012-07-27 15:08:15 +01001582 int r = -EINVAL;
Joe Thornber991d9fa2011-10-31 20:21:18 +00001583
1584 down_write(&td->pmd->root_lock);
Joe Thornberda105ed2012-07-27 15:08:15 +01001585 if (!td->pmd->fail_io)
1586 r = __insert(td, block, data_block);
Joe Thornber991d9fa2011-10-31 20:21:18 +00001587 up_write(&td->pmd->root_lock);
1588
1589 return r;
1590}
1591
1592static int __remove(struct dm_thin_device *td, dm_block_t block)
1593{
1594 int r;
1595 struct dm_pool_metadata *pmd = td->pmd;
1596 dm_block_t keys[2] = { td->id, block };
1597
1598 r = dm_btree_remove(&pmd->info, pmd->root, keys, &pmd->root);
1599 if (r)
1600 return r;
1601
Joe Thornberaf63bcb2012-03-07 19:09:44 +00001602 td->mapped_blocks--;
1603 td->changed = 1;
Joe Thornber991d9fa2011-10-31 20:21:18 +00001604
1605 return 0;
1606}
1607
Joe Thornber6550f072015-04-13 09:45:25 +01001608static int __remove_range(struct dm_thin_device *td, dm_block_t begin, dm_block_t end)
1609{
1610 int r;
Joe Thornber993ceab2015-12-02 12:24:39 +00001611 unsigned count, total_count = 0;
Joe Thornber6550f072015-04-13 09:45:25 +01001612 struct dm_pool_metadata *pmd = td->pmd;
1613 dm_block_t keys[1] = { td->id };
1614 __le64 value;
1615 dm_block_t mapping_root;
1616
1617 /*
1618 * Find the mapping tree
1619 */
1620 r = dm_btree_lookup(&pmd->tl_info, pmd->root, keys, &value);
1621 if (r)
1622 return r;
1623
1624 /*
1625 * Remove from the mapping tree, taking care to inc the
1626 * ref count so it doesn't get deleted.
1627 */
1628 mapping_root = le64_to_cpu(value);
1629 dm_tm_inc(pmd->tm, mapping_root);
1630 r = dm_btree_remove(&pmd->tl_info, pmd->root, keys, &pmd->root);
1631 if (r)
1632 return r;
1633
Joe Thornber993ceab2015-12-02 12:24:39 +00001634 /*
1635	 * dm_btree_remove_leaves() stops at the first unmapped entry, so we
1636	 * have to loop round finding mapped ranges.
1637 */
1638 while (begin < end) {
1639 r = dm_btree_lookup_next(&pmd->bl_info, mapping_root, &begin, &begin, &value);
1640 if (r == -ENODATA)
1641 break;
Joe Thornber6550f072015-04-13 09:45:25 +01001642
Joe Thornber993ceab2015-12-02 12:24:39 +00001643 if (r)
1644 return r;
1645
1646 if (begin >= end)
1647 break;
1648
1649 r = dm_btree_remove_leaves(&pmd->bl_info, mapping_root, &begin, end, &mapping_root, &count);
1650 if (r)
1651 return r;
1652
1653 total_count += count;
1654 }
1655
1656 td->mapped_blocks -= total_count;
Joe Thornber6550f072015-04-13 09:45:25 +01001657 td->changed = 1;
1658
1659 /*
1660 * Reinsert the mapping tree.
1661 */
1662 value = cpu_to_le64(mapping_root);
1663 __dm_bless_for_disk(&value);
1664 return dm_btree_insert(&pmd->tl_info, pmd->root, keys, &value, &pmd->root);
1665}
1666
Joe Thornber991d9fa2011-10-31 20:21:18 +00001667int dm_thin_remove_block(struct dm_thin_device *td, dm_block_t block)
1668{
Joe Thornberda105ed2012-07-27 15:08:15 +01001669 int r = -EINVAL;
Joe Thornber991d9fa2011-10-31 20:21:18 +00001670
1671 down_write(&td->pmd->root_lock);
Joe Thornberda105ed2012-07-27 15:08:15 +01001672 if (!td->pmd->fail_io)
1673 r = __remove(td, block);
Joe Thornber991d9fa2011-10-31 20:21:18 +00001674 up_write(&td->pmd->root_lock);
1675
1676 return r;
1677}
1678
Joe Thornber6550f072015-04-13 09:45:25 +01001679int dm_thin_remove_range(struct dm_thin_device *td,
1680 dm_block_t begin, dm_block_t end)
1681{
1682 int r = -EINVAL;
1683
1684 down_write(&td->pmd->root_lock);
1685 if (!td->pmd->fail_io)
1686 r = __remove_range(td, begin, end);
1687 up_write(&td->pmd->root_lock);
1688
1689 return r;
1690}
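/*
 * Illustrative pattern (not code from this file): a caller that still needs
 * the underlying data blocks after unmapping, for instance to pass a discard
 * down to the data device, can pin them around the removal and release them
 * once the I/O has completed:
 *
 *	dm_pool_inc_data_range(pmd, data_begin, data_end);	// hold
 *	dm_thin_remove_range(td, virt_begin, virt_end);		// unmap
 *	...	// issue I/O against the data blocks
 *	dm_pool_dec_data_range(pmd, data_begin, data_end);	// release
 *
 * data_begin/data_end and virt_begin/virt_end are assumed to describe the
 * same mapped run, e.g. as reported by dm_thin_find_mapped_range().
 */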
1691
Joe Thornber5675a522019-01-15 13:27:01 -05001692int dm_pool_block_is_shared(struct dm_pool_metadata *pmd, dm_block_t b, bool *result)
Joe Thornber19fa1a62013-12-17 12:09:40 -05001693{
1694 int r;
1695 uint32_t ref_count;
1696
1697 down_read(&pmd->root_lock);
1698 r = dm_sm_get_count(pmd->data_sm, b, &ref_count);
1699 if (!r)
Joe Thornber5675a522019-01-15 13:27:01 -05001700 *result = (ref_count > 1);
Joe Thornber19fa1a62013-12-17 12:09:40 -05001701 up_read(&pmd->root_lock);
1702
1703 return r;
1704}
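/*
 * Note on the helper above: "shared" simply means the block's reference
 * count in the data space map is greater than one, i.e. more than one
 * mapping (typically via snapshots) points at it.  A hedged usage sketch,
 * breaking sharing before an in-place overwrite:
 *
 *	bool shared;
 *	int r = dm_pool_block_is_shared(pmd, data_block, &shared);
 *	if (!r && shared)
 *		;	// copy-on-write: allocate a new block and remap
 */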
1705
Joe Thornber2a0fbff2016-07-01 14:00:02 +01001706int dm_pool_inc_data_range(struct dm_pool_metadata *pmd, dm_block_t b, dm_block_t e)
1707{
1708 int r = 0;
1709
1710 down_write(&pmd->root_lock);
1711 for (; b != e; b++) {
1712 r = dm_sm_inc_block(pmd->data_sm, b);
1713 if (r)
1714 break;
1715 }
1716 up_write(&pmd->root_lock);
1717
1718 return r;
1719}
1720
1721int dm_pool_dec_data_range(struct dm_pool_metadata *pmd, dm_block_t b, dm_block_t e)
1722{
1723 int r = 0;
1724
1725 down_write(&pmd->root_lock);
1726 for (; b != e; b++) {
1727 r = dm_sm_dec_block(pmd->data_sm, b);
1728 if (r)
1729 break;
1730 }
1731 up_write(&pmd->root_lock);
1732
1733 return r;
1734}
1735
Joe Thornber40db5a52012-07-27 15:08:14 +01001736bool dm_thin_changed_this_transaction(struct dm_thin_device *td)
1737{
1738 int r;
1739
1740 down_read(&td->pmd->root_lock);
1741 r = td->changed;
1742 up_read(&td->pmd->root_lock);
1743
1744 return r;
1745}
1746
Mike Snitzer4d1662a2014-02-06 06:08:56 -05001747bool dm_pool_changed_this_transaction(struct dm_pool_metadata *pmd)
1748{
1749 bool r = false;
1750 struct dm_thin_device *td, *tmp;
1751
1752 down_read(&pmd->root_lock);
1753 list_for_each_entry_safe(td, tmp, &pmd->thin_devices, list) {
1754 if (td->changed) {
1755 r = td->changed;
1756 break;
1757 }
1758 }
1759 up_read(&pmd->root_lock);
1760
1761 return r;
1762}
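/*
 * Usage sketch (illustrative): the *_changed_this_transaction() helpers let
 * a caller skip a metadata commit when nothing is dirty:
 *
 *	if (dm_pool_changed_this_transaction(pmd))
 *		r = dm_pool_commit_metadata(pmd);
 */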
1763
Joe Thornberda105ed2012-07-27 15:08:15 +01001764bool dm_thin_aborted_changes(struct dm_thin_device *td)
1765{
1766 bool r;
1767
1768 down_read(&td->pmd->root_lock);
1769 r = td->aborted_with_changes;
1770 up_read(&td->pmd->root_lock);
1771
1772 return r;
1773}
1774
Joe Thornber991d9fa2011-10-31 20:21:18 +00001775int dm_pool_alloc_data_block(struct dm_pool_metadata *pmd, dm_block_t *result)
1776{
Joe Thornberda105ed2012-07-27 15:08:15 +01001777 int r = -EINVAL;
Joe Thornber991d9fa2011-10-31 20:21:18 +00001778
1779 down_write(&pmd->root_lock);
Joe Thornberda105ed2012-07-27 15:08:15 +01001780 if (!pmd->fail_io)
1781 r = dm_sm_new_block(pmd->data_sm, result);
Joe Thornber991d9fa2011-10-31 20:21:18 +00001782 up_write(&pmd->root_lock);
1783
1784 return r;
1785}
1786
1787int dm_pool_commit_metadata(struct dm_pool_metadata *pmd)
1788{
Joe Thornberda105ed2012-07-27 15:08:15 +01001789 int r = -EINVAL;
Joe Thornber991d9fa2011-10-31 20:21:18 +00001790
1791 down_write(&pmd->root_lock);
Joe Thornberda105ed2012-07-27 15:08:15 +01001792 if (pmd->fail_io)
1793 goto out;
Joe Thornber991d9fa2011-10-31 20:21:18 +00001794
1795 r = __commit_transaction(pmd);
1796	if (r < 0)
1797 goto out;
1798
1799 /*
1800 * Open the next transaction.
1801 */
1802 r = __begin_transaction(pmd);
1803out:
1804 up_write(&pmd->root_lock);
1805 return r;
1806}
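/*
 * Note on the function above: updates made through this API live only in the
 * open transaction; they become durable when dm_pool_commit_metadata()
 * writes them out, after which the next transaction is opened.  A minimal
 * sketch:
 *
 *	r = dm_thin_insert_block(td, virt_block, data_block);
 *	if (!r)
 *		r = dm_pool_commit_metadata(pmd);	// persist the mapping
 */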
1807
Joe Thornberda105ed2012-07-27 15:08:15 +01001808static void __set_abort_with_changes_flags(struct dm_pool_metadata *pmd)
1809{
1810 struct dm_thin_device *td;
1811
1812 list_for_each_entry(td, &pmd->thin_devices, list)
1813 td->aborted_with_changes = td->changed;
1814}
1815
1816int dm_pool_abort_metadata(struct dm_pool_metadata *pmd)
1817{
1818 int r = -EINVAL;
1819
1820 down_write(&pmd->root_lock);
1821 if (pmd->fail_io)
1822 goto out;
1823
1824 __set_abort_with_changes_flags(pmd);
1825 __destroy_persistent_data_objects(pmd);
1826 r = __create_persistent_data_objects(pmd, false);
1827 if (r)
1828 pmd->fail_io = true;
1829
1830out:
1831 up_write(&pmd->root_lock);
1832
1833 return r;
1834}
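/*
 * Note on abort: it discards everything in the open transaction by rebuilding
 * the persistent-data objects from the last committed superblock, and each
 * device's aborted_with_changes flag records whether that device lost
 * uncommitted updates.  An illustrative recovery sketch:
 *
 *	if (dm_pool_commit_metadata(pmd) < 0) {
 *		dm_pool_abort_metadata(pmd);
 *		if (dm_thin_aborted_changes(td))
 *			;	// treat the device's recent writes as lost
 *	}
 */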
1835
Joe Thornber991d9fa2011-10-31 20:21:18 +00001836int dm_pool_get_free_block_count(struct dm_pool_metadata *pmd, dm_block_t *result)
1837{
Joe Thornberda105ed2012-07-27 15:08:15 +01001838 int r = -EINVAL;
Joe Thornber991d9fa2011-10-31 20:21:18 +00001839
1840 down_read(&pmd->root_lock);
Joe Thornberda105ed2012-07-27 15:08:15 +01001841 if (!pmd->fail_io)
1842 r = dm_sm_get_nr_free(pmd->data_sm, result);
Joe Thornber991d9fa2011-10-31 20:21:18 +00001843 up_read(&pmd->root_lock);
1844
1845 return r;
1846}
1847
1848int dm_pool_get_free_metadata_block_count(struct dm_pool_metadata *pmd,
1849 dm_block_t *result)
1850{
Joe Thornberda105ed2012-07-27 15:08:15 +01001851 int r = -EINVAL;
Joe Thornber991d9fa2011-10-31 20:21:18 +00001852
1853 down_read(&pmd->root_lock);
Joe Thornberda105ed2012-07-27 15:08:15 +01001854 if (!pmd->fail_io)
1855 r = dm_sm_get_nr_free(pmd->metadata_sm, result);
Joe Thornbera9537db2018-09-10 16:50:09 +01001856
1857 if (!r) {
1858 if (*result < pmd->metadata_reserve)
1859 *result = 0;
1860 else
1861 *result -= pmd->metadata_reserve;
1862 }
Joe Thornber991d9fa2011-10-31 20:21:18 +00001863 up_read(&pmd->root_lock);
1864
1865 return r;
1866}
1867
1868int dm_pool_get_metadata_dev_size(struct dm_pool_metadata *pmd,
1869 dm_block_t *result)
1870{
Joe Thornberda105ed2012-07-27 15:08:15 +01001871 int r = -EINVAL;
Joe Thornber991d9fa2011-10-31 20:21:18 +00001872
1873 down_read(&pmd->root_lock);
Joe Thornberda105ed2012-07-27 15:08:15 +01001874 if (!pmd->fail_io)
1875 r = dm_sm_get_nr_blocks(pmd->metadata_sm, result);
Joe Thornber991d9fa2011-10-31 20:21:18 +00001876 up_read(&pmd->root_lock);
1877
1878 return r;
1879}
1880
Joe Thornber991d9fa2011-10-31 20:21:18 +00001881int dm_pool_get_data_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result)
1882{
Joe Thornberda105ed2012-07-27 15:08:15 +01001883 int r = -EINVAL;
Joe Thornber991d9fa2011-10-31 20:21:18 +00001884
1885 down_read(&pmd->root_lock);
Joe Thornberda105ed2012-07-27 15:08:15 +01001886 if (!pmd->fail_io)
1887 r = dm_sm_get_nr_blocks(pmd->data_sm, result);
Joe Thornber991d9fa2011-10-31 20:21:18 +00001888 up_read(&pmd->root_lock);
1889
1890 return r;
1891}
1892
1893int dm_thin_get_mapped_count(struct dm_thin_device *td, dm_block_t *result)
1894{
Joe Thornberda105ed2012-07-27 15:08:15 +01001895 int r = -EINVAL;
Joe Thornber991d9fa2011-10-31 20:21:18 +00001896 struct dm_pool_metadata *pmd = td->pmd;
1897
1898 down_read(&pmd->root_lock);
Joe Thornberda105ed2012-07-27 15:08:15 +01001899 if (!pmd->fail_io) {
1900 *result = td->mapped_blocks;
1901 r = 0;
1902 }
Joe Thornber991d9fa2011-10-31 20:21:18 +00001903 up_read(&pmd->root_lock);
1904
Joe Thornberda105ed2012-07-27 15:08:15 +01001905 return r;
Joe Thornber991d9fa2011-10-31 20:21:18 +00001906}
1907
1908static int __highest_block(struct dm_thin_device *td, dm_block_t *result)
1909{
1910 int r;
1911 __le64 value_le;
1912 dm_block_t thin_root;
1913 struct dm_pool_metadata *pmd = td->pmd;
1914
1915 r = dm_btree_lookup(&pmd->tl_info, pmd->root, &td->id, &value_le);
1916 if (r)
1917 return r;
1918
1919 thin_root = le64_to_cpu(value_le);
1920
1921 return dm_btree_find_highest_key(&pmd->bl_info, thin_root, result);
1922}
1923
1924int dm_thin_get_highest_mapped_block(struct dm_thin_device *td,
1925 dm_block_t *result)
1926{
Joe Thornberda105ed2012-07-27 15:08:15 +01001927 int r = -EINVAL;
Joe Thornber991d9fa2011-10-31 20:21:18 +00001928 struct dm_pool_metadata *pmd = td->pmd;
1929
1930 down_read(&pmd->root_lock);
Joe Thornberda105ed2012-07-27 15:08:15 +01001931 if (!pmd->fail_io)
1932 r = __highest_block(td, result);
Joe Thornber991d9fa2011-10-31 20:21:18 +00001933 up_read(&pmd->root_lock);
1934
1935 return r;
1936}
1937
Joe Thornberb17446d2013-05-10 14:37:18 +01001938static int __resize_space_map(struct dm_space_map *sm, dm_block_t new_count)
Joe Thornber991d9fa2011-10-31 20:21:18 +00001939{
1940 int r;
1941 dm_block_t old_count;
1942
Joe Thornberb17446d2013-05-10 14:37:18 +01001943 r = dm_sm_get_nr_blocks(sm, &old_count);
Joe Thornber991d9fa2011-10-31 20:21:18 +00001944 if (r)
1945 return r;
1946
1947 if (new_count == old_count)
1948 return 0;
1949
1950 if (new_count < old_count) {
Joe Thornberb17446d2013-05-10 14:37:18 +01001951 DMERR("cannot reduce size of space map");
Joe Thornber991d9fa2011-10-31 20:21:18 +00001952 return -EINVAL;
1953 }
1954
Joe Thornberb17446d2013-05-10 14:37:18 +01001955 return dm_sm_extend(sm, new_count - old_count);
Joe Thornber991d9fa2011-10-31 20:21:18 +00001956}
1957
1958int dm_pool_resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_count)
1959{
Joe Thornberda105ed2012-07-27 15:08:15 +01001960 int r = -EINVAL;
Joe Thornber991d9fa2011-10-31 20:21:18 +00001961
1962 down_write(&pmd->root_lock);
Joe Thornberda105ed2012-07-27 15:08:15 +01001963 if (!pmd->fail_io)
Joe Thornberb17446d2013-05-10 14:37:18 +01001964 r = __resize_space_map(pmd->data_sm, new_count);
Joe Thornber991d9fa2011-10-31 20:21:18 +00001965 up_write(&pmd->root_lock);
1966
1967 return r;
1968}
Joe Thornber12ba58a2012-07-27 15:08:15 +01001969
Joe Thornber24347e92013-05-10 14:37:19 +01001970int dm_pool_resize_metadata_dev(struct dm_pool_metadata *pmd, dm_block_t new_count)
1971{
1972 int r = -EINVAL;
1973
1974 down_write(&pmd->root_lock);
Joe Thornbera9537db2018-09-10 16:50:09 +01001975 if (!pmd->fail_io) {
Joe Thornber24347e92013-05-10 14:37:19 +01001976 r = __resize_space_map(pmd->metadata_sm, new_count);
Joe Thornbera9537db2018-09-10 16:50:09 +01001977 if (!r)
1978 __set_metadata_reserve(pmd);
1979 }
Joe Thornber24347e92013-05-10 14:37:19 +01001980 up_write(&pmd->root_lock);
1981
1982 return r;
1983}
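/*
 * Illustrative sketch: both resize helpers only ever grow a space map, since
 * shrinking returns -EINVAL from __resize_space_map().  Growing the metadata
 * device after extending the underlying storage might look like the
 * following (new_metadata_dev_sectors is an assumed, caller-provided value;
 * new_count is in metadata blocks, not sectors):
 *
 *	dm_block_t new_blocks = new_metadata_dev_sectors >> SECTOR_TO_BLOCK_SHIFT;
 *	r = dm_pool_resize_metadata_dev(pmd, new_blocks);
 */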
1984
Joe Thornber12ba58a2012-07-27 15:08:15 +01001985void dm_pool_metadata_read_only(struct dm_pool_metadata *pmd)
1986{
1987 down_write(&pmd->root_lock);
Joe Thornber12ba58a2012-07-27 15:08:15 +01001988 dm_bm_set_read_only(pmd->bm);
1989 up_write(&pmd->root_lock);
1990}
Joe Thornberac8c3f32013-05-10 14:37:21 +01001991
Joe Thornber9b7aaa62013-12-04 16:58:19 -05001992void dm_pool_metadata_read_write(struct dm_pool_metadata *pmd)
1993{
1994 down_write(&pmd->root_lock);
Joe Thornber9b7aaa62013-12-04 16:58:19 -05001995 dm_bm_set_read_write(pmd->bm);
1996 up_write(&pmd->root_lock);
1997}
1998
Joe Thornberac8c3f32013-05-10 14:37:21 +01001999int dm_pool_register_metadata_threshold(struct dm_pool_metadata *pmd,
2000 dm_block_t threshold,
2001 dm_sm_threshold_fn fn,
2002 void *context)
2003{
2004 int r;
2005
2006 down_write(&pmd->root_lock);
2007 r = dm_sm_register_threshold_callback(pmd->metadata_sm, threshold, fn, context);
2008 up_write(&pmd->root_lock);
2009
2010 return r;
2011}
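/*
 * Usage sketch (illustrative): the threshold callback fires when free
 * metadata blocks drop to the registered level, giving the owner a chance to
 * warn or degrade gracefully before metadata space runs out.  Assuming
 * dm_sm_threshold_fn is the usual void (*)(void *context) callback:
 *
 *	static void metadata_low(void *context)
 *	{
 *		struct dm_pool_metadata *pmd = context;
 *		// e.g. log and notify userspace
 *	}
 *
 *	r = dm_pool_register_metadata_threshold(pmd, threshold_blocks,
 *						metadata_low, pmd);
 */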
Mike Snitzer07f2b6e2014-02-14 11:58:41 -05002012
2013int dm_pool_metadata_set_needs_check(struct dm_pool_metadata *pmd)
2014{
2015 int r;
2016 struct dm_block *sblock;
2017 struct thin_disk_superblock *disk_super;
2018
2019 down_write(&pmd->root_lock);
2020 pmd->flags |= THIN_METADATA_NEEDS_CHECK_FLAG;
2021
2022 r = superblock_lock(pmd, &sblock);
2023 if (r) {
2024 DMERR("couldn't read superblock");
2025 goto out;
2026 }
2027
2028 disk_super = dm_block_data(sblock);
2029 disk_super->flags = cpu_to_le32(pmd->flags);
2030
2031 dm_bm_unlock(sblock);
2032out:
2033 up_write(&pmd->root_lock);
2034 return r;
2035}
2036
2037bool dm_pool_metadata_needs_check(struct dm_pool_metadata *pmd)
2038{
2039 bool needs_check;
2040
2041 down_read(&pmd->root_lock);
2042 needs_check = pmd->flags & THIN_METADATA_NEEDS_CHECK_FLAG;
2043 up_read(&pmd->root_lock);
2044
2045 return needs_check;
2046}
Joe Thornber8a01a6a2014-10-06 15:28:30 +01002047
2048void dm_pool_issue_prefetches(struct dm_pool_metadata *pmd)
2049{
Joe Thornber2eae9e4482016-03-01 10:58:44 +00002050 down_read(&pmd->root_lock);
2051 if (!pmd->fail_io)
2052 dm_tm_issue_prefetches(pmd->tm);
2053 up_read(&pmd->root_lock);
Joe Thornber8a01a6a2014-10-06 15:28:30 +01002054}