dm thin: ensure user takes action to validate data and metadata consistency If a thin metadata operation fails the current transaction will abort, whereby causing potential for IO layers up the stack (e.g. filesystems) to have data loss. As such, set THIN_METADATA_NEEDS_CHECK_FLAG in the thin metadata's superblock which: 1) requires the user verify the thin metadata is consistent (e.g. use thin_check, etc) 2) suggests the user verify the thin data is consistent (e.g. use fsck) The only way to clear the superblock's THIN_METADATA_NEEDS_CHECK_FLAG is to run thin_repair. On metadata operation failure: abort current metadata transaction, set pool in read-only mode, and now set the needs_check flag. As part of this change, constraints are introduced or relaxed: * don't allow a pool to transition to write mode if needs_check is set * don't allow data or metadata space to be resized if needs_check is set * if a thin pool's metadata space is exhausted: the kernel will now force the user to take the pool offline for repair before the kernel will allow the metadata space to be extended. Also, update Documentation to include information about when the thin provisioning target commits metadata, how it handles metadata failures and running out of space. Signed-off-by: Mike Snitzer <snitzer@redhat.com> Signed-off-by: Joe Thornber <ejt@redhat.com>

commit: 07f2b6e0382ec4c59887d5954683f1a0b265574e [log] [tgz]
author: Mike Snitzer <snitzer@redhat.com> Fri Feb 14 11:58:41 2014 -0500
committer: Mike Snitzer <snitzer@redhat.com> Wed Mar 05 15:25:35 2014 -0500
tree: 4863593c0fb83c54cb2865f4f0f6a44969aa28a6
parent: cdc2b4158405f1975f9d5205096f08430eda1c0e [diff] [blame]
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index dfa48ec..a04eba9 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c

@@ -1403,7 +1403,28 @@
 {
 	int r;
 	struct pool_c *pt = pool->ti->private;
-	enum pool_mode old_mode = pool->pf.mode;
+	bool needs_check = dm_pool_metadata_needs_check(pool->pmd);
+	enum pool_mode old_mode = get_pool_mode(pool);
+
+	/*
+	 * Never allow the pool to transition to PM_WRITE mode if user
+	 * intervention is required to verify metadata and data consistency.
+	 */
+	if (new_mode == PM_WRITE && needs_check) {
+		DMERR("%s: unable to switch pool to write mode until repaired.",
+		      dm_device_name(pool->pool_md));
+		if (old_mode != new_mode)
+			new_mode = old_mode;
+		else
+			new_mode = PM_READ_ONLY;
+	}
+	/*
+	 * If we were in PM_FAIL mode, rollback of metadata failed.  We're
+	 * not going to recover without a thin_repair.	So we never let the
+	 * pool move out of the old mode.
+	 */
+	if (old_mode == PM_FAIL)
+		new_mode = old_mode;
 
 	switch (new_mode) {
 	case PM_FAIL:
@@ -1467,19 +1488,28 @@
 	set_pool_mode(pool, PM_READ_ONLY);
 }
 
+static void abort_transaction(struct pool *pool)
+{
+	const char *dev_name = dm_device_name(pool->pool_md);
+
+	DMERR_LIMIT("%s: aborting current metadata transaction", dev_name);
+	if (dm_pool_abort_metadata(pool->pmd)) {
+		DMERR("%s: failed to abort metadata transaction", dev_name);
+		set_pool_mode(pool, PM_FAIL);
+	}
+
+	if (dm_pool_metadata_set_needs_check(pool->pmd)) {
+		DMERR("%s: failed to set 'needs_check' flag in metadata", dev_name);
+		set_pool_mode(pool, PM_FAIL);
+	}
+}
+
 static void metadata_operation_failed(struct pool *pool, const char *op, int r)
 {
-	dm_block_t free_blocks;
-
 	DMERR_LIMIT("%s: metadata operation '%s' failed: error = %d",
 		    dm_device_name(pool->pool_md), op, r);
 
-	if (r == -ENOSPC &&
-	    !dm_pool_get_free_metadata_block_count(pool->pmd, &free_blocks) &&
-	    !free_blocks)
-		DMERR_LIMIT("%s: no free metadata space available.",
-			    dm_device_name(pool->pool_md));
-
+	abort_transaction(pool);
 	set_pool_mode(pool, PM_READ_ONLY);
 }
 
@@ -1693,7 +1723,7 @@
 	/*
 	 * We want to make sure that a pool in PM_FAIL mode is never upgraded.
 	 */
-	enum pool_mode old_mode = pool->pf.mode;
+	enum pool_mode old_mode = get_pool_mode(pool);
 	enum pool_mode new_mode = pt->adjusted_pf.mode;
 
 	/*
@@ -1707,16 +1737,6 @@
 	pool->pf = pt->adjusted_pf;
 	pool->low_water_blocks = pt->low_water_blocks;
 
-	/*
-	 * If we were in PM_FAIL mode, rollback of metadata failed.  We're
-	 * not going to recover without a thin_repair.  So we never let the
-	 * pool move out of the old mode.  On the other hand a PM_READ_ONLY
-	 * may have been due to a lack of metadata or data space, and may
-	 * now work (ie. if the underlying devices have been resized).
-	 */
-	if (old_mode == PM_FAIL)
-		new_mode = old_mode;
-
 	set_pool_mode(pool, new_mode);
 
 	return 0;
@@ -2259,6 +2279,12 @@
 		return -EINVAL;
 
 	} else if (data_size > sb_data_size) {
+		if (dm_pool_metadata_needs_check(pool->pmd)) {
+			DMERR("%s: unable to grow the data device until repaired.",
+			      dm_device_name(pool->pool_md));
+			return 0;
+		}
+
 		if (sb_data_size)
 			DMINFO("%s: growing the data device from %llu to %llu blocks",
 			       dm_device_name(pool->pool_md),
@@ -2300,6 +2326,12 @@
 		return -EINVAL;
 
 	} else if (metadata_dev_size > sb_metadata_dev_size) {
+		if (dm_pool_metadata_needs_check(pool->pmd)) {
+			DMERR("%s: unable to grow the metadata device until repaired.",
+			      dm_device_name(pool->pool_md));
+			return 0;
+		}
+
 		warn_if_metadata_device_too_big(pool->md_dev);
 		DMINFO("%s: growing the metadata device from %llu to %llu blocks",
 		       dm_device_name(pool->pool_md),
@@ -2801,7 +2833,7 @@
 	.name = "thin-pool",
 	.features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
 		    DM_TARGET_IMMUTABLE,
-	.version = {1, 10, 0},
+	.version = {1, 11, 0},
 	.module = THIS_MODULE,
 	.ctr = pool_ctr,
 	.dtr = pool_dtr,
@@ -3091,7 +3123,7 @@
 
 static struct target_type thin_target = {
 	.name = "thin",
-	.version = {1, 10, 0},
+	.version = {1, 11, 0},
 	.module	= THIS_MODULE,
 	.ctr = thin_ctr,
 	.dtr = thin_dtr,
commit	07f2b6e0382ec4c59887d5954683f1a0b265574e	[log] [tgz]
author	Mike Snitzer <snitzer@redhat.com>	Fri Feb 14 11:58:41 2014 -0500
committer	Mike Snitzer <snitzer@redhat.com>	Wed Mar 05 15:25:35 2014 -0500
tree	4863593c0fb83c54cb2865f4f0f6a44969aa28a6
parent	cdc2b4158405f1975f9d5205096f08430eda1c0e [diff] [blame]