[PATCH] dm snapshot: unify chunk_size

Persistent snapshots currently store a private copy of the chunk size.
Userspace also supplies the chunk size when loading a snapshot.  Ensure
consistency by only storing the chunk_size in one place instead of two.

Currently the two sizes will differ if the chunk size supplied by userspace
does not match the chunk size an existing snapshot actually uses.  Amongst
other problems, this causes an incorrect 'percentage full' to be reported.

The patch ensures consistency by only storing the chunk_size in one place,
removing it from struct pstore.  Some initialisation is delayed until the
correct chunk_size is known.  If read_header() discovers that the wrong chunk
size was supplied, the 'area' buffer (which the header already got read into)
is reinitialised to the correct size.

[akpm: too late for 2.6.17 - suitable for 2.6.17.x after it has settled]

Signed-off-by: Alasdair G Kergon <agk@redhat.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c
index cc07bbe..34a7593 100644
--- a/drivers/md/dm-exception-store.c
+++ b/drivers/md/dm-exception-store.c
@@ -91,7 +91,6 @@
 	struct dm_snapshot *snap;	/* up pointer to my snapshot */
 	int version;
 	int valid;
-	uint32_t chunk_size;
 	uint32_t exceptions_per_area;
 
 	/*
@@ -133,7 +132,7 @@
 	int r = -ENOMEM;
 	size_t len;
 
-	len = ps->chunk_size << SECTOR_SHIFT;
+	len = ps->snap->chunk_size << SECTOR_SHIFT;
 
 	/*
 	 * Allocate the chunk_size block of memory that will hold
@@ -160,8 +159,8 @@
 	unsigned long bits;
 
 	where.bdev = ps->snap->cow->bdev;
-	where.sector = ps->chunk_size * chunk;
-	where.count = ps->chunk_size;
+	where.sector = ps->snap->chunk_size * chunk;
+	where.count = ps->snap->chunk_size;
 
 	return dm_io_sync_vm(1, &where, rw, ps->area, &bits);
 }
@@ -188,7 +187,7 @@
 
 static int zero_area(struct pstore *ps, uint32_t area)
 {
-	memset(ps->area, 0, ps->chunk_size << SECTOR_SHIFT);
+	memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT);
 	return area_io(ps, area, WRITE);
 }
 
@@ -196,6 +195,7 @@
 {
 	int r;
 	struct disk_header *dh;
+	chunk_t chunk_size;
 
 	r = chunk_io(ps, 0, READ);
 	if (r)
@@ -210,8 +210,29 @@
 		*new_snapshot = 0;
 		ps->valid = le32_to_cpu(dh->valid);
 		ps->version = le32_to_cpu(dh->version);
-		ps->chunk_size = le32_to_cpu(dh->chunk_size);
+		chunk_size = le32_to_cpu(dh->chunk_size);
+		if (ps->snap->chunk_size != chunk_size) {
+			DMWARN("chunk size %llu in device metadata overrides "
+			       "table chunk size of %llu.",
+			       (unsigned long long)chunk_size,
+			       (unsigned long long)ps->snap->chunk_size);
 
+			/* We had a bogus chunk_size. Fix stuff up. */
+			dm_io_put(sectors_to_pages(ps->snap->chunk_size));
+			free_area(ps);
+
+			ps->snap->chunk_size = chunk_size;
+			ps->snap->chunk_mask = chunk_size - 1;
+			ps->snap->chunk_shift = ffs(chunk_size) - 1;
+
+			r = alloc_area(ps);
+			if (r)
+				return r;
+
+			r = dm_io_get(sectors_to_pages(chunk_size));
+			if (r)
+				return r;
+		}
 	} else {
 		DMWARN("Invalid/corrupt snapshot");
 		r = -ENXIO;
@@ -224,13 +245,13 @@
 {
 	struct disk_header *dh;
 
-	memset(ps->area, 0, ps->chunk_size << SECTOR_SHIFT);
+	memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT);
 
 	dh = (struct disk_header *) ps->area;
 	dh->magic = cpu_to_le32(SNAP_MAGIC);
 	dh->valid = cpu_to_le32(ps->valid);
 	dh->version = cpu_to_le32(ps->version);
-	dh->chunk_size = cpu_to_le32(ps->chunk_size);
+	dh->chunk_size = cpu_to_le32(ps->snap->chunk_size);
 
 	return chunk_io(ps, 0, WRITE);
 }
@@ -365,7 +386,7 @@
 {
 	struct pstore *ps = get_info(store);
 
-	dm_io_put(sectors_to_pages(ps->chunk_size));
+	dm_io_put(sectors_to_pages(ps->snap->chunk_size));
 	vfree(ps->callbacks);
 	free_area(ps);
 	kfree(ps);
@@ -384,6 +405,16 @@
 		return r;
 
 	/*
+	 * Now we know correct chunk_size, complete the initialisation.
+	 */
+	ps->exceptions_per_area = (ps->snap->chunk_size << SECTOR_SHIFT) /
+				  sizeof(struct disk_exception);
+	ps->callbacks = dm_vcalloc(ps->exceptions_per_area,
+			sizeof(*ps->callbacks));
+	if (!ps->callbacks)
+		return -ENOMEM;
+
+	/*
 	 * Do we need to setup a new snapshot ?
 	 */
 	if (new_snapshot) {
@@ -533,9 +564,6 @@
 	ps->snap = store->snap;
 	ps->valid = 1;
 	ps->version = SNAPSHOT_DISK_VERSION;
-	ps->chunk_size = chunk_size;
-	ps->exceptions_per_area = (chunk_size << SECTOR_SHIFT) /
-	    sizeof(struct disk_exception);
 	ps->next_free = 2;	/* skipping the header and first area */
 	ps->current_committed = 0;
 
@@ -543,18 +571,9 @@
 	if (r)
 		goto bad;
 
-	/*
-	 * Allocate space for all the callbacks.
-	 */
 	ps->callback_count = 0;
 	atomic_set(&ps->pending_count, 0);
-	ps->callbacks = dm_vcalloc(ps->exceptions_per_area,
-				   sizeof(*ps->callbacks));
-
-	if (!ps->callbacks) {
-		r = -ENOMEM;
-		goto bad;
-	}
+	ps->callbacks = NULL;
 
 	store->destroy = persistent_destroy;
 	store->read_metadata = persistent_read_metadata;