e2fsck: fix rule-violating lblk->pblk mappings on bigalloc filesystems

As far as I can tell, logical block mappings on a bigalloc filesystem are
supposed to follow a few constraints:

 * The logical cluster offset must match the physical cluster offset.
 * A logical cluster may not map to multiple physical clusters.

Since the multiply-claimed block recovery code can be used to fix these
problems, teach e2fsck to find these transgressions and fix them.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
diff --git a/e2fsck/pass1.c b/e2fsck/pass1.c
index 647d91b..50a8b99 100644
--- a/e2fsck/pass1.c
+++ b/e2fsck/pass1.c
@@ -1797,6 +1797,40 @@
 	e2fsck_write_inode(ctx, ino, inode, source);
 }
 
+/*
+ * Use the multiple-blocks reclamation code to fix alignment problems in
+ * a bigalloc filesystem.  We want a logical cluster to map to *only* one
+ * physical cluster, and we want the block offsets within that cluster to
+ * line up.
+ */
+static int has_unaligned_cluster_map(e2fsck_t ctx,
+				     blk64_t last_pblk, e2_blkcnt_t last_lblk,
+				     blk64_t pblk, blk64_t lblk)
+{
+	blk64_t cluster_mask;
+
+	if (!ctx->fs->cluster_ratio_bits)
+		return 0;
+	cluster_mask = EXT2FS_CLUSTER_MASK(ctx->fs);
+
+	/*
+	 * If the block in the logical cluster doesn't align with the block in
+	 * the physical cluster...
+	 */
+	if ((lblk & cluster_mask) != (pblk & cluster_mask))
+		return 1;
+
+	/*
+	 * If we cross a physical cluster boundary within a logical cluster...
+	 */
+	if (last_pblk && (lblk & cluster_mask) != 0 &&
+	    EXT2FS_B2C(ctx->fs, lblk) == EXT2FS_B2C(ctx->fs, last_lblk) &&
+	    EXT2FS_B2C(ctx->fs, pblk) != EXT2FS_B2C(ctx->fs, last_pblk))
+		return 1;
+
+	return 0;
+}
+
 static void scan_extent_node(e2fsck_t ctx, struct problem_context *pctx,
 			     struct process_block_struct *pb,
 			     blk64_t start_block, blk64_t end_block,
@@ -2046,7 +2080,16 @@
 				mark_block_used(ctx, blk);
 				pb->num_blocks++;
 			}
-
+			if (has_unaligned_cluster_map(ctx, pb->previous_block,
+						      pb->last_block, blk,
+						      blockcnt)) {
+				pctx->blk = blockcnt;
+				pctx->blk2 = blk;
+				fix_problem(ctx, PR_1_MISALIGNED_CLUSTER, pctx);
+				mark_block_used(ctx, blk);
+				mark_block_used(ctx, blk);
+			}
+			pb->last_block = blockcnt;
 			pb->previous_block = blk;
 
 			if (is_dir) {
@@ -2544,6 +2587,13 @@
 		     ((unsigned) blockcnt & EXT2FS_CLUSTER_MASK(ctx->fs)))) {
 		mark_block_used(ctx, blk);
 		p->num_blocks++;
+	} else if (has_unaligned_cluster_map(ctx, p->previous_block,
+					     p->last_block, blk, blockcnt)) {
+		pctx->blk = blockcnt;
+		pctx->blk2 = blk;
+		fix_problem(ctx, PR_1_MISALIGNED_CLUSTER, pctx);
+		mark_block_used(ctx, blk);
+		mark_block_used(ctx, blk);
 	}
 	if (blockcnt >= 0)
 		p->last_block = blockcnt;
diff --git a/e2fsck/pass1b.c b/e2fsck/pass1b.c
index 9b38977..ec6c2bc 100644
--- a/e2fsck/pass1b.c
+++ b/e2fsck/pass1b.c
@@ -261,7 +261,7 @@
 	e2fsck_t	ctx;
 	ext2_ino_t	ino;
 	int		dup_blocks;
-	blk64_t		cur_cluster;
+	blk64_t		cur_cluster, phys_cluster;
 	struct ext2_inode *inode;
 	struct problem_context *pctx;
 };
@@ -315,6 +315,7 @@
 		pb.dup_blocks = 0;
 		pb.inode = &inode;
 		pb.cur_cluster = ~0;
+		pb.phys_cluster = ~0;
 
 		if (ext2fs_inode_has_valid_blocks2(fs, &inode) ||
 		    (ino == EXT2_BAD_INO))
@@ -351,13 +352,14 @@
 {
 	struct process_block_struct *p;
 	e2fsck_t ctx;
-	blk64_t	lc;
+	blk64_t	lc, pc;
 
 	if (HOLE_BLKADDR(*block_nr))
 		return 0;
 	p = (struct process_block_struct *) priv_data;
 	ctx = p->ctx;
 	lc = EXT2FS_B2C(fs, blockcnt);
+	pc = EXT2FS_B2C(fs, *block_nr);
 
 	if (!ext2fs_test_block_bitmap2(ctx->block_dup_map, *block_nr))
 		goto finish;
@@ -370,11 +372,19 @@
 	p->dup_blocks++;
 	ext2fs_mark_inode_bitmap2(inode_dup_map, p->ino);
 
-	if (lc != p->cur_cluster)
+	/*
+	 * Qualifications for submitting a block for duplicate processing:
+	 * It's an extent/indirect block (and has a negative logical offset);
+	 * we've crossed a logical cluster boundary; or the physical cluster
+	 * suddenly changed, which indicates that blocks in a logical cluster
+	 * are mapped to multiple physical clusters.
+	 */
+	if (blockcnt < 0 || lc != p->cur_cluster || pc != p->phys_cluster)
 		add_dupe(ctx, p->ino, EXT2FS_B2C(fs, *block_nr), p->inode);
 
 finish:
 	p->cur_cluster = lc;
+	p->phys_cluster = pc;
 	return 0;
 }
 
@@ -544,7 +554,11 @@
 			pctx.dir = t->dir;
 			fix_problem(ctx, PR_1D_DUP_FILE_LIST, &pctx);
 		}
-		if (file_ok) {
+		/*
+		 * Even if the file shares blocks with itself, we still need to
+		 * clone the blocks.
+		 */
+		if (file_ok && (meta_data ? shared_len+1 : shared_len) != 0) {
 			fix_problem(ctx, PR_1D_DUP_BLOCKS_DEALT, &pctx);
 			continue;
 		}
@@ -687,9 +701,10 @@
 	errcode_t	errcode;
 	blk64_t		dup_cluster;
 	blk64_t		alloc_block;
-	ext2_ino_t	dir;
+	ext2_ino_t	dir, ino;
 	char	*buf;
 	e2fsck_t ctx;
+	struct ext2_inode	*inode;
 };
 
 static int clone_file_block(ext2_filsys fs,
@@ -737,13 +752,26 @@
 			decrement_badcount(ctx, *block_nr, p);
 
 		cs->dup_cluster = c;
-
+		/*
+		 * Let's try an implied cluster allocation.  If we get the same
+		 * cluster back, then we need to find a new block; otherwise,
+		 * we're merely fixing the problem of one logical cluster being
+		 * mapped to multiple physical clusters.
+		 */
+		new_block = 0;
+		retval = ext2fs_map_cluster_block(fs, cs->ino, cs->inode,
+						  blockcnt, &new_block);
+		if (retval == 0 && new_block != 0 &&
+		    EXT2FS_B2C(ctx->fs, new_block) !=
+		    EXT2FS_B2C(ctx->fs, *block_nr))
+			goto cluster_alloc_ok;
 		retval = ext2fs_new_block2(fs, 0, ctx->block_found_map,
 					   &new_block);
 		if (retval) {
 			cs->errcode = retval;
 			return BLOCK_ABORT;
 		}
+cluster_alloc_ok:
 		cs->alloc_block = new_block;
 
 	got_block:
@@ -798,6 +826,8 @@
 	cs.dup_cluster = ~0;
 	cs.alloc_block = 0;
 	cs.ctx = ctx;
+	cs.ino = ino;
+	cs.inode = &dp->inode;
 	retval = ext2fs_get_mem(fs->blocksize, &cs.buf);
 	if (retval)
 		return retval;
diff --git a/e2fsck/problem.c b/e2fsck/problem.c
index a7291e5..6d9b1af 100644
--- a/e2fsck/problem.c
+++ b/e2fsck/problem.c
@@ -977,6 +977,11 @@
 	  N_("@d @i %i has @x marked uninitialized at @b %c.  "),
 	  PROMPT_FIX, PR_PREEN_OK },
 
+	/* Inode logical block (physical block ) is misaligned. */
+	{ PR_1_MISALIGNED_CLUSTER,
+	  N_("@i %i logical @b %b (physical @b %c) violates cluster allocation rules.\nWill fix in pass 1B.\n"),
+	  PROMPT_NONE, 0 },
+
 	/* Pass 1b errors */
 
 	/* Pass 1B: Rescan for duplicate/bad blocks */
diff --git a/e2fsck/problem.h b/e2fsck/problem.h
index 0a3347f..d673a4e 100644
--- a/e2fsck/problem.h
+++ b/e2fsck/problem.h
@@ -581,6 +581,9 @@
 /* uninit directory block */
 #define PR_1_UNINIT_DBLOCK		0x010073
 
+/* Inode logical block is misaligned */
+#define PR_1_MISALIGNED_CLUSTER		0x010074
+
 /*
  * Pass 1b errors
  */