Change e2fsck to detect and offer to delete or rename duplicate 
filenames in directories when rebuilding directories using
"e2fsck -fD /dev/XXX"

diff --git a/e2fsck/ChangeLog b/e2fsck/ChangeLog
index 792f71c..e74a622 100644
--- a/e2fsck/ChangeLog
+++ b/e2fsck/ChangeLog
@@ -1,3 +1,23 @@
+2003-03-14  Theodore Ts'o  <tytso@mit.edu>
+
+	* rehash.c (duplicate_search_and_fix): Now search for duplicates
+		filenames, and either prompt to remove a complete
+		duplicate entry, or to rename a duplicate filename.
+		(e2fsck_rehash_dir): Use a progress bar to report
+		progress, and don't print all of the directory inodes as
+		they are optimized.
+
+	* problem.c, problem.h (PR_2_DUPLICATE_DIRENT,
+		PR_2_NON_UNIQUE_FILE):  New problem codes.
+	
+	* unix.c (e2fsck_simple_progress), e2fsck.h: New function which
+		can be called to provide specialized progress bars that
+		are not related to the top-level pass-based completion
+		percentage.
+
+	* pass3.c (e2fsck_adjust_inode_count), e2fsck.h: Export previously
+		static function.
+
 2003-03-06    <tytso@mit.edu>
 
 	* e2fsck.8.in: Fix minor nit in the -C option.  (Addresses Debian
diff --git a/e2fsck/e2fsck.h b/e2fsck/e2fsck.h
index 3526c71..67d4b95 100644
--- a/e2fsck/e2fsck.h
+++ b/e2fsck/e2fsck.h
@@ -409,6 +409,8 @@
 extern errcode_t e2fsck_expand_directory(e2fsck_t ctx, ext2_ino_t dir,
 					 int num, int gauranteed_size);
 extern ext2_ino_t e2fsck_get_lost_and_found(e2fsck_t ctx, int fix);
+extern errcode_t e2fsck_adjust_inode_count(e2fsck_t ctx, ext2_ino_t ino, 
+					   int adj);
 
 
 /* region.c */
@@ -456,3 +458,5 @@
 
 /* unix.c */
 extern void e2fsck_clear_progbar(e2fsck_t ctx);
+extern int e2fsck_simple_progress(e2fsck_t ctx, char *label,
+				  float percent, unsigned int dpynum);
diff --git a/e2fsck/pass3.c b/e2fsck/pass3.c
index 1e68b1e..86f2e62 100644
--- a/e2fsck/pass3.c
+++ b/e2fsck/pass3.c
@@ -45,7 +45,6 @@
 static int check_directory(e2fsck_t ctx, struct dir_info *dir,
 			   struct problem_context *pctx);
 static void fix_dotdot(e2fsck_t ctx, struct dir_info *dir, ext2_ino_t parent);
-static errcode_t adjust_inode_count(e2fsck_t ctx, ext2_ino_t ino, int adj);
 
 static ext2fs_inode_bitmap inode_loop_detect = 0;
 static ext2fs_inode_bitmap inode_done_map = 0;
@@ -406,7 +405,7 @@
 		dirinfo = e2fsck_get_dir_info(ctx, ino);
 		if (dirinfo)
 			dirinfo->parent = 0;
-		adjust_inode_count(ctx, ino, -1);
+		e2fsck_adjust_inode_count(ctx, ino, -1);
 	} else if (retval != EXT2_ET_FILE_NOT_FOUND) {
 		pctx.errcode = retval;
 		fix_problem(ctx, PR_3_ERR_FIND_LPF, &pctx);
@@ -435,7 +434,7 @@
 	/*
 	 * Next find a free inode.
 	 */
-	retval = ext2fs_new_inode(fs, EXT2_ROOT_INO, 040755,
+	retval = ext2fs_new_inode(fs, EXT2_ROOT_INO, 040700,
 				  ctx->inode_used_map, &ino);
 	if (retval) {
 		pctx.errcode = retval;
@@ -498,7 +497,7 @@
 	 * Miscellaneous bookkeeping that needs to be kept straight.
 	 */
 	e2fsck_add_dir_info(ctx, ino, EXT2_ROOT_INO);
-	adjust_inode_count(ctx, EXT2_ROOT_INO, 1);
+	e2fsck_adjust_inode_count(ctx, EXT2_ROOT_INO, 1);
 	ext2fs_icount_store(ctx->inode_count, ino, 2);
 	ext2fs_icount_store(ctx->inode_link_info, ino, 2);
 	ctx->lost_and_found = ino;
@@ -554,7 +553,7 @@
 		fix_problem(ctx, PR_3_CANT_RECONNECT, &pctx);
 		return 1;
 	}
-	adjust_inode_count(ctx, ino, 1);
+	e2fsck_adjust_inode_count(ctx, ino, 1);
 
 	return 0;
 }
@@ -562,7 +561,7 @@
 /*
  * Utility routine to adjust the inode counts on an inode.
  */
-static errcode_t adjust_inode_count(e2fsck_t ctx, ext2_ino_t ino, int adj)
+errcode_t e2fsck_adjust_inode_count(e2fsck_t ctx, ext2_ino_t ino, int adj)
 {
 	ext2_filsys fs = ctx->fs;
 	errcode_t		retval;
@@ -628,12 +627,12 @@
 
 	clear_problem_context(&pctx);
 	
-	retval = adjust_inode_count(fp->ctx, dirent->inode, -1);
+	retval = e2fsck_adjust_inode_count(fp->ctx, dirent->inode, -1);
 	if (retval) {
 		pctx.errcode = retval;
 		fix_problem(fp->ctx, PR_3_ADJUST_INODE, &pctx);
 	}
-	retval = adjust_inode_count(fp->ctx, fp->parent, 1);
+	retval = e2fsck_adjust_inode_count(fp->ctx, fp->parent, 1);
 	if (retval) {
 		pctx.errcode = retval;
 		fix_problem(fp->ctx, PR_3_ADJUST_INODE, &pctx);
diff --git a/e2fsck/problem.c b/e2fsck/problem.c
index 29cd462..3870fd4 100644
--- a/e2fsck/problem.c
+++ b/e2fsck/problem.c
@@ -1087,6 +1087,16 @@
 	  N_("@p @h %d: node (%B) has bad depth\n"),
 	  PROMPT_NONE, 0 },
 	
+	/* Duplicate directory entry found */
+	{ PR_2_DUPLICATE_DIRENT,
+	  N_("Duplicate @E found.  "),
+	  PROMPT_CLEAR, 0 },
+	
+	/* Non-unique filename found */
+	{ PR_2_NON_UNIQUE_FILE,
+	  N_("@E has a non-unique filename.\nRename to %s"),
+	  PROMPT_NULL, 0 },
+	
 	/* Pass 3 errors */
 
 	/* Pass 3: Checking directory connectivity */
diff --git a/e2fsck/problem.h b/e2fsck/problem.h
index f4a87d2..3c405ef 100644
--- a/e2fsck/problem.h
+++ b/e2fsck/problem.h
@@ -643,7 +643,13 @@
 #define PR_2_HTREE_HASH_ORDER	0x02003F
 
 /* Node in HTREE directory has bad depth */
-#define PR_2_HTREE_BAD_DEPTH	0x020040	
+#define PR_2_HTREE_BAD_DEPTH	0x020040
+
+/* Duplicate directory entry found */
+#define PR_2_DUPLICATE_DIRENT	0x020041
+
+/* Non-unique filename found */
+#define PR_2_NON_UNIQUE_FILE	0x020042
 
 /*
  * Pass 3 errors
diff --git a/e2fsck/rehash.c b/e2fsck/rehash.c
index d8bfb40..9c185b4 100644
--- a/e2fsck/rehash.c
+++ b/e2fsck/rehash.c
@@ -157,28 +157,6 @@
 }
 
 /* Used for sorting the hash entry */
-static EXT2_QSORT_TYPE hash_cmp(const void *a, const void *b)
-{
-	const struct hash_entry *he_a = (const struct hash_entry *) a;
-	const struct hash_entry *he_b = (const struct hash_entry *) b;
-	int	ret;
-	
-	if (he_a->hash > he_b->hash)
-		ret = 1;
-	else if (he_a->hash < he_b->hash)
-		ret = -1;
-	else {
-		if (he_a->minor_hash > he_b->minor_hash)
-			ret = 1;
-		else if (he_a->minor_hash < he_b->minor_hash)
-			ret = -1;
-		else
-			ret = 0;
-	}
-	return ret;
-}
-
-/* Used for sorting the hash entry */
 static EXT2_QSORT_TYPE name_cmp(const void *a, const void *b)
 {
 	const struct hash_entry *he_a = (const struct hash_entry *) a;
@@ -202,6 +180,28 @@
 	return ret;
 }
 
+/* Used for sorting the hash entry */
+static EXT2_QSORT_TYPE hash_cmp(const void *a, const void *b)
+{
+	const struct hash_entry *he_a = (const struct hash_entry *) a;
+	const struct hash_entry *he_b = (const struct hash_entry *) b;
+	int	ret;
+	
+	if (he_a->hash > he_b->hash)
+		ret = 1;
+	else if (he_a->hash < he_b->hash)
+		ret = -1;
+	else {
+		if (he_a->minor_hash > he_b->minor_hash)
+			ret = 1;
+		else if (he_a->minor_hash < he_b->minor_hash)
+			ret = -1;
+		else
+			ret = name_cmp(a, b);
+	}
+	return ret;
+}
+
 static errcode_t alloc_size_dir(ext2_filsys fs, struct out_dir *outdir, 
 				int blocks)
 {
@@ -249,6 +249,127 @@
 	return 0;
 }
 
+/*
+ * This function is used to make a unique filename.  We do this by
+ * appending ~0, and then incrementing the number.  However, we cannot
+ * expand the length of the filename beyond the padding available in
+ * the directory entry.
+ */
+static void mutate_name(char *str, __u16 *len)
+{
+	int	i;
+	__u16	l = *len & 0xFF, h = *len & 0xff00;
+	
+	/*
+	 * First check to see if it looks the name has been mutated
+	 * already
+	 */
+	for (i = l-1; i > 0; i--) {
+		if (!isdigit(str[i]))
+			break;
+	}
+	if ((i == l-1) || (str[i] != '~')) {
+		if (((l-1) & 3) < 2)
+			l += 2;
+		else
+			l = (l+3) & ~3;
+		str[l-2] = '~';
+		str[l-1] = '0';
+		*len = l | h;
+		return;
+	}
+	for (i = l-1; i >= 0; i--) {
+		if (isdigit(str[i])) {
+			if (str[i] == '9')
+				str[i] = '0';
+			else {
+				str[i]++;
+				return;
+			}
+			continue;
+		}
+		if (i == 1) {
+			if (str[0] == 'z')
+				str[0] = 'A';
+			else if (str[0] == 'Z') {
+				str[0] = '~';
+				str[1] = '0';
+			} else
+				str[0]++;
+		} else if (i > 0) {
+			str[i] = '1';
+			str[i-1] = '~';
+		} else {
+			if (str[0] == '~')
+				str[0] = 'a';
+			else 
+				str[0]++;
+		}
+		break;
+	}
+}
+
+static int duplicate_search_and_fix(e2fsck_t ctx, ext2_filsys fs,
+				    ext2_ino_t ino,
+				    struct fill_dir_struct *fd)
+{
+	struct problem_context	pctx;
+	struct hash_entry 	*ent, *prev;
+	int			i, j;
+	int			fixed = 0;
+	char			new_name[256];
+	__u16			new_len;
+	
+	clear_problem_context(&pctx);
+	pctx.ino = ino;
+
+	for (i=1; i < fd->num_array; i++) {
+		ent = fd->harray + i;
+		prev = ent - 1;
+		if (!ent->dir->inode ||
+		    ((ent->dir->name_len & 0xFF) !=
+		     (prev->dir->name_len & 0xFF)) ||
+		    (strncmp(ent->dir->name, prev->dir->name,
+			     ent->dir->name_len & 0xFF)))
+			continue;
+		pctx.dirent = ent->dir;
+		if ((ent->dir->inode == prev->dir->inode) &&
+		    fix_problem(ctx, PR_2_DUPLICATE_DIRENT, &pctx)) {
+			e2fsck_adjust_inode_count(ctx, ent->dir->inode, -1);
+			ent->dir->inode = 0;
+			fixed++;
+			continue;
+		}
+		memcpy(new_name, ent->dir->name, ent->dir->name_len & 0xFF);
+		new_len = ent->dir->name_len;
+		mutate_name(new_name, &new_len);
+		for (j=0; j < fd->num_array; j++) {
+			if ((i==j) ||
+			    ((ent->dir->name_len & 0xFF) !=
+			     (fd->harray[j].dir->name_len & 0xFF)) ||
+			    (strncmp(new_name, fd->harray[j].dir->name,
+				     new_len & 0xFF)))
+				continue;
+			mutate_name(new_name, &new_len);
+			
+			j = -1;
+		}
+		new_name[new_len & 0xFF] = 0;
+		pctx.str = new_name;
+		if (fix_problem(ctx, PR_2_NON_UNIQUE_FILE, &pctx)) {
+			memcpy(ent->dir->name, new_name, new_len & 0xFF);
+			ent->dir->name_len = new_len;
+			ext2fs_dirhash(fs->super->s_def_hash_version,
+				       ent->dir->name,
+				       ent->dir->name_len & 0xFF,
+				       fs->super->s_hash_seed,
+				       &ent->hash, &ent->minor_hash);
+			fixed++;
+		}
+	}
+	return fixed;
+}
+
 
 static errcode_t copy_dir_entries(ext2_filsys fs,
 				  struct fill_dir_struct *fd,
@@ -277,6 +398,8 @@
 	left = fs->blocksize;
 	for (i=0; i < fd->num_array; i++) {
 		ent = fd->harray + i;
+		if (ent->dir->inode == 0)
+			continue;
 		rec_len = EXT2_DIR_REC_LEN(ent->dir->name_len & 0xFF);
 		if (rec_len > left) {
 			if (left)
@@ -575,6 +698,7 @@
 #endif
 
 	/* Sort the list */
+resort:
 	if (fd.compress)
 		qsort(fd.harray+2, fd.num_array-2,
 		      sizeof(struct hash_entry), name_cmp);
@@ -583,6 +707,12 @@
 		      sizeof(struct hash_entry), hash_cmp);
 
 	/*
+	 * Look for duplicates
+	 */
+	if (duplicate_search_and_fix(ctx, fs, ino, &fd))
+		goto resort;
+
+	/*
 	 * Copy the directory entries.  In a htree directory these
 	 * will become the leaf nodes.
 	 */
@@ -623,7 +753,7 @@
 	ext2_u32_iterate 	iter;
 	ext2_ino_t		ino;
 	errcode_t		retval;
-	int			i, all_dirs, dir_index, first = 1;
+	int			i, cur, max, all_dirs, dir_index, first = 1;
 
 #ifdef RESOURCE_TRACK
 	init_resource_track(&rtrack);
@@ -639,9 +769,11 @@
 	clear_problem_context(&pctx);
 
 	dir_index = ctx->fs->super->s_feature_compat & EXT2_FEATURE_COMPAT_DIR_INDEX;
-	if (all_dirs)
+	cur = 0;
+	if (all_dirs) {
 		i = 0;
-	else {
+		max = e2fsck_get_num_dirinfo(ctx);
+	} else {
 		retval = ext2fs_u32_list_iterate_begin(ctx->dirs_to_hash, 
 						       &iter);
 		if (retval) {
@@ -649,6 +781,7 @@
 			fix_problem(ctx, PR_3A_OPTIMIZE_ITER, &pctx);
 			return;
 		}
+		max = ext2fs_u32_list_count(ctx->dirs_to_hash);
 	}
 	while (1) {
 		if (all_dirs) {
@@ -666,12 +799,16 @@
 			fix_problem(ctx, PR_3A_PASS_HEADER, &pctx);
 			first = 0;
 		}
+#if 0
 		fix_problem(ctx, PR_3A_OPTIMIZE_DIR, &pctx);
+#endif
 		pctx.errcode = e2fsck_rehash_dir(ctx, ino);
 		if (pctx.errcode) {
 			end_problem_latch(ctx, PR_LATCH_OPTIMIZE_DIR);
 			fix_problem(ctx, PR_3A_OPTIMIZE_DIR_ERR, &pctx);
 		}
+		e2fsck_simple_progress(ctx, "Rebuilding directory",
+				       (float) (++cur) / (float) max, ino);
 	}
 	end_problem_latch(ctx, PR_LATCH_OPTIMIZE_DIR);
 	if (!all_dirs)
diff --git a/e2fsck/unix.c b/e2fsck/unix.c
index ec1821a..492ba4a 100644
--- a/e2fsck/unix.c
+++ b/e2fsck/unix.c
@@ -328,6 +328,71 @@
 	ctx->flags &= ~E2F_FLAG_PROG_BAR;
 }
 
+int e2fsck_simple_progress(e2fsck_t ctx, char *label, float percent,
+			   unsigned int dpynum)
+{
+	static const char spinner[] = "\\|/-";
+	char buf[80];
+	int	i;
+	int	tick;
+	struct timeval	tv;
+	int dpywidth;
+
+	if (ctx->flags & E2F_FLAG_PROG_SUPPRESS)
+		return 0;
+
+	/*
+	 * Calculate the new progress position.  If the
+	 * percentage hasn't changed, then we skip out right
+	 * away. 
+	 */
+	if (ctx->progress_last_percent == (int) 10 * percent)
+		return 0;
+	ctx->progress_last_percent = (int) 10 * percent;
+
+	/*
+	 * If we've already updated the spinner once within
+	 * the last 1/8th of a second, no point doing it
+	 * again.
+	 */
+	gettimeofday(&tv, NULL);
+	tick = (tv.tv_sec << 3) + (tv.tv_usec / (1000000 / 8));
+	if ((tick == ctx->progress_last_time) &&
+	    (percent != 0.0) && (percent != 100.0))
+		return 0;
+	ctx->progress_last_time = tick;
+
+	/*
+	 * Advance the spinner, and note that the progress bar
+	 * will be on the screen
+	 */
+	ctx->progress_pos = (ctx->progress_pos+1) & 3;
+	ctx->flags |= E2F_FLAG_PROG_BAR;
+
+	dpywidth = 66 - strlen(label);
+	dpywidth = 8 * (dpywidth / 8);
+	if (dpynum)
+		dpywidth -= 8;
+
+	i = ((percent * dpywidth) + 50) / 100;
+	printf("%s: |%s%s", label, bar + (sizeof(bar) - (i+1)),
+	       spaces + (sizeof(spaces) - (dpywidth - i + 1)));
+	if (percent == 100.0)
+		fputc('|', stdout);
+	else
+		fputc(spinner[ctx->progress_pos & 3], stdout);
+	if (dpynum)
+		printf(" %4.1f%%  %u\r", percent, dpynum);
+	else
+		printf(" %4.1f%%   \r", percent);
+	
+	if (percent == 100.0)
+		e2fsck_clear_progbar(ctx);
+	fflush(stdout);
+
+	return 0;
+}
+
 static int e2fsck_update_progress(e2fsck_t ctx, int pass,
 				  unsigned long cur, unsigned long max)
 {
@@ -346,53 +411,9 @@
 		sprintf(buf, "%d %lu %lu\n", pass, cur, max);
 		write(ctx->progress_fd, buf, strlen(buf));
 	} else {
-		if (ctx->flags & E2F_FLAG_PROG_SUPPRESS)
-			return 0;
-		if (dpywidth == 0) {
-			dpywidth = 66 - strlen(ctx->device_name);
-			dpywidth = 8 * (dpywidth / 8);
-		}
-		/*
-		 * Calculate the new progress position.  If the
-		 * percentage hasn't changed, then we skip out right
-		 * away. 
-		 */
 		percent = calc_percent(&e2fsck_tbl, pass, cur, max);
-		if (ctx->progress_last_percent == (int) 10 * percent)
-			return 0;
-		ctx->progress_last_percent = (int) 10 * percent;
-
-		/*
-		 * If we've already updated the spinner once within
-		 * the last 1/8th of a second, no point doing it
-		 * again.
-		 */
-		gettimeofday(&tv, NULL);
-		tick = (tv.tv_sec << 3) + (tv.tv_usec / (1000000 / 8));
-		if ((tick == ctx->progress_last_time) &&
-		    (cur != max) && (cur != 0))
-			return 0;
-		ctx->progress_last_time = tick;
-
-		/*
-		 * Advance the spinner, and note that the progress bar
-		 * will be on the screen
-		 */
-		ctx->progress_pos = (ctx->progress_pos+1) & 3;
-		ctx->flags |= E2F_FLAG_PROG_BAR;
-		
-		i = ((percent * dpywidth) + 50) / 100;
-		printf("%s: |%s%s", ctx->device_name,
-		       bar + (sizeof(bar) - (i+1)),
-		       spaces + (sizeof(spaces) - (dpywidth - i + 1)));
-		if (percent == 100.0)
-			fputc('|', stdout);
-		else
-			fputc(spinner[ctx->progress_pos & 3], stdout);
-		printf(" %4.1f%%   \r", percent);
-		if (percent == 100.0)
-			e2fsck_clear_progbar(ctx);
-		fflush(stdout);
+		e2fsck_simple_progress(ctx, ctx->device_name,
+				       percent, 0);
 	}
 	return 0;
 }