ubi: Be more paranoid while seaching for the most recent Fastmap Since PEB erasure is asynchornous it can happen that there is more than one Fastmap on the MTD. This is fine because the attach logic will pick the Fastmap data structure with the highest sequence number. On a not so well configured MTD stack spurious ECC errors are common. Causes can be different, bad hardware, wrong operating modes, etc... If the most current Fastmap renders bad due to ECC errors UBI might pick an older Fastmap to attach from. While this can only happen on an anyway broken setup it will show completely different sympthoms and makes finding the root cause much more difficult. So, be debug friendly and fall back to scanning mode of we're facing an ECC error while scanning for Fastmap. Cc: <stable@vger.kernel.org> Signed-off-by: Richard Weinberger <richard@nod.at>

commit: 74f2c6e9a47cf4e508198c8594626cc82906a13d [log] [tgz]
author: Richard Weinberger <richard@nod.at> Tue Jun 14 10:12:17 2016 +0200
committer: Richard Weinberger <richard@nod.at> Fri Jul 29 23:32:54 2016 +0200
tree: 353246f510fcff72011013eacaa228b92b4a45db
parent: 5283ec72b0cca44ccea63070f6954fd8675b9ac4 [diff] [blame]
diff --git a/drivers/mtd/ubi/attach.c b/drivers/mtd/ubi/attach.c
index bd6fc52..903becd 100644
--- a/drivers/mtd/ubi/attach.c
+++ b/drivers/mtd/ubi/attach.c

@@ -856,13 +856,15 @@
  * @ubi: UBI device description object
  * @ai: attaching information
  * @pnum: the physical eraseblock number
+ * @fast: true if we're scanning for a Fastmap
  *
  * This function reads UBI headers of PEB @pnum, checks them, and adds
  * information about this PEB to the corresponding list or RB-tree in the
  * "attaching info" structure. Returns zero if the physical eraseblock was
  * successfully handled and a negative error code in case of failure.
  */
-static int scan_peb(struct ubi_device *ubi, struct ubi_attach_info *ai, int pnum)
+static int scan_peb(struct ubi_device *ubi, struct ubi_attach_info *ai,
+		    int pnum, bool fast)
 {
 	long long ec;
 	int err, bitflips = 0, vol_id = -1, ec_err = 0;
@@ -980,6 +982,20 @@
 			 */
 			ai->maybe_bad_peb_count += 1;
 	case UBI_IO_BAD_HDR:
+			/*
+			 * If we're facing a bad VID header we have to drop *all*
+			 * Fastmap data structures we find. The most recent Fastmap
+			 * could be bad and therefore there is a chance that we attach
+			 * from an old one. On a fine MTD stack a PEB must not render
+			 * bad all of a sudden, but the reality is different.
+			 * So, let's be paranoid and help finding the root cause by
+			 * falling back to scanning mode instead of attaching with a
+			 * bad EBA table and cause data corruption which is hard to
+			 * analyze.
+			 */
+			if (fast)
+				ai->force_full_scan = 1;
+
 		if (ec_err)
 			/*
 			 * Both headers are corrupted. There is a possibility
@@ -1293,7 +1309,7 @@
 		cond_resched();
 
 		dbg_gen("process PEB %d", pnum);
-		err = scan_peb(ubi, ai, pnum);
+		err = scan_peb(ubi, ai, pnum, false);
 		if (err < 0)
 			goto out_vidh;
 	}
@@ -1407,7 +1423,7 @@
 		cond_resched();
 
 		dbg_gen("process PEB %d", pnum);
-		err = scan_peb(ubi, scan_ai, pnum);
+		err = scan_peb(ubi, scan_ai, pnum, true);
 		if (err < 0)
 			goto out_vidh;
 	}
@@ -1415,7 +1431,11 @@
 	ubi_free_vid_hdr(ubi, vidh);
 	kfree(ech);
 
-	err = ubi_scan_fastmap(ubi, *ai, scan_ai);
+	if (scan_ai->force_full_scan)
+		err = UBI_NO_FASTMAP;
+	else
+		err = ubi_scan_fastmap(ubi, *ai, scan_ai);
+
 	if (err) {
 		/*
 		 * Didn't attach via fastmap, do a full scan but reuse what
commit	74f2c6e9a47cf4e508198c8594626cc82906a13d	[log] [tgz]
author	Richard Weinberger <richard@nod.at>	Tue Jun 14 10:12:17 2016 +0200
committer	Richard Weinberger <richard@nod.at>	Fri Jul 29 23:32:54 2016 +0200
tree	353246f510fcff72011013eacaa228b92b4a45db
parent	5283ec72b0cca44ccea63070f6954fd8675b9ac4 [diff] [blame]