mtd: nand: add generic READ RETRY support

Modern MLC (and even SLC?) NAND can experience a large number of
bitflips (beyond the recommended correctability capacity) due to drifts
in the voltage threshold (Vt). These bitflips can cause ECC errors to
occur well within the expected lifetime of the flash. To account for
this, some manufacturers provide a mechanism for shifting the Vt
threshold after a corrupted read.

The generic pattern seems to be that a particular flash has N read retry
modes (where N = 0, traditionally), and after an ECC failure, the host
should reconfigure the flash to use the next available mode, then retry
the read operation. This process repeats until all bitfips can be
corrected or until the host has tried all available retry modes.

This patch adds the infrastructure support for a
vendor-specific/flash-specific callback, used for setting the read-retry
mode (i.e., voltage threshold).

For now, this patch always returns the flash to mode 0 (the default
mode) after a successful read-retry, according to the flowchart found in
Micron's datasheets. This may need to change in the future if it is
determined that eventually, mode 0 is insufficient for the majority of
the flash cells (and so for performance reasons, we should leave the
flash in mode 1, 2, etc.).

Signed-off-by: Brian Norris <computersforpeace@gmail.com>
Acked-by: Huang Shijie <b32955@freescale.com>
diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index b5c3768..4c2f39f 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -1410,6 +1410,30 @@
 }
 
 /**
+ * nand_setup_read_retry - [INTERN] Set the READ RETRY mode
+ * @mtd: MTD device structure
+ * @retry_mode: the retry mode to use
+ *
+ * Some vendors supply a special command to shift the Vt threshold, to be used
+ * when there are too many bitflips in a page (i.e., ECC error). After setting
+ * a new threshold, the host should retry reading the page.
+ */
+static int nand_setup_read_retry(struct mtd_info *mtd, int retry_mode)
+{
+	struct nand_chip *chip = mtd->priv;
+
+	pr_debug("setting READ RETRY mode %d\n", retry_mode);
+
+	if (retry_mode >= chip->read_retries)
+		return -EINVAL;
+
+	if (!chip->setup_read_retry)
+		return -EOPNOTSUPP;
+
+	return chip->setup_read_retry(mtd, retry_mode);
+}
+
+/**
  * nand_do_read_ops - [INTERN] Read data with ECC
  * @mtd: MTD device structure
  * @from: offset to read from
@@ -1430,6 +1454,7 @@
 
 	uint8_t *bufpoi, *oob, *buf;
 	unsigned int max_bitflips = 0;
+	int retry_mode = 0;
 	bool ecc_fail = false;
 
 	chipnr = (int)(from >> chip->chip_shift);
@@ -1454,6 +1479,7 @@
 		if (realpage != chip->pagebuf || oob) {
 			bufpoi = aligned ? buf : chip->buffers->databuf;
 
+read_retry:
 			chip->cmdfunc(mtd, NAND_CMD_READ0, 0x00, page);
 
 			/*
@@ -1494,8 +1520,6 @@
 				memcpy(buf, chip->buffers->databuf + col, bytes);
 			}
 
-			buf += bytes;
-
 			if (unlikely(oob)) {
 				int toread = min(oobreadlen, max_oobsize);
 
@@ -1514,8 +1538,24 @@
 					nand_wait_ready(mtd);
 			}
 
-			if (mtd->ecc_stats.failed - ecc_failures)
-				ecc_fail = true;
+			if (mtd->ecc_stats.failed - ecc_failures) {
+				if (retry_mode + 1 <= chip->read_retries) {
+					retry_mode++;
+					ret = nand_setup_read_retry(mtd,
+							retry_mode);
+					if (ret < 0)
+						break;
+
+					/* Reset failures; retry */
+					mtd->ecc_stats.failed = ecc_failures;
+					goto read_retry;
+				} else {
+					/* No more retry modes; real failure */
+					ecc_fail = true;
+				}
+			}
+
+			buf += bytes;
 		} else {
 			memcpy(buf, chip->buffers->databuf + col, bytes);
 			buf += bytes;
@@ -1525,6 +1565,14 @@
 
 		readlen -= bytes;
 
+		/* Reset to retry mode 0 */
+		if (retry_mode) {
+			ret = nand_setup_read_retry(mtd, 0);
+			if (ret < 0)
+				break;
+			retry_mode = 0;
+		}
+
 		if (!readlen)
 			break;
 
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 029fe59..267d61d 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -472,6 +472,8 @@
  *			commands to the chip.
  * @waitfunc:		[REPLACEABLE] hardwarespecific function for wait on
  *			ready.
+ * @setup_read_retry:	[FLASHSPECIFIC] flash (vendor) specific function for
+ *			setting the read-retry mode. Mostly needed for MLC NAND.
  * @ecc:		[BOARDSPECIFIC] ECC control structure
  * @buffers:		buffer structure for read/write
  * @hwcontrol:		platform-specific hardware control structure
@@ -518,6 +520,7 @@
  *			non 0 if ONFI supported.
  * @onfi_params:	[INTERN] holds the ONFI page parameter when ONFI is
  *			supported, 0 otherwise.
+ * @read_retries:	[INTERN] the number of read retry modes supported
  * @onfi_set_features:	[REPLACEABLE] set the features for ONFI nand
  * @onfi_get_features:	[REPLACEABLE] get the features for ONFI nand
  * @bbt:		[INTERN] bad block table pointer
@@ -565,6 +568,7 @@
 			int feature_addr, uint8_t *subfeature_para);
 	int (*onfi_get_features)(struct mtd_info *mtd, struct nand_chip *chip,
 			int feature_addr, uint8_t *subfeature_para);
+	int (*setup_read_retry)(struct mtd_info *mtd, int retry_mode);
 
 	int chip_delay;
 	unsigned int options;
@@ -589,6 +593,8 @@
 	int onfi_version;
 	struct nand_onfi_params	onfi_params;
 
+	int read_retries;
+
 	flstate_t state;
 
 	uint8_t *oob_poi;