[S390] dasd: improve error recovery for internal I/O

Most of the error conditions reported by a FICON storage server
indicate situations which can be recovered. Sometimes the host just
needs to retry an I/O request, but sometimes the recovery
is more complex and requires the device driver to wait, choose
a different path, etc.

The DASD device driver has a fully featured error recovery
for normal block layer I/O, but not for internal I/O request which
are for example used during the device bring up.
This can lead to situations where the IPL of a system fails because
DASD devices are not properly recognized.
This patch will extend the internal I/O handling to use the existing
error recovery procedures.

Signed-off-by: Stefan Weinhuber <wein@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
diff --git a/drivers/s390/block/dasd_3990_erp.c b/drivers/s390/block/dasd_3990_erp.c
index 316eb12..44796ba 100644
--- a/drivers/s390/block/dasd_3990_erp.c
+++ b/drivers/s390/block/dasd_3990_erp.c
@@ -69,8 +69,7 @@
  *   processing until the started timer has expired or an related
  *   interrupt was received.
  */
-static void
-dasd_3990_erp_block_queue(struct dasd_ccw_req * erp, int expires)
+static void dasd_3990_erp_block_queue(struct dasd_ccw_req *erp, int expires)
 {
 
 	struct dasd_device *device = erp->startdev;
@@ -80,10 +79,13 @@
 		    "blocking request queue for %is", expires/HZ);
 
 	spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags);
-	device->stopped |= DASD_STOPPED_PENDING;
+	dasd_device_set_stop_bits(device, DASD_STOPPED_PENDING);
 	spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags);
 	erp->status = DASD_CQR_FILLED;
-	dasd_block_set_timer(device->block, expires);
+	if (erp->block)
+		dasd_block_set_timer(erp->block, expires);
+	else
+		dasd_device_set_timer(device, expires);
 }
 
 /*
@@ -242,9 +244,13 @@
  * DESCRIPTION
  *   Setup ERP to do the ERP action 1 (see Reference manual).
  *   Repeat the operation on a different channel path.
- *   If all alternate paths have been tried, the request is posted with a
- *   permanent error.
- *   Note: duplex handling is not implemented (yet).
+ *   As deviation from the recommended recovery action, we reset the path mask
+ *   after we have tried each path and go through all paths a second time.
+ *   This will cover situations where only one path at a time is actually down,
+ *   but all paths fail and recover just with the same sequence and timing as
+ *   we try to use them (flapping links).
+ *   If all alternate paths have been tried twice, the request is posted with
+ *   a permanent error.
  *
  *  PARAMETER
  *   erp		pointer to the current ERP
@@ -253,17 +259,25 @@
  *   erp		pointer to the ERP
  *
  */
-static struct dasd_ccw_req *
-dasd_3990_erp_action_1(struct dasd_ccw_req * erp)
+static struct dasd_ccw_req *dasd_3990_erp_action_1_sec(struct dasd_ccw_req *erp)
 {
-
-	erp->function = dasd_3990_erp_action_1;
-
+	erp->function = dasd_3990_erp_action_1_sec;
 	dasd_3990_erp_alternate_path(erp);
-
 	return erp;
+}
 
-}				/* end dasd_3990_erp_action_1 */
+static struct dasd_ccw_req *dasd_3990_erp_action_1(struct dasd_ccw_req *erp)
+{
+	erp->function = dasd_3990_erp_action_1;
+	dasd_3990_erp_alternate_path(erp);
+	if (erp->status == DASD_CQR_FAILED) {
+		erp->status = DASD_CQR_FILLED;
+		erp->retries = 10;
+		erp->lpm = LPM_ANYPATH;
+		erp->function = dasd_3990_erp_action_1_sec;
+	}
+	return erp;
+}				/* end dasd_3990_erp_action_1(b) */
 
 /*
  * DASD_3990_ERP_ACTION_4
@@ -2294,6 +2308,7 @@
 		return cqr;
 	}
 
+	ccw = cqr->cpaddr;
 	if (cqr->cpmode == 1) {
 		/* make a shallow copy of the original tcw but set new tsb */
 		erp->cpmode = 1;
@@ -2302,6 +2317,9 @@
 		tsb = (struct tsb *) &tcw[1];
 		*tcw = *((struct tcw *)cqr->cpaddr);
 		tcw->tsb = (long)tsb;
+	} else if (ccw->cmd_code == DASD_ECKD_CCW_PSF) {
+		/* PSF cannot be chained from NOOP/TIC */
+		erp->cpaddr = cqr->cpaddr;
 	} else {
 		/* initialize request with default TIC to current ERP/CQR */
 		ccw = erp->cpaddr;
@@ -2486,6 +2504,8 @@
 
 		erp = dasd_3990_erp_action_1(erp);
 
+	} else if (erp->function == dasd_3990_erp_action_1_sec) {
+		erp = dasd_3990_erp_action_1_sec(erp);
 	} else if (erp->function == dasd_3990_erp_action_5) {
 
 		/* retries have not been successful */