s390/dasd: add safe offline interface

The regular behavior of the DASD device driver when setting a device
offline is to return all outstanding I/O as failed. This behavior is
different from that of other System z operating systems and may lead
to unexpected data loss. Adding an explicit 'safe' offline function
will allow customers to use DASDs in the way they expect them to work.

Signed-off-by: Stefan Haberland <stefan.haberland@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index 0595c76..29225e1c 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -349,6 +349,16 @@
 	return rc;
 }
 
+static inline
+int _wait_for_empty_queues(struct dasd_device *device)
+{
+	if (device->block)
+		return list_empty(&device->ccw_queue) &&
+			list_empty(&device->block->ccw_queue);
+	else
+		return list_empty(&device->ccw_queue);
+}
+
 /*
  * Remove device from block device layer. Destroy dirty buffers.
  * Forget format information. Check if the target level is basic
@@ -1841,6 +1851,13 @@
 	cqr = list_entry(device->ccw_queue.next, struct dasd_ccw_req, devlist);
 	if ((cqr->status == DASD_CQR_IN_IO && cqr->expires != 0) &&
 	    (time_after_eq(jiffies, cqr->expires + cqr->starttime))) {
+		if (test_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, &device->flags)) {
+			/*
+			 * IO in safe offline processing should not
+			 * run out of retries
+			 */
+			cqr->retries++;
+		}
 		if (device->discipline->term_IO(cqr) != 0) {
 			/* Hmpf, try again in 5 sec */
 			dev_err(&device->cdev->dev,
@@ -3024,11 +3041,11 @@
 
 	cdev->handler = NULL;
 
-	dasd_remove_sysfs_files(cdev);
 	device = dasd_device_from_cdev(cdev);
 	if (IS_ERR(device))
 		return;
-	if (test_and_set_bit(DASD_FLAG_OFFLINE, &device->flags)) {
+	if (test_and_set_bit(DASD_FLAG_OFFLINE, &device->flags) &&
+	    !test_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, &device->flags)) {
 		/* Already doing offline processing */
 		dasd_put_device(device);
 		return;
@@ -3048,6 +3065,8 @@
 	 */
 	if (block)
 		dasd_free_block(block);
+
+	dasd_remove_sysfs_files(cdev);
 }
 
 /*
@@ -3126,16 +3145,13 @@
 {
 	struct dasd_device *device;
 	struct dasd_block *block;
-	int max_count, open_count;
+	int max_count, open_count, rc;
 
+	rc = 0;
 	device = dasd_device_from_cdev(cdev);
 	if (IS_ERR(device))
 		return PTR_ERR(device);
-	if (test_and_set_bit(DASD_FLAG_OFFLINE, &device->flags)) {
-		/* Already doing offline processing */
-		dasd_put_device(device);
-		return 0;
-	}
+
 	/*
 	 * We must make sure that this device is currently not in use.
 	 * The open_count is increased for every opener, that includes
@@ -3159,6 +3175,54 @@
 			return -EBUSY;
 		}
 	}
+
+	if (test_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, &device->flags)) {
+		/*
+		 * safe offline allready running
+		 * could only be called by normal offline so safe_offline flag
+		 * needs to be removed to run normal offline and kill all I/O
+		 */
+		if (test_and_set_bit(DASD_FLAG_OFFLINE, &device->flags)) {
+			/* Already doing normal offline processing */
+			dasd_put_device(device);
+			return -EBUSY;
+		} else
+			clear_bit(DASD_FLAG_SAFE_OFFLINE, &device->flags);
+
+	} else
+		if (test_bit(DASD_FLAG_OFFLINE, &device->flags)) {
+			/* Already doing offline processing */
+			dasd_put_device(device);
+			return -EBUSY;
+		}
+
+	/*
+	 * if safe_offline called set safe_offline_running flag and
+	 * clear safe_offline so that a call to normal offline
+	 * can overrun safe_offline processing
+	 */
+	if (test_and_clear_bit(DASD_FLAG_SAFE_OFFLINE, &device->flags) &&
+	    !test_and_set_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, &device->flags)) {
+		/*
+		 * If we want to set the device safe offline all IO operations
+		 * should be finished before continuing the offline process
+		 * so sync bdev first and then wait for our queues to become
+		 * empty
+		 */
+		/* sync blockdev and partitions */
+		rc = fsync_bdev(device->block->bdev);
+		if (rc != 0)
+			goto interrupted;
+
+		/* schedule device tasklet and wait for completion */
+		dasd_schedule_device_bh(device);
+		rc = wait_event_interruptible(shutdown_waitq,
+					      _wait_for_empty_queues(device));
+		if (rc != 0)
+			goto interrupted;
+	}
+
+	set_bit(DASD_FLAG_OFFLINE, &device->flags);
 	dasd_set_target_state(device, DASD_STATE_NEW);
 	/* dasd_delete_device destroys the device reference. */
 	block = device->block;
@@ -3170,6 +3234,14 @@
 	if (block)
 		dasd_free_block(block);
 	return 0;
+
+interrupted:
+	/* interrupted by signal */
+	clear_bit(DASD_FLAG_SAFE_OFFLINE, &device->flags);
+	clear_bit(DASD_FLAG_SAFE_OFFLINE_RUNNING, &device->flags);
+	clear_bit(DASD_FLAG_OFFLINE, &device->flags);
+	dasd_put_device(device);
+	return rc;
 }
 
 int dasd_generic_last_path_gone(struct dasd_device *device)
@@ -3489,15 +3561,6 @@
 }
 EXPORT_SYMBOL_GPL(dasd_get_sense);
 
-static inline int _wait_for_empty_queues(struct dasd_device *device)
-{
-	if (device->block)
-		return list_empty(&device->ccw_queue) &&
-			list_empty(&device->block->ccw_queue);
-	else
-		return list_empty(&device->ccw_queue);
-}
-
 void dasd_generic_shutdown(struct ccw_device *cdev)
 {
 	struct dasd_device *device;