ncr5380: Fix EH during arbitration and selection

During arbitration and selection, the relevant command is invisible to
exception handlers and can be found only in a pointer on the stack of a
different thread.

When eh_abort_handler can't find a given command, it can't decide whether
that command was completed already or is still in arbitration or selection
phase. But it must return either SUCCESS (e.g. command completed earlier)
or FAILED (could not abort the nexus, try bus reset).

The solution is to make sure all commands belonging to the LLD are always
visible to exception handlers. Add another scsi_cmnd pointer to the
hostdata struct to track the command in arbitration or selection phase.

Replace 'retain_dma_irq' with the new 'selecting' pointer, to bring
atari_NCR5380.c into line with NCR5380.c.

Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Tested-by: Ondrej Zary <linux@rainbow-software.org>
Tested-by: Michael Schmitz <schmitzmic@gmail.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
diff --git a/drivers/scsi/NCR5380.c b/drivers/scsi/NCR5380.c
index d24852b..32355b6 100644
--- a/drivers/scsi/NCR5380.c
+++ b/drivers/scsi/NCR5380.c
@@ -912,9 +912,9 @@
 			 * entire unit.
 			 */
 
-			if (!NCR5380_select(instance, cmd)) {
-				dsprintk(NDEBUG_MAIN, instance, "main: selected target %d for command %p\n",
-				         scmd_id(cmd), cmd);
+			cmd = NCR5380_select(instance, cmd);
+			if (!cmd) {
+				dsprintk(NDEBUG_MAIN, instance, "main: select complete\n");
 			} else {
 				dsprintk(NDEBUG_MAIN | NDEBUG_QUEUES, instance,
 				         "main: select failed, returning %p to queue\n", cmd);
@@ -1061,9 +1061,9 @@
  * Inputs : instance - instantiation of the 5380 driver on which this 
  *      target lives, cmd - SCSI command to execute.
  * 
- * Returns : -1 if selection failed but should be retried.
- *      0 if selection failed and should not be retried.
- *      0 if selection succeeded completely (hostdata->connected == cmd).
+ * Returns cmd if selection failed but should be retried,
+ * NULL if selection failed and should not be retried, or
+ * NULL if selection succeeded (hostdata->connected == cmd).
  *
  * Side effects : 
  *      If bus busy, arbitration failed, etc, NCR5380_select() will exit 
@@ -1081,7 +1081,8 @@
  *	Locks: caller holds hostdata lock in IRQ mode
  */
  
-static int NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
+static struct scsi_cmnd *NCR5380_select(struct Scsi_Host *instance,
+                                        struct scsi_cmnd *cmd)
 {
 	struct NCR5380_hostdata *hostdata = shost_priv(instance);
 	unsigned char tmp[3], phase;
@@ -1092,6 +1093,15 @@
 	NCR5380_dprint(NDEBUG_ARBITRATION, instance);
 	dprintk(NDEBUG_ARBITRATION, "scsi%d : starting arbitration, id = %d\n", instance->host_no, instance->this_id);
 
+	/*
+	 * Arbitration and selection phases are slow and involve dropping the
+	 * lock, so we have to watch out for EH. An exception handler may
+	 * change 'selecting' to NULL. This function will then return NULL
+	 * so that the caller will forget about 'cmd'. (During information
+	 * transfer phases, EH may change 'connected' to NULL.)
+	 */
+	hostdata->selecting = cmd;
+
 	/* 
 	 * Set the phase bits to 0, otherwise the NCR5380 won't drive the 
 	 * data bus during SELECTION.
@@ -1117,13 +1127,13 @@
 	spin_lock_irq(&hostdata->lock);
 	if (!(NCR5380_read(MODE_REG) & MR_ARBITRATE)) {
 		/* Reselection interrupt */
-		return -1;
+		goto out;
 	}
 	if (err < 0) {
 		NCR5380_write(MODE_REG, MR_BASE);
 		shost_printk(KERN_ERR, instance,
 		             "select: arbitration timeout\n");
-		return -1;
+		goto out;
 	}
 	spin_unlock_irq(&hostdata->lock);
 
@@ -1135,7 +1145,7 @@
 		NCR5380_write(MODE_REG, MR_BASE);
 		dprintk(NDEBUG_ARBITRATION, "scsi%d : lost arbitration, deasserting MR_ARBITRATE\n", instance->host_no);
 		spin_lock_irq(&hostdata->lock);
-		return -1;
+		goto out;
 	}
 
 	/* After/during arbitration, BSY should be asserted.
@@ -1159,7 +1169,13 @@
 
 	/* NCR5380_reselect() clears MODE_REG after a reselection interrupt */
 	if (!(NCR5380_read(MODE_REG) & MR_ARBITRATE))
-		return -1;
+		goto out;
+
+	if (!hostdata->selecting) {
+		NCR5380_write(MODE_REG, MR_BASE);
+		NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+		goto out;
+	}
 
 	dprintk(NDEBUG_ARBITRATION, "scsi%d : won arbitration\n", instance->host_no);
 
@@ -1232,18 +1248,21 @@
 		if (!hostdata->connected)
 			NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
 		printk("scsi%d : reselection after won arbitration?\n", instance->host_no);
-		return -1;
+		goto out;
 	}
 
 	if (err < 0) {
 		spin_lock_irq(&hostdata->lock);
 		NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-		cmd->result = DID_BAD_TARGET << 16;
-		complete_cmd(instance, cmd);
 		NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
-		dprintk(NDEBUG_SELECTION, "scsi%d : target did not respond within 250ms\n",
-		        instance->host_no);
-		return 0;
+		/* Can't touch cmd if it has been reclaimed by the scsi ML */
+		if (hostdata->selecting) {
+			cmd->result = DID_BAD_TARGET << 16;
+			complete_cmd(instance, cmd);
+			dsprintk(NDEBUG_SELECTION, instance, "target did not respond within 250ms\n");
+			cmd = NULL;
+		}
+		goto out;
 	}
 
 	/* 
@@ -1279,7 +1298,11 @@
 		shost_printk(KERN_ERR, instance, "select: REQ timeout\n");
 		NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
 		NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
-		return -1;
+		goto out;
+	}
+	if (!hostdata->selecting) {
+		do_abort(instance);
+		goto out;
 	}
 
 	dprintk(NDEBUG_SELECTION, "scsi%d : target %d selected, going into MESSAGE OUT phase.\n", instance->host_no, cmd->device->id);
@@ -1300,7 +1323,13 @@
 
 	initialize_SCp(cmd);
 
-	return 0;
+	cmd = NULL;
+
+out:
+	if (!hostdata->selecting)
+		return NULL;
+	hostdata->selecting = NULL;
+	return cmd;
 }
 
 /* 
@@ -2352,6 +2381,15 @@
 		cmd->scsi_done(cmd); /* No tag or busy flag to worry about */
 	}
 
+	if (hostdata->selecting == cmd) {
+		dsprintk(NDEBUG_ABORT, instance,
+		         "abort: cmd %p == selecting\n", cmd);
+		hostdata->selecting = NULL;
+		cmd->result = DID_ABORT << 16;
+		complete_cmd(instance, cmd);
+		goto out;
+	}
+
 	if (list_del_cmd(&hostdata->disconnected, cmd)) {
 		dsprintk(NDEBUG_ABORT, instance,
 		         "abort: removed %p from disconnected list\n", cmd);