[libata] Improve timeout handling. On a timeout, call a device-specific handler early in the recovery so that we can complete and process successful commands which timed out due to IRQ loss or the like rather more elegantly. [Revised to exclude the timeout handling on a few devices that inherit from SFF but are not SFF enough to use the default timeout handler.] Signed-off-by: Alan Cox <alan@redhat.com> Signed-off-by: Jeff Garzik <jgarzik@redhat.com>

commit: c96f1732e25362d10ee7bcac1df8412a2e6b7d23 [log] [tgz]
author: Alan Cox <alan@redhat.com> Tue Mar 24 10:23:46 2009 +0000
committer: Jeff Garzik <jgarzik@redhat.com> Tue Mar 24 22:52:39 2009 -0400
tree: 66e24eddb174d6751579ec5952f72cbbac0fb038
parent: 3d47aa8e7e7b2aa09256590388aa8dddc79280f9 [diff] [blame]
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index ea89091..0183131 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c

@@ -547,7 +547,7 @@
 
 	/* For new EH, all qcs are finished in one of three ways -
 	 * normal completion, error completion, and SCSI timeout.
-	 * Both cmpletions can race against SCSI timeout.  When normal
+	 * Both completions can race against SCSI timeout.  When normal
 	 * completion wins, the qc never reaches EH.  When error
 	 * completion wins, the qc has ATA_QCFLAG_FAILED set.
 	 *
@@ -562,7 +562,19 @@
 		int nr_timedout = 0;
 
 		spin_lock_irqsave(ap->lock, flags);
+		
+		/* This must occur under the ap->lock as we don't want
+		   a polled recovery to race the real interrupt handler.
+		   
+		   The lost_interrupt handler checks for any completed but
+		   non-notified command and completes much like an IRQ handler.
+		   
+		   We then fall into the error recovery code which will treat
+		   this as if normal completion won the race */
 
+		if (ap->ops->lost_interrupt)
+			ap->ops->lost_interrupt(ap);
+			
 		list_for_each_entry_safe(scmd, tmp, &host->eh_cmd_q, eh_entry) {
 			struct ata_queued_cmd *qc;
 
@@ -606,6 +618,9 @@
 		ap->eh_tries = ATA_EH_MAX_TRIES;
 	} else
 		spin_unlock_wait(ap->lock);
+		
+	/* If we timed out but raced normal completion and there is nothing
+	   to recover (nr_timedout == 0), why exactly are we doing error recovery? */
 
  repeat:
 	/* invoke error handler */
commit	c96f1732e25362d10ee7bcac1df8412a2e6b7d23	[log] [tgz]
author	Alan Cox <alan@redhat.com>	Tue Mar 24 10:23:46 2009 +0000
committer	Jeff Garzik <jgarzik@redhat.com>	Tue Mar 24 22:52:39 2009 -0400
tree	66e24eddb174d6751579ec5952f72cbbac0fb038
parent	3d47aa8e7e7b2aa09256590388aa8dddc79280f9 [diff] [blame]