USB: OHCI handles more ZFMicro quirks

The ZF Micro OHCI controller exhibits unexpected behavior that seems to be
related to high load.  Under certain conditions, the controller will
complete a TD, remove it from the endpoint's queue, and fail to add it to
the donelist. This causes the endpoint to appear to stop responding. Worse,
if the device is removed while in that state, OHCI will hang while waiting
for the orphaned TD to complete.  The situation is not recoverable without
rebooting.

This fix enhances the scope of the existing OHCI_QUIRK_ZFMICRO flag:

 1. A watchdog routine periodically scans the OHCI structures to check
    for orphaned TDs. In these cases the TD is taken back from the
    controller and completed normally.

 2. If a device is removed while the endpoint is hung but before the
    watchdog catches the situation, any outstanding TDs are taken back
    from the controller in the 'sanitize' phase.

The ohci-hcd driver used to print "INTR_SF lossage" in this situation;
this changes it to the universally accurate "ED unlink timeout".  Other
instances of this message presumably have different root causes.

Both this Compaq quirk and a NEC quirk are now properly compiled out for
non-PCI builds of this driver.

Signed-off-by: Mike Nuss <mike@terascala.com>
Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/host/ohci-hcd.c b/drivers/usb/host/ohci-hcd.c
index 6edf409..d673cb9 100644
--- a/drivers/usb/host/ohci-hcd.c
+++ b/drivers/usb/host/ohci-hcd.c
@@ -81,7 +81,6 @@
 static int ohci_init (struct ohci_hcd *ohci);
 static void ohci_stop (struct usb_hcd *hcd);
 static int ohci_restart (struct ohci_hcd *ohci);
-static void ohci_quirk_nec_worker (struct work_struct *work);
 
 #include "ohci-hub.c"
 #include "ohci-dbg.c"
@@ -314,6 +313,8 @@
 	if (!HC_IS_RUNNING (hcd->state)) {
 sanitize:
 		ed->state = ED_IDLE;
+		if (quirk_zfmicro(ohci) && ed->type == PIPE_INTERRUPT)
+			ohci->eds_scheduled--;
 		finish_unlinks (ohci, 0);
 	}
 
@@ -321,7 +322,12 @@
 	case ED_UNLINK:		/* wait for hw to finish? */
 		/* major IRQ delivery trouble loses INTR_SF too... */
 		if (limit-- == 0) {
-			ohci_warn (ohci, "IRQ INTR_SF lossage\n");
+			ohci_warn(ohci, "ED unlink timeout\n");
+			if (quirk_zfmicro(ohci)) {
+				ohci_warn(ohci, "Attempting ZF TD recovery\n");
+				ohci->ed_to_check = ed;
+				ohci->zf_delay = 2;
+			}
 			goto sanitize;
 		}
 		spin_unlock_irqrestore (&ohci->lock, flags);
@@ -379,6 +385,93 @@
 	(void) ohci_readl (ohci, &ohci->regs->control);
 }
 
+static int check_ed(struct ohci_hcd *ohci, struct ed *ed)
+{
+	return (hc32_to_cpu(ohci, ed->hwINFO) & ED_IN) != 0
+		&& (hc32_to_cpu(ohci, ed->hwHeadP) & TD_MASK)
+			== (hc32_to_cpu(ohci, ed->hwTailP) & TD_MASK)
+		&& !list_empty(&ed->td_list);
+}
+
+/* ZF Micro watchdog timer callback. The ZF Micro chipset sometimes completes
+ * an interrupt TD but neglects to add it to the donelist.  On systems with
+ * this chipset, we need to periodically check the state of the queues to look
+ * for such "lost" TDs.
+ */
+static void unlink_watchdog_func(unsigned long _ohci)
+{
+	long		flags;
+	unsigned	max;
+	unsigned	seen_count = 0;
+	unsigned	i;
+	struct ed	**seen = NULL;
+	struct ohci_hcd	*ohci = (struct ohci_hcd *) _ohci;
+
+	spin_lock_irqsave(&ohci->lock, flags);
+	max = ohci->eds_scheduled;
+	if (!max)
+		goto done;
+
+	if (ohci->ed_to_check)
+		goto out;
+
+	seen = kcalloc(max, sizeof *seen, GFP_ATOMIC);
+	if (!seen)
+		goto out;
+
+	for (i = 0; i < NUM_INTS; i++) {
+		struct ed	*ed = ohci->periodic[i];
+
+		while (ed) {
+			unsigned	temp;
+
+			/* scan this branch of the periodic schedule tree */
+			for (temp = 0; temp < seen_count; temp++) {
+				if (seen[temp] == ed) {
+					/* we've checked it and what's after */
+					ed = NULL;
+					break;
+				}
+			}
+			if (!ed)
+				break;
+			seen[seen_count++] = ed;
+			if (!check_ed(ohci, ed)) {
+				ed = ed->ed_next;
+				continue;
+			}
+
+			/* HC's TD list is empty, but HCD sees at least one
+			 * TD that's not been sent through the donelist.
+			 */
+			ohci->ed_to_check = ed;
+			ohci->zf_delay = 2;
+
+			/* The HC may wait until the next frame to report the
+			 * TD as done through the donelist and INTR_WDH.  (We
+			 * just *assume* it's not a multi-TD interrupt URB;
+			 * those could defer the IRQ more than one frame, using
+			 * DI...)  Check again after the next INTR_SF.
+			 */
+			ohci_writel(ohci, OHCI_INTR_SF,
+					&ohci->regs->intrstatus);
+			ohci_writel(ohci, OHCI_INTR_SF,
+					&ohci->regs->intrenable);
+
+			/* flush those writes */
+			(void) ohci_readl(ohci, &ohci->regs->control);
+
+			goto out;
+		}
+	}
+out:
+	kfree(seen);
+	if (ohci->eds_scheduled)
+		mod_timer(&ohci->unlink_watchdog, round_jiffies_relative(HZ));
+done:
+	spin_unlock_irqrestore(&ohci->lock, flags);
+}
+
 /*-------------------------------------------------------------------------*
  * HC functions
  *-------------------------------------------------------------------------*/
@@ -616,6 +709,15 @@
 	mdelay ((temp >> 23) & 0x1fe);
 	hcd->state = HC_STATE_RUNNING;
 
+	if (quirk_zfmicro(ohci)) {
+		/* Create timer to watch for bad queue state on ZF Micro */
+		setup_timer(&ohci->unlink_watchdog, unlink_watchdog_func,
+				(unsigned long) ohci);
+
+		ohci->eds_scheduled = 0;
+		ohci->ed_to_check = NULL;
+	}
+
 	ohci_dump (ohci, 1);
 
 	return 0;
@@ -629,10 +731,11 @@
 {
 	struct ohci_hcd		*ohci = hcd_to_ohci (hcd);
 	struct ohci_regs __iomem *regs = ohci->regs;
- 	int			ints; 
+	int			ints;
 
 	/* we can eliminate a (slow) ohci_readl()
-	   if _only_ WDH caused this irq */
+	 * if _only_ WDH caused this irq
+	 */
 	if ((ohci->hcca->done_head != 0)
 			&& ! (hc32_to_cpup (ohci, &ohci->hcca->done_head)
 				& 0x01)) {
@@ -651,7 +754,7 @@
 
 	if (ints & OHCI_INTR_UE) {
 		// e.g. due to PCI Master/Target Abort
-		if (ohci->flags & OHCI_QUIRK_NEC) {
+		if (quirk_nec(ohci)) {
 			/* Workaround for a silicon bug in some NEC chips used
 			 * in Apple's PowerBooks. Adapted from Darwin code.
 			 */
@@ -713,6 +816,31 @@
 			ohci_writel (ohci, OHCI_INTR_WDH, &regs->intrenable);
 	}
 
+	if (quirk_zfmicro(ohci) && (ints & OHCI_INTR_SF)) {
+		spin_lock(&ohci->lock);
+		if (ohci->ed_to_check) {
+			struct ed *ed = ohci->ed_to_check;
+
+			if (check_ed(ohci, ed)) {
+				/* HC thinks the TD list is empty; HCD knows
+				 * at least one TD is outstanding
+				 */
+				if (--ohci->zf_delay == 0) {
+					struct td *td = list_entry(
+						ed->td_list.next,
+						struct td, td_list);
+					ohci_warn(ohci,
+						  "Reclaiming orphan TD %p\n",
+						  td);
+					takeback_td(ohci, td);
+					ohci->ed_to_check = NULL;
+				}
+			} else
+				ohci->ed_to_check = NULL;
+		}
+		spin_unlock(&ohci->lock);
+	}
+
 	/* could track INTR_SO to reduce available PCI/... bandwidth */
 
 	/* handle any pending URB/ED unlinks, leaving INTR_SF enabled
@@ -721,7 +849,9 @@
 	spin_lock (&ohci->lock);
 	if (ohci->ed_rm_list)
 		finish_unlinks (ohci, ohci_frame_no(ohci));
-	if ((ints & OHCI_INTR_SF) != 0 && !ohci->ed_rm_list
+	if ((ints & OHCI_INTR_SF) != 0
+			&& !ohci->ed_rm_list
+			&& !ohci->ed_to_check
 			&& HC_IS_RUNNING(hcd->state))
 		ohci_writel (ohci, OHCI_INTR_SF, &regs->intrdisable);
 	spin_unlock (&ohci->lock);
@@ -751,6 +881,9 @@
 	free_irq(hcd->irq, hcd);
 	hcd->irq = -1;
 
+	if (quirk_zfmicro(ohci))
+		del_timer(&ohci->unlink_watchdog);
+
 	remove_debug_files (ohci);
 	ohci_mem_cleanup (ohci);
 	if (ohci->hcca) {
@@ -828,27 +961,6 @@
 
 /*-------------------------------------------------------------------------*/
 
-/* NEC workaround */
-static void ohci_quirk_nec_worker(struct work_struct *work)
-{
-	struct ohci_hcd *ohci = container_of(work, struct ohci_hcd, nec_work);
-	int status;
-
-	status = ohci_init(ohci);
-	if (status != 0) {
-		ohci_err(ohci, "Restarting NEC controller failed "
-			 "in ohci_init, %d\n", status);
-		return;
-	}
-
-	status = ohci_restart(ohci);
-	if (status != 0)
-		ohci_err(ohci, "Restarting NEC controller failed "
-			 "in ohci_restart, %d\n", status);
-}
-
-/*-------------------------------------------------------------------------*/
-
 #define DRIVER_INFO DRIVER_VERSION " " DRIVER_DESC
 
 MODULE_AUTHOR (DRIVER_AUTHOR);