USB: EHCI: improve end_unlink_async()

This patch (as1665) changes the way ehci-hcd's end_unlink_async()
routine works in order to avoid recursive execution and to be more
efficient:

	Now when an IAA cycle ends, a new one gets started up right
	away (if it is needed) instead of waiting until the
	just-unlinked QH has been processed.

	The async_iaa list is renamed to async_idle, which better
	expresses its new purpose: It is now the list of QHs which are
	now completely idle and are waiting to be processed by
	end_unlink_async().

	A new flag is added to track whether an IAA cycle is in
	progress, because the list formerly known as async_iaa no
	longer stores the QHs waiting for the IAA to finish.

	The decision about how many QHs to process when an IAA cycle
	ends is now made at the end of the cycle, when we know the
	current state of the hardware, rather than at the beginning.
	This means a bunch of logic got moved from start_iaa_cycle()
	to end_unlink_async().

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c
index b32323c..037a472 100644
--- a/drivers/usb/host/ehci-hcd.c
+++ b/drivers/usb/host/ehci-hcd.c
@@ -483,7 +483,7 @@
 	 */
 	ehci->periodic_size = DEFAULT_I_TDPS;
 	INIT_LIST_HEAD(&ehci->async_unlink);
-	INIT_LIST_HEAD(&ehci->async_iaa);
+	INIT_LIST_HEAD(&ehci->async_idle);
 	INIT_LIST_HEAD(&ehci->intr_unlink);
 	INIT_LIST_HEAD(&ehci->intr_qh_list);
 	INIT_LIST_HEAD(&ehci->cached_itd_list);
@@ -752,7 +752,7 @@
 		/* guard against (alleged) silicon errata */
 		if (cmd & CMD_IAAD)
 			ehci_dbg(ehci, "IAA with IAAD still set?\n");
-		if (!list_empty(&ehci->async_iaa))
+		if (ehci->iaa_in_progress)
 			COUNT(ehci->stats.iaa);
 		end_unlink_async(ehci);
 	}
diff --git a/drivers/usb/host/ehci-q.c b/drivers/usb/host/ehci-q.c
index 4a01367..820583b 100644
--- a/drivers/usb/host/ehci-q.c
+++ b/drivers/usb/host/ehci-q.c
@@ -960,7 +960,7 @@
 
 	/* The async schedule and unlink lists are supposed to be empty */
 	WARN_ON(ehci->async->qh_next.qh || !list_empty(&ehci->async_unlink) ||
-			!list_empty(&ehci->async_iaa));
+			!list_empty(&ehci->async_idle));
 
 	/* Don't turn off the schedule until ASS is 1 */
 	ehci_poll_ASS(ehci);
@@ -1164,41 +1164,19 @@
 		ehci->qh_scan_next = qh->qh_next.qh;
 }
 
-static void start_iaa_cycle(struct ehci_hcd *ehci, bool nested)
+static void start_iaa_cycle(struct ehci_hcd *ehci)
 {
-	/*
-	 * Do nothing if an IAA cycle is already running or
-	 * if one will be started shortly.
-	 */
-	if (!list_empty(&ehci->async_iaa) || ehci->async_unlinking)
+	/* Do nothing if an IAA cycle is already running */
+	if (ehci->iaa_in_progress)
 		return;
+	ehci->iaa_in_progress = true;
 
 	/* If the controller isn't running, we don't have to wait for it */
 	if (unlikely(ehci->rh_state < EHCI_RH_RUNNING)) {
-
-		/* Do all the waiting QHs */
-		list_splice_tail_init(&ehci->async_unlink, &ehci->async_iaa);
-
-		if (!nested)		/* Avoid recursion */
-			end_unlink_async(ehci);
+		end_unlink_async(ehci);
 
 	/* Otherwise start a new IAA cycle */
 	} else if (likely(ehci->rh_state == EHCI_RH_RUNNING)) {
-		struct ehci_qh		*qh;
-
-		/* Do only the first waiting QH (nVidia bug?) */
-		qh = list_first_entry(&ehci->async_unlink, struct ehci_qh,
-				unlink_node);
-
-		/*
-		 * Intel (?) bug: The HC can write back the overlay region
-		 * even after the IAA interrupt occurs.  In self-defense,
-		 * always go through two IAA cycles for each QH.
-		 */
-		if (qh->qh_state == QH_STATE_UNLINK_WAIT)
-			qh->qh_state = QH_STATE_UNLINK;
-		else
-			list_move_tail(&qh->unlink_node, &ehci->async_iaa);
 
 		/* Make sure the unlinks are all visible to the hardware */
 		wmb();
@@ -1215,16 +1193,59 @@
 static void end_unlink_async(struct ehci_hcd *ehci)
 {
 	struct ehci_qh		*qh;
+	bool			early_exit;
 
 	if (ehci->has_synopsys_hc_bug)
 		ehci_writel(ehci, (u32) ehci->async->qh_dma,
 			    &ehci->regs->async_next);
 
+	/* The current IAA cycle has ended */
+	ehci->iaa_in_progress = false;
+
+	if (list_empty(&ehci->async_unlink))
+		return;
+	qh = list_first_entry(&ehci->async_unlink, struct ehci_qh,
+			unlink_node);	/* QH whose IAA cycle just ended */
+
+	/*
+	 * If async_unlinking is set then this routine is already running,
+	 * either on the stack or on another CPU.
+	 */
+	early_exit = ehci->async_unlinking;
+
+	/* If the controller isn't running, process all the waiting QHs */
+	if (ehci->rh_state < EHCI_RH_RUNNING)
+		list_splice_tail_init(&ehci->async_unlink, &ehci->async_idle);
+
+	/*
+	 * Intel (?) bug: The HC can write back the overlay region even
+	 * after the IAA interrupt occurs.  In self-defense, always go
+	 * through two IAA cycles for each QH.
+	 */
+	else if (qh->qh_state == QH_STATE_UNLINK_WAIT) {
+		qh->qh_state = QH_STATE_UNLINK;
+		early_exit = true;
+	}
+
+	/* Otherwise process only the first waiting QH (NVIDIA bug?) */
+	else
+		list_move_tail(&qh->unlink_node, &ehci->async_idle);
+
+	/* Start a new IAA cycle if any QHs are waiting for it */
+	if (!list_empty(&ehci->async_unlink))
+		start_iaa_cycle(ehci);
+
+	/*
+	 * Don't allow nesting or concurrent calls,
+	 * or wait for the second IAA cycle for the next QH.
+	 */
+	if (early_exit)
+		return;
+
 	/* Process the idle QHs */
- restart:
 	ehci->async_unlinking = true;
-	while (!list_empty(&ehci->async_iaa)) {
-		qh = list_first_entry(&ehci->async_iaa, struct ehci_qh,
+	while (!list_empty(&ehci->async_idle)) {
+		qh = list_first_entry(&ehci->async_idle, struct ehci_qh,
 				unlink_node);
 		list_del(&qh->unlink_node);
 
@@ -1239,13 +1260,6 @@
 		disable_async(ehci);
 	}
 	ehci->async_unlinking = false;
-
-	/* Start a new IAA cycle if any QHs are waiting for it */
-	if (!list_empty(&ehci->async_unlink)) {
-		start_iaa_cycle(ehci, true);
-		if (unlikely(ehci->rh_state < EHCI_RH_RUNNING))
-			goto restart;
-	}
 }
 
 static void start_unlink_async(struct ehci_hcd *ehci, struct ehci_qh *qh);
@@ -1270,8 +1284,7 @@
 	}
 
 	/* If nothing else is being unlinked, unlink the last empty QH */
-	if (list_empty(&ehci->async_iaa) && list_empty(&ehci->async_unlink) &&
-			qh_to_unlink) {
+	if (list_empty(&ehci->async_unlink) && qh_to_unlink) {
 		start_unlink_async(ehci, qh_to_unlink);
 		--count;
 	}
@@ -1293,7 +1306,7 @@
 		WARN_ON(!list_empty(&qh->qtd_list));
 		single_unlink_async(ehci, qh);
 	}
-	start_iaa_cycle(ehci, false);
+	start_iaa_cycle(ehci);
 }
 
 /* makes sure the async qh will become idle */
@@ -1306,7 +1319,7 @@
 		return;
 
 	single_unlink_async(ehci, qh);
-	start_iaa_cycle(ehci, false);
+	start_iaa_cycle(ehci);
 }
 
 /*-------------------------------------------------------------------------*/
diff --git a/drivers/usb/host/ehci-timer.c b/drivers/usb/host/ehci-timer.c
index f63a983..11e5b32 100644
--- a/drivers/usb/host/ehci-timer.c
+++ b/drivers/usb/host/ehci-timer.c
@@ -304,7 +304,7 @@
 	 * (a) SMP races against real IAA firing and retriggering, and
 	 * (b) clean HC shutdown, when IAA watchdog was pending.
 	 */
-	if (ehci->rh_state != EHCI_RH_RUNNING)
+	if (!ehci->iaa_in_progress || ehci->rh_state != EHCI_RH_RUNNING)
 		return;
 
 	/* If we get here, IAA is *REALLY* late.  It's barely
diff --git a/drivers/usb/host/ehci.h b/drivers/usb/host/ehci.h
index 13f6704..e666999 100644
--- a/drivers/usb/host/ehci.h
+++ b/drivers/usb/host/ehci.h
@@ -121,6 +121,7 @@
 	bool			scanning:1;
 	bool			need_rescan:1;
 	bool			intr_unlinking:1;
+	bool			iaa_in_progress:1;
 	bool			async_unlinking:1;
 	bool			shutdown:1;
 	struct ehci_qh		*qh_scan_next;
@@ -129,7 +130,7 @@
 	struct ehci_qh		*async;
 	struct ehci_qh		*dummy;		/* For AMD quirk use */
 	struct list_head	async_unlink;
-	struct list_head	async_iaa;
+	struct list_head	async_idle;
 	unsigned		async_unlink_cycle;
 	unsigned		async_count;	/* async activity count */