UHCI: Eliminate asynchronous skeleton Queue Headers
This patch (as856) attempts to improve the performance of uhci-hcd by
removing the asynchronous skeleton Queue Headers. They don't contain
any useful information but the controller has to read through them at
least once every millisecond, incurring a non-zero DMA overhead.
Now all the asynchronous queues are combined, along with the period-1
interrupt queue, into a single list with a single skeleton QH. The
start of the low-speed control, full-speed control, and bulk sublists
is determined by linear search. Since there should rarely be more
than a couple of QHs in the list, the searches should incur a much
smaller total load than keeping the skeleton QHs.
Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
diff --git a/drivers/usb/host/uhci-hcd.c b/drivers/usb/host/uhci-hcd.c
index 1f0833a..44da433 100644
--- a/drivers/usb/host/uhci-hcd.c
+++ b/drivers/usb/host/uhci-hcd.c
@@ -13,7 +13,7 @@
* (C) Copyright 2000 Yggdrasil Computing, Inc. (port of new PCI interface
* support from usb-ohci.c by Adam Richter, adam@yggdrasil.com).
* (C) Copyright 1999 Gregory P. Smith (from usb-ohci.c)
- * (C) Copyright 2004-2006 Alan Stern, stern@rowland.harvard.edu
+ * (C) Copyright 2004-2007 Alan Stern, stern@rowland.harvard.edu
*
* Intel documents this fairly well, and as far as I know there
* are no royalties or anything like that, but even so there are
@@ -107,10 +107,10 @@
* interrupt QHs, which will help spread out bandwidth utilization.
*
* ffs (Find First bit Set) does exactly what we need:
- * 1,3,5,... => ffs = 0 => use skel_int2_qh = skelqh[8],
- * 2,6,10,... => ffs = 1 => use skel_int4_qh = skelqh[7], etc.
+ * 1,3,5,... => ffs = 0 => use period-2 QH = skelqh[8],
+ * 2,6,10,... => ffs = 1 => use period-4 QH = skelqh[7], etc.
* ffs >= 7 => not on any high-period queue, so use
- * skel_int1_qh = skelqh[9].
+ * period-1 QH = skelqh[9].
* Add in UHCI_NUMFRAMES to insure at least one bit is set.
*/
skelnum = 8 - (int) __ffs(frame | UHCI_NUMFRAMES);
@@ -540,16 +540,18 @@
*
* The hardware doesn't really know any difference
* in the queues, but the order does matter for the
- * protocols higher up. The order is:
+ * protocols higher up. The order in which the queues
+ * are encountered by the hardware is:
*
- * - any isochronous events handled before any
+ * - All isochronous events are handled before any
* of the queues. We don't do that here, because
* we'll create the actual TD entries on demand.
- * - The first queue is the interrupt queue.
- * - The second queue is the control queue, split into low- and full-speed
- * - The third queue is bulk queue.
- * - The fourth queue is the bandwidth reclamation queue, which loops back
- * to the full-speed control queue.
+ * - The first queue is the high-period interrupt queue.
+ * - The second queue is the period-1 interrupt and async
+ * (low-speed control, full-speed control, then bulk) queue.
+ * - The third queue is the terminating bandwidth reclamation queue,
+ * which contains no members, loops back to itself, and is present
+ * only when FSBR is on and there are no full-speed control or bulk QHs.
*/
static int uhci_start(struct usb_hcd *hcd)
{
@@ -626,30 +628,18 @@
}
/*
- * 8 Interrupt queues; link all higher int queues to int1,
- * then link int1 to control and control to bulk
+ * 8 Interrupt queues; link all higher int queues to int1 = async
*/
- uhci->skel_int128_qh->link =
- uhci->skel_int64_qh->link =
- uhci->skel_int32_qh->link =
- uhci->skel_int16_qh->link =
- uhci->skel_int8_qh->link =
- uhci->skel_int4_qh->link =
- uhci->skel_int2_qh->link = LINK_TO_QH(
- uhci->skel_int1_qh);
-
- uhci->skel_int1_qh->link = LINK_TO_QH(uhci->skel_ls_control_qh);
- uhci->skel_ls_control_qh->link = LINK_TO_QH(uhci->skel_fs_control_qh);
- uhci->skel_fs_control_qh->link = LINK_TO_QH(uhci->skel_bulk_qh);
- uhci->skel_bulk_qh->link = LINK_TO_QH(uhci->skel_term_qh);
+ for (i = SKEL_ISO + 1; i < SKEL_ASYNC; ++i)
+ uhci->skelqh[i]->link = LINK_TO_QH(uhci->skel_async_qh);
+ uhci->skel_async_qh->link = uhci->skel_term_qh->link = UHCI_PTR_TERM;
/* This dummy TD is to work around a bug in Intel PIIX controllers */
uhci_fill_td(uhci->term_td, 0, uhci_explen(0) |
- (0x7f << TD_TOKEN_DEVADDR_SHIFT) | USB_PID_IN, 0);
- uhci->term_td->link = LINK_TO_TD(uhci->term_td);
-
- uhci->skel_term_qh->link = UHCI_PTR_TERM;
- uhci->skel_term_qh->element = LINK_TO_TD(uhci->term_td);
+ (0x7f << TD_TOKEN_DEVADDR_SHIFT) | USB_PID_IN, 0);
+ uhci->term_td->link = UHCI_PTR_TERM;
+ uhci->skel_async_qh->element = uhci->skel_term_qh->element =
+ LINK_TO_TD(uhci->term_td);
/*
* Fill the frame list: make all entries point to the proper