USB: EHCI: split ehci_qh into hw and sw parts

The ehci_qh structure merged hw and sw together which is not good:
1. More and more items are being added into ehci_qh, the ehci_qh software
   part are unnecessary to be allocated in DMA qh_pool.
2. If HCD has local SRAM, the sw part will consume it too, and it won't
   bring any benefit.
3. For non-cache-coherence system, the entire ehci_qh is uncachable, actually
   we only need the hw part to be uncacheable. Spliting them will let the sw
   part to be cacheable.

Signed-off-by: Alek Du <alek.du@intel.com>
Cc: David Brownell <dbrownell@users.sourceforge.net>
CC: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/host/ehci-sched.c b/drivers/usb/host/ehci-sched.c
index edd61ee..327437a 100644
--- a/drivers/usb/host/ehci-sched.c
+++ b/drivers/usb/host/ehci-sched.c
@@ -60,6 +60,20 @@
 	}
 }
 
+static __hc32 *
+shadow_next_periodic(struct ehci_hcd *ehci, union ehci_shadow *periodic,
+		__hc32 tag)
+{
+	switch (hc32_to_cpu(ehci, tag)) {
+	/* our ehci_shadow.qh is actually software part */
+	case Q_TYPE_QH:
+		return &periodic->qh->hw->hw_next;
+	/* others are hw parts */
+	default:
+		return periodic->hw_next;
+	}
+}
+
 /* caller must hold ehci->lock */
 static void periodic_unlink (struct ehci_hcd *ehci, unsigned frame, void *ptr)
 {
@@ -71,7 +85,8 @@
 	while (here.ptr && here.ptr != ptr) {
 		prev_p = periodic_next_shadow(ehci, prev_p,
 				Q_NEXT_TYPE(ehci, *hw_p));
-		hw_p = here.hw_next;
+		hw_p = shadow_next_periodic(ehci, &here,
+				Q_NEXT_TYPE(ehci, *hw_p));
 		here = *prev_p;
 	}
 	/* an interrupt entry (at list end) could have been shared */
@@ -83,7 +98,7 @@
 	 */
 	*prev_p = *periodic_next_shadow(ehci, &here,
 			Q_NEXT_TYPE(ehci, *hw_p));
-	*hw_p = *here.hw_next;
+	*hw_p = *shadow_next_periodic(ehci, &here, Q_NEXT_TYPE(ehci, *hw_p));
 }
 
 /* how many of the uframe's 125 usecs are allocated? */
@@ -93,18 +108,20 @@
 	__hc32			*hw_p = &ehci->periodic [frame];
 	union ehci_shadow	*q = &ehci->pshadow [frame];
 	unsigned		usecs = 0;
+	struct ehci_qh_hw	*hw;
 
 	while (q->ptr) {
 		switch (hc32_to_cpu(ehci, Q_NEXT_TYPE(ehci, *hw_p))) {
 		case Q_TYPE_QH:
+			hw = q->qh->hw;
 			/* is it in the S-mask? */
-			if (q->qh->hw_info2 & cpu_to_hc32(ehci, 1 << uframe))
+			if (hw->hw_info2 & cpu_to_hc32(ehci, 1 << uframe))
 				usecs += q->qh->usecs;
 			/* ... or C-mask? */
-			if (q->qh->hw_info2 & cpu_to_hc32(ehci,
+			if (hw->hw_info2 & cpu_to_hc32(ehci,
 					1 << (8 + uframe)))
 				usecs += q->qh->c_usecs;
-			hw_p = &q->qh->hw_next;
+			hw_p = &hw->hw_next;
 			q = &q->qh->qh_next;
 			break;
 		// case Q_TYPE_FSTN:
@@ -237,10 +254,10 @@
 			continue;
 		case Q_TYPE_QH:
 			if (same_tt(dev, q->qh->dev)) {
-				uf = tt_start_uframe(ehci, q->qh->hw_info2);
+				uf = tt_start_uframe(ehci, q->qh->hw->hw_info2);
 				tt_usecs[uf] += q->qh->tt_usecs;
 			}
-			hw_p = &q->qh->hw_next;
+			hw_p = &q->qh->hw->hw_next;
 			q = &q->qh->qh_next;
 			continue;
 		case Q_TYPE_SITD:
@@ -375,6 +392,7 @@
 	for (; frame < ehci->periodic_size; frame += period) {
 		union ehci_shadow	here;
 		__hc32			type;
+		struct ehci_qh_hw	*hw;
 
 		here = ehci->pshadow [frame];
 		type = Q_NEXT_TYPE(ehci, ehci->periodic [frame]);
@@ -385,17 +403,18 @@
 				here = here.itd->itd_next;
 				continue;
 			case Q_TYPE_QH:
+				hw = here.qh->hw;
 				if (same_tt (dev, here.qh->dev)) {
 					u32		mask;
 
 					mask = hc32_to_cpu(ehci,
-							here.qh->hw_info2);
+							hw->hw_info2);
 					/* "knows" no gap is needed */
 					mask |= mask >> 8;
 					if (mask & uf_mask)
 						break;
 				}
-				type = Q_NEXT_TYPE(ehci, here.qh->hw_next);
+				type = Q_NEXT_TYPE(ehci, hw->hw_next);
 				here = here.qh->qh_next;
 				continue;
 			case Q_TYPE_SITD:
@@ -498,7 +517,8 @@
 
 	dev_dbg (&qh->dev->dev,
 		"link qh%d-%04x/%p start %d [%d/%d us]\n",
-		period, hc32_to_cpup(ehci, &qh->hw_info2) & (QH_CMASK | QH_SMASK),
+		period, hc32_to_cpup(ehci, &qh->hw->hw_info2)
+			& (QH_CMASK | QH_SMASK),
 		qh, qh->start, qh->usecs, qh->c_usecs);
 
 	/* high bandwidth, or otherwise every microframe */
@@ -517,7 +537,7 @@
 			if (type == cpu_to_hc32(ehci, Q_TYPE_QH))
 				break;
 			prev = periodic_next_shadow(ehci, prev, type);
-			hw_p = &here.qh->hw_next;
+			hw_p = shadow_next_periodic(ehci, &here, type);
 			here = *prev;
 		}
 
@@ -528,14 +548,14 @@
 			if (qh->period > here.qh->period)
 				break;
 			prev = &here.qh->qh_next;
-			hw_p = &here.qh->hw_next;
+			hw_p = &here.qh->hw->hw_next;
 			here = *prev;
 		}
 		/* link in this qh, unless some earlier pass did that */
 		if (qh != here.qh) {
 			qh->qh_next = here;
 			if (here.qh)
-				qh->hw_next = *hw_p;
+				qh->hw->hw_next = *hw_p;
 			wmb ();
 			prev->qh = qh;
 			*hw_p = QH_NEXT (ehci, qh->qh_dma);
@@ -581,7 +601,7 @@
 	dev_dbg (&qh->dev->dev,
 		"unlink qh%d-%04x/%p start %d [%d/%d us]\n",
 		qh->period,
-		hc32_to_cpup(ehci, &qh->hw_info2) & (QH_CMASK | QH_SMASK),
+		hc32_to_cpup(ehci, &qh->hw->hw_info2) & (QH_CMASK | QH_SMASK),
 		qh, qh->start, qh->usecs, qh->c_usecs);
 
 	/* qh->qh_next still "live" to HC */
@@ -596,6 +616,7 @@
 static void intr_deschedule (struct ehci_hcd *ehci, struct ehci_qh *qh)
 {
 	unsigned	wait;
+	struct ehci_qh_hw *hw = qh->hw;
 
 	qh_unlink_periodic (ehci, qh);
 
@@ -606,14 +627,14 @@
 	 */
 	if (list_empty (&qh->qtd_list)
 			|| (cpu_to_hc32(ehci, QH_CMASK)
-					& qh->hw_info2) != 0)
+					& hw->hw_info2) != 0)
 		wait = 2;
 	else
 		wait = 55;	/* worst case: 3 * 1024 */
 
 	udelay (wait);
 	qh->qh_state = QH_STATE_IDLE;
-	qh->hw_next = EHCI_LIST_END(ehci);
+	hw->hw_next = EHCI_LIST_END(ehci);
 	wmb ();
 }
 
@@ -739,14 +760,15 @@
 	unsigned	uframe;
 	__hc32		c_mask;
 	unsigned	frame;		/* 0..(qh->period - 1), or NO_FRAME */
+	struct ehci_qh_hw	*hw = qh->hw;
 
 	qh_refresh(ehci, qh);
-	qh->hw_next = EHCI_LIST_END(ehci);
+	hw->hw_next = EHCI_LIST_END(ehci);
 	frame = qh->start;
 
 	/* reuse the previous schedule slots, if we can */
 	if (frame < qh->period) {
-		uframe = ffs(hc32_to_cpup(ehci, &qh->hw_info2) & QH_SMASK);
+		uframe = ffs(hc32_to_cpup(ehci, &hw->hw_info2) & QH_SMASK);
 		status = check_intr_schedule (ehci, frame, --uframe,
 				qh, &c_mask);
 	} else {
@@ -784,11 +806,11 @@
 		qh->start = frame;
 
 		/* reset S-frame and (maybe) C-frame masks */
-		qh->hw_info2 &= cpu_to_hc32(ehci, ~(QH_CMASK | QH_SMASK));
-		qh->hw_info2 |= qh->period
+		hw->hw_info2 &= cpu_to_hc32(ehci, ~(QH_CMASK | QH_SMASK));
+		hw->hw_info2 |= qh->period
 			? cpu_to_hc32(ehci, 1 << uframe)
 			: cpu_to_hc32(ehci, QH_SMASK);
-		qh->hw_info2 |= c_mask;
+		hw->hw_info2 |= c_mask;
 	} else
 		ehci_dbg (ehci, "reused qh %p schedule\n", qh);
 
@@ -2188,7 +2210,7 @@
 			case Q_TYPE_QH:
 				/* handle any completions */
 				temp.qh = qh_get (q.qh);
-				type = Q_NEXT_TYPE(ehci, q.qh->hw_next);
+				type = Q_NEXT_TYPE(ehci, q.qh->hw->hw_next);
 				q = q.qh->qh_next;
 				modified = qh_completions (ehci, temp.qh);
 				if (unlikely (list_empty (&temp.qh->qtd_list)))