USB: EHCI support for big-endian descriptors

This patch implements support for EHCI controllers whose in-memory
data structures are represented in big-endian format. This is needed
(unfortunately) for the AMCC PPC440EPx SoC EHCI controller; the EHCI
spec doesn't specify little-endian format, although that's what most
other implementations use.

The guts of the patch introduce the hc32 type and change all
references from le32 to hc32.  All access routines are converted from
cpu_to_le32(...) to cpu_to_hc32(ehci, ...), and similarly for the
other "direction".  (This is the same approach used with OHCI.)

David fixed:
	Whitespace fixes; refresh against ehci cpufreq patch; move glue
	for that PPC driver to the patch adding it; fix free symbol
	capture bugs in modified "constant" macros; and make "hc32" etc
	be "le32" unless we really need the BE options, so "sparse" can
	do some real good.
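
Here "free symbol capture" means a macro body referring to a variable
it does not take as a parameter: a naive conversion such as
"#define HALT_BIT cpu_to_hc32(ehci, QTD_STS_HALT)" would silently bind
to whatever "ehci" happens to be in scope at each expansion site.  The
modified macros therefore take the ehci pointer explicitly, as in the
ehci.h hunk below:

	#define HALT_BIT(ehci)		cpu_to_hc32(ehci, QTD_STS_HALT)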

Signed-off-by: Stefan Roese <sr@denx.de>
Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>


diff --git a/drivers/usb/host/Kconfig b/drivers/usb/host/Kconfig
index 6271187..a8faff6 100644
--- a/drivers/usb/host/Kconfig
+++ b/drivers/usb/host/Kconfig
@@ -72,6 +72,11 @@
 	depends on USB_EHCI_HCD
 	default n
 
+config USB_EHCI_BIG_ENDIAN_DESC
+	bool
+	depends on USB_EHCI_HCD
+	default n
+
 config USB_ISP116X_HCD
 	tristate "ISP116X HCD support"
 	depends on USB
diff --git a/drivers/usb/host/ehci-dbg.c b/drivers/usb/host/ehci-dbg.c
index 43eddae..5bb3d09 100644
--- a/drivers/usb/host/ehci-dbg.c
+++ b/drivers/usb/host/ehci-dbg.c
@@ -52,7 +52,7 @@
 		HCS_INDICATOR (params) ? " ind" : "",
 		HCS_N_CC (params),
 		HCS_N_PCC (params),
-	        HCS_PORTROUTED (params) ? "" : " ordered",
+		HCS_PORTROUTED (params) ? "" : " ordered",
 		HCS_PPC (params) ? "" : " !ppc",
 		HCS_N_PORTS (params)
 		);
@@ -91,20 +91,20 @@
 
 	if (HCC_ISOC_CACHE (params)) {
 		ehci_dbg (ehci,
-		     "%s hcc_params %04x caching frame %s%s%s\n",
-		     label, params,
-		     HCC_PGM_FRAMELISTLEN (params) ? "256/512/1024" : "1024",
-		     HCC_CANPARK (params) ? " park" : "",
-		     HCC_64BIT_ADDR (params) ? " 64 bit addr" : "");
+			"%s hcc_params %04x caching frame %s%s%s\n",
+			label, params,
+			HCC_PGM_FRAMELISTLEN(params) ? "256/512/1024" : "1024",
+			HCC_CANPARK(params) ? " park" : "",
+			HCC_64BIT_ADDR(params) ? " 64 bit addr" : "");
 	} else {
 		ehci_dbg (ehci,
-		     "%s hcc_params %04x thresh %d uframes %s%s%s\n",
-		     label,
-		     params,
-		     HCC_ISOC_THRES (params),
-		     HCC_PGM_FRAMELISTLEN (params) ? "256/512/1024" : "1024",
-		     HCC_CANPARK (params) ? " park" : "",
-		     HCC_64BIT_ADDR (params) ? " 64 bit addr" : "");
+			"%s hcc_params %04x thresh %d uframes %s%s%s\n",
+			label,
+			params,
+			HCC_ISOC_THRES(params),
+			HCC_PGM_FRAMELISTLEN(params) ? "256/512/1024" : "1024",
+			HCC_CANPARK(params) ? " park" : "",
+			HCC_64BIT_ADDR(params) ? " 64 bit addr" : "");
 	}
 }
 #else
@@ -118,17 +118,17 @@
 static void __attribute__((__unused__))
 dbg_qtd (const char *label, struct ehci_hcd *ehci, struct ehci_qtd *qtd)
 {
-	ehci_dbg (ehci, "%s td %p n%08x %08x t%08x p0=%08x\n", label, qtd,
-		le32_to_cpup (&qtd->hw_next),
-		le32_to_cpup (&qtd->hw_alt_next),
-		le32_to_cpup (&qtd->hw_token),
-		le32_to_cpup (&qtd->hw_buf [0]));
+	ehci_dbg(ehci, "%s td %p n%08x %08x t%08x p0=%08x\n", label, qtd,
+		hc32_to_cpup(ehci, &qtd->hw_next),
+		hc32_to_cpup(ehci, &qtd->hw_alt_next),
+		hc32_to_cpup(ehci, &qtd->hw_token),
+		hc32_to_cpup(ehci, &qtd->hw_buf [0]));
 	if (qtd->hw_buf [1])
-		ehci_dbg (ehci, "  p1=%08x p2=%08x p3=%08x p4=%08x\n",
-			le32_to_cpup (&qtd->hw_buf [1]),
-			le32_to_cpup (&qtd->hw_buf [2]),
-			le32_to_cpup (&qtd->hw_buf [3]),
-			le32_to_cpup (&qtd->hw_buf [4]));
+		ehci_dbg(ehci, "  p1=%08x p2=%08x p3=%08x p4=%08x\n",
+			hc32_to_cpup(ehci, &qtd->hw_buf[1]),
+			hc32_to_cpup(ehci, &qtd->hw_buf[2]),
+			hc32_to_cpup(ehci, &qtd->hw_buf[3]),
+			hc32_to_cpup(ehci, &qtd->hw_buf[4]));
 }
 
 static void __attribute__((__unused__))
@@ -144,26 +144,27 @@
 dbg_itd (const char *label, struct ehci_hcd *ehci, struct ehci_itd *itd)
 {
 	ehci_dbg (ehci, "%s [%d] itd %p, next %08x, urb %p\n",
-		label, itd->frame, itd, le32_to_cpu(itd->hw_next), itd->urb);
+		label, itd->frame, itd, hc32_to_cpu(ehci, itd->hw_next),
+		itd->urb);
 	ehci_dbg (ehci,
 		"  trans: %08x %08x %08x %08x %08x %08x %08x %08x\n",
-		le32_to_cpu(itd->hw_transaction[0]),
-		le32_to_cpu(itd->hw_transaction[1]),
-		le32_to_cpu(itd->hw_transaction[2]),
-		le32_to_cpu(itd->hw_transaction[3]),
-		le32_to_cpu(itd->hw_transaction[4]),
-		le32_to_cpu(itd->hw_transaction[5]),
-		le32_to_cpu(itd->hw_transaction[6]),
-		le32_to_cpu(itd->hw_transaction[7]));
+		hc32_to_cpu(ehci, itd->hw_transaction[0]),
+		hc32_to_cpu(ehci, itd->hw_transaction[1]),
+		hc32_to_cpu(ehci, itd->hw_transaction[2]),
+		hc32_to_cpu(ehci, itd->hw_transaction[3]),
+		hc32_to_cpu(ehci, itd->hw_transaction[4]),
+		hc32_to_cpu(ehci, itd->hw_transaction[5]),
+		hc32_to_cpu(ehci, itd->hw_transaction[6]),
+		hc32_to_cpu(ehci, itd->hw_transaction[7]));
 	ehci_dbg (ehci,
 		"  buf:   %08x %08x %08x %08x %08x %08x %08x\n",
-		le32_to_cpu(itd->hw_bufp[0]),
-		le32_to_cpu(itd->hw_bufp[1]),
-		le32_to_cpu(itd->hw_bufp[2]),
-		le32_to_cpu(itd->hw_bufp[3]),
-		le32_to_cpu(itd->hw_bufp[4]),
-		le32_to_cpu(itd->hw_bufp[5]),
-		le32_to_cpu(itd->hw_bufp[6]));
+		hc32_to_cpu(ehci, itd->hw_bufp[0]),
+		hc32_to_cpu(ehci, itd->hw_bufp[1]),
+		hc32_to_cpu(ehci, itd->hw_bufp[2]),
+		hc32_to_cpu(ehci, itd->hw_bufp[3]),
+		hc32_to_cpu(ehci, itd->hw_bufp[4]),
+		hc32_to_cpu(ehci, itd->hw_bufp[5]),
+		hc32_to_cpu(ehci, itd->hw_bufp[6]));
 	ehci_dbg (ehci, "  index: %d %d %d %d %d %d %d %d\n",
 		itd->index[0], itd->index[1], itd->index[2],
 		itd->index[3], itd->index[4], itd->index[5],
@@ -174,14 +175,15 @@
 dbg_sitd (const char *label, struct ehci_hcd *ehci, struct ehci_sitd *sitd)
 {
 	ehci_dbg (ehci, "%s [%d] sitd %p, next %08x, urb %p\n",
-		label, sitd->frame, sitd, le32_to_cpu(sitd->hw_next), sitd->urb);
+		label, sitd->frame, sitd, hc32_to_cpu(ehci, sitd->hw_next),
+		sitd->urb);
 	ehci_dbg (ehci,
 		"  addr %08x sched %04x result %08x buf %08x %08x\n",
-		le32_to_cpu(sitd->hw_fullspeed_ep),
-		le32_to_cpu(sitd->hw_uframe),
-		le32_to_cpu(sitd->hw_results),
-		le32_to_cpu(sitd->hw_buf [0]),
-		le32_to_cpu(sitd->hw_buf [1]));
+		hc32_to_cpu(ehci, sitd->hw_fullspeed_ep),
+		hc32_to_cpu(ehci, sitd->hw_uframe),
+		hc32_to_cpu(ehci, sitd->hw_results),
+		hc32_to_cpu(ehci, sitd->hw_buf[0]),
+		hc32_to_cpu(ehci, sitd->hw_buf[1]));
 }
 
 static int __attribute__((__unused__))
@@ -267,8 +269,7 @@
 		(status & PORT_PEC) ? " PEC" : "",
 		(status & PORT_PE) ? " PE" : "",
 		(status & PORT_CSC) ? " CSC" : "",
-		(status & PORT_CONNECT) ? " CONNECT" : ""
-	    );
+		(status & PORT_CONNECT) ? " CONNECT" : "");
 }
 
 #else
@@ -332,9 +333,10 @@
 		default: tmp = '?'; break; \
 		}; tmp; })
 
-static inline char token_mark (__le32 token)
+static inline char token_mark(struct ehci_hcd *ehci, __hc32 token)
 {
-	__u32 v = le32_to_cpu (token);
+	__u32 v = hc32_to_cpu(ehci, token);
+
 	if (v & QTD_STS_ACTIVE)
 		return '*';
 	if (v & QTD_STS_HALT)
@@ -360,46 +362,48 @@
 	unsigned		size = *sizep;
 	char			*next = *nextp;
 	char			mark;
+	u32			list_end = EHCI_LIST_END(ehci);
 
-	if (qh->hw_qtd_next == EHCI_LIST_END)	/* NEC does this */
+	if (qh->hw_qtd_next == list_end)	/* NEC does this */
 		mark = '@';
 	else
-		mark = token_mark (qh->hw_token);
+		mark = token_mark(ehci, qh->hw_token);
 	if (mark == '/') {	/* qh_alt_next controls qh advance? */
-		if ((qh->hw_alt_next & QTD_MASK) == ehci->async->hw_alt_next)
+		if ((qh->hw_alt_next & QTD_MASK(ehci))
+				== ehci->async->hw_alt_next)
 			mark = '#';	/* blocked */
-		else if (qh->hw_alt_next == EHCI_LIST_END)
+		else if (qh->hw_alt_next == list_end)
 			mark = '.';	/* use hw_qtd_next */
 		/* else alt_next points to some other qtd */
 	}
-	scratch = le32_to_cpup (&qh->hw_info1);
-	hw_curr = (mark == '*') ? le32_to_cpup (&qh->hw_current) : 0;
+	scratch = hc32_to_cpup(ehci, &qh->hw_info1);
+	hw_curr = (mark == '*') ? hc32_to_cpup(ehci, &qh->hw_current) : 0;
 	temp = scnprintf (next, size,
 			"qh/%p dev%d %cs ep%d %08x %08x (%08x%c %s nak%d)",
 			qh, scratch & 0x007f,
 			speed_char (scratch),
 			(scratch >> 8) & 0x000f,
-			scratch, le32_to_cpup (&qh->hw_info2),
-			le32_to_cpup (&qh->hw_token), mark,
-			(__constant_cpu_to_le32 (QTD_TOGGLE) & qh->hw_token)
+			scratch, hc32_to_cpup(ehci, &qh->hw_info2),
+			hc32_to_cpup(ehci, &qh->hw_token), mark,
+			(cpu_to_hc32(ehci, QTD_TOGGLE) & qh->hw_token)
 				? "data1" : "data0",
-			(le32_to_cpup (&qh->hw_alt_next) >> 1) & 0x0f);
+			(hc32_to_cpup(ehci, &qh->hw_alt_next) >> 1) & 0x0f);
 	size -= temp;
 	next += temp;
 
 	/* hc may be modifying the list as we read it ... */
 	list_for_each (entry, &qh->qtd_list) {
 		td = list_entry (entry, struct ehci_qtd, qtd_list);
-		scratch = le32_to_cpup (&td->hw_token);
+		scratch = hc32_to_cpup(ehci, &td->hw_token);
 		mark = ' ';
 		if (hw_curr == td->qtd_dma)
 			mark = '*';
-		else if (qh->hw_qtd_next == cpu_to_le32(td->qtd_dma))
+		else if (qh->hw_qtd_next == cpu_to_hc32(ehci, td->qtd_dma))
 			mark = '+';
 		else if (QTD_LENGTH (scratch)) {
 			if (td->hw_alt_next == ehci->async->hw_alt_next)
 				mark = '#';
-			else if (td->hw_alt_next != EHCI_LIST_END)
+			else if (td->hw_alt_next != list_end)
 				mark = '/';
 		}
 		temp = snprintf (next, size,
@@ -490,7 +494,7 @@
 	unsigned		temp, size, seen_count;
 	char			*next;
 	unsigned		i;
-	__le32			tag;
+	__hc32			tag;
 
 	if (!(seen = kmalloc (DBG_SCHED_LIMIT * sizeof *seen, GFP_ATOMIC)))
 		return 0;
@@ -514,18 +518,19 @@
 		p = ehci->pshadow [i];
 		if (likely (!p.ptr))
 			continue;
-		tag = Q_NEXT_TYPE (ehci->periodic [i]);
+		tag = Q_NEXT_TYPE(ehci, ehci->periodic [i]);
 
 		temp = scnprintf (next, size, "%4d: ", i);
 		size -= temp;
 		next += temp;
 
 		do {
-			switch (tag) {
+			switch (hc32_to_cpu(ehci, tag)) {
 			case Q_TYPE_QH:
 				temp = scnprintf (next, size, " qh%d-%04x/%p",
 						p.qh->period,
-						le32_to_cpup (&p.qh->hw_info2)
+						hc32_to_cpup(ehci,
+								&p.qh->hw_info2)
 							/* uframe masks */
 							& (QH_CMASK | QH_SMASK),
 						p.qh);
@@ -543,7 +548,7 @@
 				}
 				/* show more info the first time around */
 				if (temp == seen_count && p.ptr) {
-					u32	scratch = le32_to_cpup (
+					u32	scratch = hc32_to_cpup(ehci,
 							&p.qh->hw_info1);
 					struct ehci_qtd	*qtd;
 					char		*type = "";
@@ -554,7 +559,8 @@
 							&p.qh->qtd_list,
 							qtd_list) {
 						temp++;
-						switch (0x03 & (le32_to_cpu (
+						switch (0x03 & (hc32_to_cpu(
+							ehci,
 							qtd->hw_token) >> 8)) {
 						case 0: type = "out"; continue;
 						case 1: type = "in"; continue;
@@ -576,7 +582,7 @@
 				} else
 					temp = 0;
 				if (p.qh) {
-					tag = Q_NEXT_TYPE (p.qh->hw_next);
+					tag = Q_NEXT_TYPE(ehci, p.qh->hw_next);
 					p = p.qh->qh_next;
 				}
 				break;
@@ -584,23 +590,23 @@
 				temp = scnprintf (next, size,
 					" fstn-%8x/%p", p.fstn->hw_prev,
 					p.fstn);
-				tag = Q_NEXT_TYPE (p.fstn->hw_next);
+				tag = Q_NEXT_TYPE(ehci, p.fstn->hw_next);
 				p = p.fstn->fstn_next;
 				break;
 			case Q_TYPE_ITD:
 				temp = scnprintf (next, size,
 					" itd/%p", p.itd);
-				tag = Q_NEXT_TYPE (p.itd->hw_next);
+				tag = Q_NEXT_TYPE(ehci, p.itd->hw_next);
 				p = p.itd->itd_next;
 				break;
 			case Q_TYPE_SITD:
 				temp = scnprintf (next, size,
 					" sitd%d-%04x/%p",
 					p.sitd->stream->interval,
-					le32_to_cpup (&p.sitd->hw_uframe)
+					hc32_to_cpup(ehci, &p.sitd->hw_uframe)
 						& 0x0000ffff,
 					p.sitd);
-				tag = Q_NEXT_TYPE (p.sitd->hw_next);
+				tag = Q_NEXT_TYPE(ehci, p.sitd->hw_next);
 				p = p.sitd->sitd_next;
 				break;
 			}
@@ -673,7 +679,8 @@
 		unsigned	count = 256/4;
 
 		pdev = to_pci_dev(ehci_to_hcd(ehci)->self.controller);
-		offset = HCC_EXT_CAPS (ehci_readl(ehci, &ehci->caps->hcc_params));
+		offset = HCC_EXT_CAPS(ehci_readl(ehci,
+				&ehci->caps->hcc_params));
 		while (offset && count--) {
 			pci_read_config_dword (pdev, offset, &cap);
 			switch (cap & 0xff) {
@@ -740,14 +747,16 @@
 
 	for (i = 1; i <= HCS_N_PORTS (ehci->hcs_params); i++) {
 		temp = dbg_port_buf (scratch, sizeof scratch, label, i,
-				ehci_readl(ehci, &ehci->regs->port_status [i - 1]));
+				ehci_readl(ehci,
+					&ehci->regs->port_status[i - 1]));
 		temp = scnprintf (next, size, fmt, temp, scratch);
 		size -= temp;
 		next += temp;
 		if (i == HCS_DEBUG_PORT(ehci->hcs_params) && ehci->debug) {
 			temp = scnprintf (next, size,
 					"    debug control %08x\n",
-					ehci_readl(ehci, &ehci->debug->control));
+					ehci_readl(ehci,
+						&ehci->debug->control));
 			size -= temp;
 			next += temp;
 		}
diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c
index 566badb..99ab31e 100644
--- a/drivers/usb/host/ehci-hcd.c
+++ b/drivers/usb/host/ehci-hcd.c
@@ -526,12 +526,12 @@
 	 * from automatically advancing to the next td after short reads.
 	 */
 	ehci->async->qh_next.qh = NULL;
-	ehci->async->hw_next = QH_NEXT(ehci->async->qh_dma);
-	ehci->async->hw_info1 = cpu_to_le32(QH_HEAD);
-	ehci->async->hw_token = cpu_to_le32(QTD_STS_HALT);
-	ehci->async->hw_qtd_next = EHCI_LIST_END;
+	ehci->async->hw_next = QH_NEXT(ehci, ehci->async->qh_dma);
+	ehci->async->hw_info1 = cpu_to_hc32(ehci, QH_HEAD);
+	ehci->async->hw_token = cpu_to_hc32(ehci, QTD_STS_HALT);
+	ehci->async->hw_qtd_next = EHCI_LIST_END(ehci);
 	ehci->async->qh_state = QH_STATE_LINKED;
-	ehci->async->hw_alt_next = QTD_NEXT(ehci->async->dummy->qtd_dma);
+	ehci->async->hw_alt_next = QTD_NEXT(ehci, ehci->async->dummy->qtd_dma);
 
 	/* clear interrupt enables, set irq latency */
 	if (log2_irq_thresh < 0 || log2_irq_thresh > 6)
diff --git a/drivers/usb/host/ehci-mem.c b/drivers/usb/host/ehci-mem.c
index 5cff6ba..bdb29e6 100644
--- a/drivers/usb/host/ehci-mem.c
+++ b/drivers/usb/host/ehci-mem.c
@@ -27,7 +27,7 @@
  *	  need to use dma_pool or dma_alloc_coherent
  *	- driver buffers, read/written by HC ... single shot DMA mapped
  *
- * There's also PCI "register" data, which is memory mapped.
+ * There's also "register" data (e.g. PCI or SOC), which is memory mapped.
  * No memory seen by this driver is pageable.
  */
 
@@ -35,13 +35,14 @@
 
 /* Allocate the key transfer structures from the previously allocated pool */
 
-static inline void ehci_qtd_init (struct ehci_qtd *qtd, dma_addr_t dma)
+static inline void ehci_qtd_init(struct ehci_hcd *ehci, struct ehci_qtd *qtd,
+				  dma_addr_t dma)
 {
 	memset (qtd, 0, sizeof *qtd);
 	qtd->qtd_dma = dma;
-	qtd->hw_token = cpu_to_le32 (QTD_STS_HALT);
+	qtd->hw_token = cpu_to_hc32(ehci, QTD_STS_HALT);
-	qtd->hw_next = EHCI_LIST_END;
-	qtd->hw_alt_next = EHCI_LIST_END;
+	qtd->hw_next = EHCI_LIST_END(ehci);
+	qtd->hw_alt_next = EHCI_LIST_END(ehci);
 	INIT_LIST_HEAD (&qtd->qtd_list);
 }
 
@@ -52,7 +53,7 @@
 
 	qtd = dma_pool_alloc (ehci->qtd_pool, flags, &dma);
 	if (qtd != NULL) {
-		ehci_qtd_init (qtd, dma);
+		ehci_qtd_init(ehci, qtd, dma);
 	}
 	return qtd;
 }
@@ -220,7 +221,7 @@
 		goto fail;
 	}
 	for (i = 0; i < ehci->periodic_size; i++)
-		ehci->periodic [i] = EHCI_LIST_END;
+		ehci->periodic [i] = EHCI_LIST_END(ehci);
 
 	/* software shadow of hardware table */
 	ehci->pshadow = kcalloc(ehci->periodic_size, sizeof(void *), flags);
diff --git a/drivers/usb/host/ehci-q.c b/drivers/usb/host/ehci-q.c
index 903510b..2284028 100644
--- a/drivers/usb/host/ehci-q.c
+++ b/drivers/usb/host/ehci-q.c
@@ -43,15 +43,15 @@
 /* fill a qtd, returning how much of the buffer we were able to queue up */
 
 static int
-qtd_fill (struct ehci_qtd *qtd, dma_addr_t buf, size_t len,
-		int token, int maxpacket)
+qtd_fill(struct ehci_hcd *ehci, struct ehci_qtd *qtd, dma_addr_t buf,
+		  size_t len, int token, int maxpacket)
 {
 	int	i, count;
 	u64	addr = buf;
 
 	/* one buffer entry per 4K ... first might be short or unaligned */
-	qtd->hw_buf [0] = cpu_to_le32 ((u32)addr);
-	qtd->hw_buf_hi [0] = cpu_to_le32 ((u32)(addr >> 32));
+	qtd->hw_buf[0] = cpu_to_hc32(ehci, (u32)addr);
+	qtd->hw_buf_hi[0] = cpu_to_hc32(ehci, (u32)(addr >> 32));
 	count = 0x1000 - (buf & 0x0fff);	/* rest of that page */
 	if (likely (len < count))		/* ... iff needed */
 		count = len;
@@ -62,8 +62,9 @@
 		/* per-qtd limit: from 16K to 20K (best alignment) */
 		for (i = 1; count < len && i < 5; i++) {
 			addr = buf;
-			qtd->hw_buf [i] = cpu_to_le32 ((u32)addr);
-			qtd->hw_buf_hi [i] = cpu_to_le32 ((u32)(addr >> 32));
+			qtd->hw_buf[i] = cpu_to_hc32(ehci, (u32)addr);
+			qtd->hw_buf_hi[i] = cpu_to_hc32(ehci,
+					(u32)(addr >> 32));
 			buf += 0x1000;
 			if ((count + 0x1000) < len)
 				count += 0x1000;
@@ -75,7 +76,7 @@
 		if (count != len)
 			count -= (count % maxpacket);
 	}
-	qtd->hw_token = cpu_to_le32 ((count << 16) | token);
+	qtd->hw_token = cpu_to_hc32(ehci, (count << 16) | token);
 	qtd->length = count;
 
 	return count;
@@ -89,28 +90,28 @@
 	/* writes to an active overlay are unsafe */
 	BUG_ON(qh->qh_state != QH_STATE_IDLE);
 
-	qh->hw_qtd_next = QTD_NEXT (qtd->qtd_dma);
-	qh->hw_alt_next = EHCI_LIST_END;
+	qh->hw_qtd_next = QTD_NEXT(ehci, qtd->qtd_dma);
+	qh->hw_alt_next = EHCI_LIST_END(ehci);
 
 	/* Except for control endpoints, we make hardware maintain data
 	 * toggle (like OHCI) ... here (re)initialize the toggle in the QH,
 	 * and set the pseudo-toggle in udev. Only usb_clear_halt() will
 	 * ever clear it.
 	 */
-	if (!(qh->hw_info1 & cpu_to_le32(1 << 14))) {
+	if (!(qh->hw_info1 & cpu_to_hc32(ehci, 1 << 14))) {
 		unsigned	is_out, epnum;
 
-		is_out = !(qtd->hw_token & cpu_to_le32(1 << 8));
-		epnum = (le32_to_cpup(&qh->hw_info1) >> 8) & 0x0f;
+		is_out = !(qtd->hw_token & cpu_to_hc32(ehci, 1 << 8));
+		epnum = (hc32_to_cpup(ehci, &qh->hw_info1) >> 8) & 0x0f;
 		if (unlikely (!usb_gettoggle (qh->dev, epnum, is_out))) {
-			qh->hw_token &= ~__constant_cpu_to_le32 (QTD_TOGGLE);
+			qh->hw_token &= ~cpu_to_hc32(ehci, QTD_TOGGLE);
 			usb_settoggle (qh->dev, epnum, is_out, 1);
 		}
 	}
 
 	/* HC must see latest qtd and qh data before we clear ACTIVE+HALT */
 	wmb ();
-	qh->hw_token &= __constant_cpu_to_le32 (QTD_TOGGLE | QTD_STS_PING);
+	qh->hw_token &= cpu_to_hc32(ehci, QTD_TOGGLE | QTD_STS_PING);
 }
 
 /* if it weren't for a common silicon quirk (writing the dummy into the qh
@@ -128,7 +129,7 @@
 		qtd = list_entry (qh->qtd_list.next,
 				struct ehci_qtd, qtd_list);
 		/* first qtd may already be partially processed */
-		if (cpu_to_le32 (qtd->qtd_dma) == qh->hw_current)
+		if (cpu_to_hc32(ehci, qtd->qtd_dma) == qh->hw_current)
 			qtd = NULL;
 	}
 
@@ -222,7 +223,7 @@
 		struct ehci_qh	*qh = (struct ehci_qh *) urb->hcpriv;
 
 		/* S-mask in a QH means it's an interrupt urb */
-		if ((qh->hw_info2 & __constant_cpu_to_le32 (QH_SMASK)) != 0) {
+		if ((qh->hw_info2 & cpu_to_hc32(ehci, QH_SMASK)) != 0) {
 
 			/* ... update hc-wide periodic stats (for usbfs) */
 			ehci_to_hcd(ehci)->self.bandwidth_int_reqs--;
@@ -277,7 +278,6 @@
  * Chases up to qh->hw_current.  Returns number of completions called,
  * indicating how much "real" work we did.
  */
-#define HALT_BIT __constant_cpu_to_le32(QTD_STS_HALT)
 static unsigned
 qh_completions (struct ehci_hcd *ehci, struct ehci_qh *qh)
 {
@@ -287,6 +287,7 @@
 	unsigned		count = 0;
 	int			do_status = 0;
 	u8			state;
+	u32			halt = HALT_BIT(ehci);
 
 	if (unlikely (list_empty (&qh->qtd_list)))
 		return count;
@@ -334,7 +335,7 @@
 
 		/* hardware copies qtd out of qh overlay */
 		rmb ();
-		token = le32_to_cpu (qtd->hw_token);
+		token = hc32_to_cpu(ehci, qtd->hw_token);
 
 		/* always clean up qtds the hc de-activated */
 		if ((token & QTD_STS_ACTIVE) == 0) {
@@ -346,7 +347,8 @@
 			 * that silicon quirk can kick in with this dummy too.
 			 */
 			} else if (IS_SHORT_READ (token)
-					&& !(qtd->hw_alt_next & EHCI_LIST_END)) {
+					&& !(qtd->hw_alt_next
+						& EHCI_LIST_END(ehci))) {
 				stopped = 1;
 				goto halt;
 			}
@@ -378,17 +380,17 @@
 
 			/* token in overlay may be most current */
 			if (state == QH_STATE_IDLE
-					&& cpu_to_le32 (qtd->qtd_dma)
+					&& cpu_to_hc32(ehci, qtd->qtd_dma)
 						== qh->hw_current)
-				token = le32_to_cpu (qh->hw_token);
+				token = hc32_to_cpu(ehci, qh->hw_token);
 
 			/* force halt for unlinked or blocked qh, so we'll
 			 * patch the qh later and so that completions can't
 			 * activate it while we "know" it's stopped.
 			 */
-			if ((HALT_BIT & qh->hw_token) == 0) {
+			if ((halt & qh->hw_token) == 0) {
 halt:
-				qh->hw_token |= HALT_BIT;
+				qh->hw_token |= halt;
 				wmb ();
 			}
 		}
@@ -423,7 +425,7 @@
 	 * it after fault cleanup, or recovering from silicon wrongly
 	 * overlaying the dummy qtd (which reduces DMA chatter).
 	 */
-	if (stopped != 0 || qh->hw_qtd_next == EHCI_LIST_END) {
+	if (stopped != 0 || qh->hw_qtd_next == EHCI_LIST_END(ehci)) {
 		switch (state) {
 		case QH_STATE_IDLE:
 			qh_refresh(ehci, qh);
@@ -432,7 +434,7 @@
 			/* should be rare for periodic transfers,
 			 * except maybe high bandwidth ...
 			 */
-			if ((__constant_cpu_to_le32 (QH_SMASK)
+			if ((cpu_to_hc32(ehci, QH_SMASK)
 					& qh->hw_info2) != 0) {
 				intr_deschedule (ehci, qh);
 				(void) qh_schedule (ehci, qh);
@@ -506,8 +508,9 @@
 	is_input = usb_pipein (urb->pipe);
 	if (usb_pipecontrol (urb->pipe)) {
 		/* SETUP pid */
-		qtd_fill (qtd, urb->setup_dma, sizeof (struct usb_ctrlrequest),
-			token | (2 /* "setup" */ << 8), 8);
+		qtd_fill(ehci, qtd, urb->setup_dma,
+				sizeof (struct usb_ctrlrequest),
+				token | (2 /* "setup" */ << 8), 8);
 
 		/* ... and always at least one more pid */
 		token ^= QTD_TOGGLE;
@@ -516,7 +519,7 @@
 		if (unlikely (!qtd))
 			goto cleanup;
 		qtd->urb = urb;
-		qtd_prev->hw_next = QTD_NEXT (qtd->qtd_dma);
+		qtd_prev->hw_next = QTD_NEXT(ehci, qtd->qtd_dma);
 		list_add_tail (&qtd->qtd_list, head);
 
 		/* for zero length DATA stages, STATUS is always IN */
@@ -543,7 +546,7 @@
 	for (;;) {
 		int this_qtd_len;
 
-		this_qtd_len = qtd_fill (qtd, buf, len, token, maxpacket);
+		this_qtd_len = qtd_fill(ehci, qtd, buf, len, token, maxpacket);
 		len -= this_qtd_len;
 		buf += this_qtd_len;
 		if (is_input)
@@ -561,7 +564,7 @@
 		if (unlikely (!qtd))
 			goto cleanup;
 		qtd->urb = urb;
-		qtd_prev->hw_next = QTD_NEXT (qtd->qtd_dma);
+		qtd_prev->hw_next = QTD_NEXT(ehci, qtd->qtd_dma);
 		list_add_tail (&qtd->qtd_list, head);
 	}
 
@@ -570,7 +573,7 @@
 	 */
 	if (likely ((urb->transfer_flags & URB_SHORT_NOT_OK) == 0
 				|| usb_pipecontrol (urb->pipe)))
-		qtd->hw_alt_next = EHCI_LIST_END;
+		qtd->hw_alt_next = EHCI_LIST_END(ehci);
 
 	/*
 	 * control requests may need a terminating data "status" ack;
@@ -594,17 +597,17 @@
 			if (unlikely (!qtd))
 				goto cleanup;
 			qtd->urb = urb;
-			qtd_prev->hw_next = QTD_NEXT (qtd->qtd_dma);
+			qtd_prev->hw_next = QTD_NEXT(ehci, qtd->qtd_dma);
 			list_add_tail (&qtd->qtd_list, head);
 
 			/* never any data in such packets */
-			qtd_fill (qtd, 0, 0, token, 0);
+			qtd_fill(ehci, qtd, 0, 0, token, 0);
 		}
 	}
 
 	/* by default, enable interrupt on urb completion */
 	if (likely (!(urb->transfer_flags & URB_NO_INTERRUPT)))
-		qtd->hw_token |= __constant_cpu_to_le32 (QTD_IOC);
+		qtd->hw_token |= cpu_to_hc32(ehci, QTD_IOC);
 	return head;
 
 cleanup:
@@ -773,8 +776,8 @@
 
 	/* init as live, toggle clear, advance to dummy */
 	qh->qh_state = QH_STATE_IDLE;
-	qh->hw_info1 = cpu_to_le32 (info1);
-	qh->hw_info2 = cpu_to_le32 (info2);
+	qh->hw_info1 = cpu_to_hc32(ehci, info1);
+	qh->hw_info2 = cpu_to_hc32(ehci, info2);
 	usb_settoggle (urb->dev, usb_pipeendpoint (urb->pipe), !is_input, 1);
 	qh_refresh (ehci, qh);
 	return qh;
@@ -786,7 +789,7 @@
 
 static void qh_link_async (struct ehci_hcd *ehci, struct ehci_qh *qh)
 {
-	__le32		dma = QH_NEXT (qh->qh_dma);
+	__hc32		dma = QH_NEXT(ehci, qh->qh_dma);
 	struct ehci_qh	*head;
 
 	/* (re)start the async schedule? */
@@ -824,8 +827,6 @@
 
 /*-------------------------------------------------------------------------*/
 
-#define	QH_ADDR_MASK	__constant_cpu_to_le32(0x7f)
-
 /*
  * For control/bulk/interrupt, return QH with these TDs appended.
  * Allocates and initializes the QH if necessary.
@@ -841,6 +842,7 @@
 )
 {
 	struct ehci_qh		*qh = NULL;
+	u32			qh_addr_mask = cpu_to_hc32(ehci, 0x7f);
 
 	qh = (struct ehci_qh *) *ptr;
 	if (unlikely (qh == NULL)) {
@@ -862,7 +864,7 @@
 
                         /* usb_reset_device() briefly reverts to address 0 */
                         if (usb_pipedevice (urb->pipe) == 0)
-                                qh->hw_info1 &= ~QH_ADDR_MASK;
+                                qh->hw_info1 &= ~qh_addr_mask;
 		}
 
 		/* just one way to queue requests: swap with the dummy qtd.
@@ -871,7 +873,7 @@
 		if (likely (qtd != NULL)) {
 			struct ehci_qtd		*dummy;
 			dma_addr_t		dma;
-			__le32			token;
+			__hc32			token;
 
 			/* to avoid racing the HC, use the dummy td instead of
 			 * the first td of our list (becomes new dummy).  both
@@ -879,7 +881,7 @@
 			 * HC is allowed to fetch the old dummy (4.10.2).
 			 */
 			token = qtd->hw_token;
-			qtd->hw_token = HALT_BIT;
+			qtd->hw_token = HALT_BIT(ehci);
 			wmb ();
 			dummy = qh->dummy;
 
@@ -891,14 +893,14 @@
 			list_add (&dummy->qtd_list, qtd_list);
 			__list_splice (qtd_list, qh->qtd_list.prev);
 
-			ehci_qtd_init (qtd, qtd->qtd_dma);
+			ehci_qtd_init(ehci, qtd, qtd->qtd_dma);
 			qh->dummy = qtd;
 
 			/* hc must see the new dummy at list end */
 			dma = qtd->qtd_dma;
 			qtd = list_entry (qh->qtd_list.prev,
 					struct ehci_qtd, qtd_list);
-			qtd->hw_next = QTD_NEXT (dma);
+			qtd->hw_next = QTD_NEXT(ehci, dma);
 
 			/* let the hc process these next qtds */
 			wmb ();
@@ -974,7 +976,7 @@
 
 	timer_action_done (ehci, TIMER_IAA_WATCHDOG);
 
-	// qh->hw_next = cpu_to_le32 (qh->qh_dma);
+	// qh->hw_next = cpu_to_hc32(ehci, qh->qh_dma);
 	qh->qh_state = QH_STATE_IDLE;
 	qh->qh_next.qh = NULL;
 	qh_put (qh);			// refcount from reclaim
diff --git a/drivers/usb/host/ehci-sched.c b/drivers/usb/host/ehci-sched.c
index 500aebb..d4a8ace 100644
--- a/drivers/usb/host/ehci-sched.c
+++ b/drivers/usb/host/ehci-sched.c
@@ -44,9 +44,10 @@
  * @tag: hardware tag for type of this record
  */
 static union ehci_shadow *
-periodic_next_shadow (union ehci_shadow *periodic, __le32 tag)
+periodic_next_shadow(struct ehci_hcd *ehci, union ehci_shadow *periodic,
+		__hc32 tag)
 {
-	switch (tag) {
+	switch (hc32_to_cpu(ehci, tag)) {
 	case Q_TYPE_QH:
 		return &periodic->qh->qh_next;
 	case Q_TYPE_FSTN:
@@ -62,13 +63,14 @@
 /* caller must hold ehci->lock */
 static void periodic_unlink (struct ehci_hcd *ehci, unsigned frame, void *ptr)
 {
-	union ehci_shadow	*prev_p = &ehci->pshadow [frame];
-	__le32			*hw_p = &ehci->periodic [frame];
+	union ehci_shadow	*prev_p = &ehci->pshadow[frame];
+	__hc32			*hw_p = &ehci->periodic[frame];
 	union ehci_shadow	here = *prev_p;
 
 	/* find predecessor of "ptr"; hw and shadow lists are in sync */
 	while (here.ptr && here.ptr != ptr) {
-		prev_p = periodic_next_shadow (prev_p, Q_NEXT_TYPE (*hw_p));
+		prev_p = periodic_next_shadow(ehci, prev_p,
+				Q_NEXT_TYPE(ehci, *hw_p));
 		hw_p = here.hw_next;
 		here = *prev_p;
 	}
@@ -79,7 +81,8 @@
 	/* update shadow and hardware lists ... the old "next" pointers
 	 * from ptr may still be in use, the caller updates them.
 	 */
-	*prev_p = *periodic_next_shadow (&here, Q_NEXT_TYPE (*hw_p));
+	*prev_p = *periodic_next_shadow(ehci, &here,
+			Q_NEXT_TYPE(ehci, *hw_p));
 	*hw_p = *here.hw_next;
 }
 
@@ -87,18 +90,19 @@
 static unsigned short
 periodic_usecs (struct ehci_hcd *ehci, unsigned frame, unsigned uframe)
 {
-	__le32			*hw_p = &ehci->periodic [frame];
+	__hc32			*hw_p = &ehci->periodic [frame];
 	union ehci_shadow	*q = &ehci->pshadow [frame];
 	unsigned		usecs = 0;
 
 	while (q->ptr) {
-		switch (Q_NEXT_TYPE (*hw_p)) {
+		switch (hc32_to_cpu(ehci, Q_NEXT_TYPE(ehci, *hw_p))) {
 		case Q_TYPE_QH:
 			/* is it in the S-mask? */
-			if (q->qh->hw_info2 & cpu_to_le32 (1 << uframe))
+			if (q->qh->hw_info2 & cpu_to_hc32(ehci, 1 << uframe))
 				usecs += q->qh->usecs;
 			/* ... or C-mask? */
-			if (q->qh->hw_info2 & cpu_to_le32 (1 << (8 + uframe)))
+			if (q->qh->hw_info2 & cpu_to_hc32(ehci,
+					1 << (8 + uframe)))
 				usecs += q->qh->c_usecs;
 			hw_p = &q->qh->hw_next;
 			q = &q->qh->qh_next;
@@ -108,7 +112,7 @@
 			/* for "save place" FSTNs, count the relevant INTR
 			 * bandwidth from the previous frame
 			 */
-			if (q->fstn->hw_prev != EHCI_LIST_END) {
+			if (q->fstn->hw_prev != EHCI_LIST_END(ehci)) {
 				ehci_dbg (ehci, "ignoring FSTN cost ...\n");
 			}
 			hw_p = &q->fstn->hw_next;
@@ -121,9 +125,10 @@
 			break;
 		case Q_TYPE_SITD:
 			/* is it in the S-mask?  (count SPLIT, DATA) */
-			if (q->sitd->hw_uframe & cpu_to_le32 (1 << uframe)) {
+			if (q->sitd->hw_uframe & cpu_to_hc32(ehci,
+					1 << uframe)) {
 				if (q->sitd->hw_fullspeed_ep &
-						__constant_cpu_to_le32 (1<<31))
+						cpu_to_hc32(ehci, 1<<31))
 					usecs += q->sitd->stream->usecs;
 				else	/* worst case for OUT start-split */
 					usecs += HS_USECS_ISO (188);
@@ -131,7 +136,7 @@
 
 			/* ... C-mask?  (count CSPLIT, DATA) */
 			if (q->sitd->hw_uframe &
-					cpu_to_le32 (1 << (8 + uframe))) {
+					cpu_to_hc32(ehci, 1 << (8 + uframe))) {
 				/* worst case for IN complete-split */
 				usecs += q->sitd->stream->c_usecs;
 			}
@@ -173,9 +178,9 @@
  * will cause a transfer in "B-frame" uframe 0.  "B-frames" lag
  * "H-frames" by 1 uframe.  See the EHCI spec sec 4.5 and figure 4.7.
  */
-static inline unsigned char tt_start_uframe(struct ehci_hcd *ehci, __le32 mask)
+static inline unsigned char tt_start_uframe(struct ehci_hcd *ehci, __hc32 mask)
 {
-	unsigned char smask = QH_SMASK & le32_to_cpu(mask);
+	unsigned char smask = QH_SMASK & hc32_to_cpu(ehci, mask);
 	if (!smask) {
 		ehci_err(ehci, "invalid empty smask!\n");
 		/* uframe 7 can't have bw so this will indicate failure */
@@ -217,14 +222,14 @@
 	unsigned short tt_usecs[8]
 )
 {
-	__le32			*hw_p = &ehci->periodic [frame];
+	__hc32			*hw_p = &ehci->periodic [frame];
 	union ehci_shadow	*q = &ehci->pshadow [frame];
 	unsigned char		uf;
 
 	memset(tt_usecs, 0, 16);
 
 	while (q->ptr) {
-		switch (Q_NEXT_TYPE(*hw_p)) {
+		switch (hc32_to_cpu(ehci, Q_NEXT_TYPE(ehci, *hw_p))) {
 		case Q_TYPE_ITD:
 			hw_p = &q->itd->hw_next;
 			q = &q->itd->itd_next;
@@ -247,8 +252,8 @@
 			continue;
 		// case Q_TYPE_FSTN:
 		default:
-			ehci_dbg(ehci,
-				  "ignoring periodic frame %d FSTN\n", frame);
+			ehci_dbg(ehci, "ignoring periodic frame %d FSTN\n",
+					frame);
 			hw_p = &q->fstn->hw_next;
 			q = &q->fstn->fstn_next;
 		}
@@ -368,41 +373,42 @@
 	 */
 	for (; frame < ehci->periodic_size; frame += period) {
 		union ehci_shadow	here;
-		__le32			type;
+		__hc32			type;
 
 		here = ehci->pshadow [frame];
-		type = Q_NEXT_TYPE (ehci->periodic [frame]);
+		type = Q_NEXT_TYPE(ehci, ehci->periodic [frame]);
 		while (here.ptr) {
-			switch (type) {
+			switch (hc32_to_cpu(ehci, type)) {
 			case Q_TYPE_ITD:
-				type = Q_NEXT_TYPE (here.itd->hw_next);
+				type = Q_NEXT_TYPE(ehci, here.itd->hw_next);
 				here = here.itd->itd_next;
 				continue;
 			case Q_TYPE_QH:
 				if (same_tt (dev, here.qh->dev)) {
 					u32		mask;
 
-					mask = le32_to_cpu (here.qh->hw_info2);
+					mask = hc32_to_cpu(ehci,
+							here.qh->hw_info2);
 					/* "knows" no gap is needed */
 					mask |= mask >> 8;
 					if (mask & uf_mask)
 						break;
 				}
-				type = Q_NEXT_TYPE (here.qh->hw_next);
+				type = Q_NEXT_TYPE(ehci, here.qh->hw_next);
 				here = here.qh->qh_next;
 				continue;
 			case Q_TYPE_SITD:
 				if (same_tt (dev, here.sitd->urb->dev)) {
 					u16		mask;
 
-					mask = le32_to_cpu (here.sitd
+					mask = hc32_to_cpu(ehci, here.sitd
 								->hw_uframe);
 					/* FIXME assumes no gap for IN! */
 					mask |= mask >> 8;
 					if (mask & uf_mask)
 						break;
 				}
-				type = Q_NEXT_TYPE (here.sitd->hw_next);
+				type = Q_NEXT_TYPE(ehci, here.sitd->hw_next);
 				here = here.sitd->sitd_next;
 				continue;
 			// case Q_TYPE_FSTN:
@@ -475,13 +481,6 @@
 /*-------------------------------------------------------------------------*/
 #ifdef CONFIG_CPU_FREQ
 
-/* ignore/inactivate bit in QH hw_info1 */
-#define INACTIVATE_BIT __constant_cpu_to_le32(QH_INACTIVATE)
-
-#define HALT_BIT __constant_cpu_to_le32(QTD_STS_HALT)
-#define ACTIVE_BIT __constant_cpu_to_le32(QTD_STS_ACTIVE)
-#define STATUS_BIT __constant_cpu_to_le32(QTD_STS_STS)
-
 static int safe_to_modify_i (struct ehci_hcd *ehci, struct ehci_qh *qh)
 {
 	int now; /* current (frame * 8) + uframe */
@@ -492,8 +491,8 @@
 
 	now = readl(&ehci->regs->frame_index) % (ehci->periodic_size << 3);
 
-	next_start = ((1024 << 3) + (qh->start << 3) + start_uframe - now) %
-		     (qh->period << 3);
+	next_start = ((1024 << 3) + (qh->start << 3) + start_uframe - now)
+			% (qh->period << 3);
 	prev_start = (qh->period << 3) - next_start;
 
 	/*
@@ -510,7 +509,7 @@
 	 */
 	if ((next_start > ehci->i_thresh) && (prev_start > 1))
 		/* safe to set "i" bit if split isn't in progress */
-		return (qh->hw_token & STATUS_BIT) ? 0 : 1;
+		return (qh->hw_token & STATUS_BIT(ehci)) ? 0 : 1;
 	else
 		return 0;
 }
@@ -520,12 +519,14 @@
 {
 	struct ehci_qh	*qh;
 	int		not_done, safe;
+	u32		inactivate = INACTIVATE_BIT(ehci);
+	u32		active = ACTIVE_BIT(ehci);
 
 	do {
 		not_done = 0;
 		list_for_each_entry(qh, &ehci->split_intr_qhs,
-				     split_intr_qhs) {
-			if (qh->hw_info1 & INACTIVATE_BIT)
+				split_intr_qhs) {
+			if (qh->hw_info1 & inactivate)
 				/* already off */
 				continue;
 			/*
@@ -539,8 +540,8 @@
 			if (safe == 0) {
 				not_done = 1;
 			} else if (safe > 0) {
-				qh->was_active = qh->hw_token & ACTIVE_BIT;
-				qh->hw_info1 |= INACTIVATE_BIT;
+				qh->was_active = qh->hw_token & active;
+				qh->hw_info1 |= inactivate;
 			}
 		}
 	} while (not_done);
@@ -552,11 +553,14 @@
 	struct ehci_qh	*qh;
 	u32		token;
 	int		not_done, safe;
+	u32		inactivate = INACTIVATE_BIT(ehci);
+	u32		active = ACTIVE_BIT(ehci);
+	u32		halt = HALT_BIT(ehci);
 
 	do {
 		not_done = 0;
 		list_for_each_entry(qh, &ehci->split_intr_qhs, split_intr_qhs) {
-			if (!(qh->hw_info1 & INACTIVATE_BIT)) /* already on */
+			if (!(qh->hw_info1 & inactivate)) /* already on */
 				continue;
 			/*
 			 * Don't reactivate if cached, or controller might
@@ -568,11 +572,11 @@
 			} else if (safe > 0) {
 				/* See EHCI 1.0 section 4.15.2.4. */
 				token = qh->hw_token;
-				qh->hw_token = (token | HALT_BIT) & ~ACTIVE_BIT;
+				qh->hw_token = (token | halt) & ~active;
 				wmb();
-				qh->hw_info1 &= ~INACTIVATE_BIT;
+				qh->hw_info1 &= ~inactivate;
 				wmb();
-				qh->hw_token = (token & ~HALT_BIT) | qh->was_active;
+				qh->hw_token = (token & ~halt) | qh->was_active;
 			}
 		}
 	} while (not_done);
@@ -592,7 +596,7 @@
 
 	dev_dbg (&qh->dev->dev,
 		"link qh%d-%04x/%p start %d [%d/%d us]\n",
-		period, le32_to_cpup (&qh->hw_info2) & (QH_CMASK | QH_SMASK),
+		period, hc32_to_cpup(ehci, &qh->hw_info2) & (QH_CMASK | QH_SMASK),
 		qh, qh->start, qh->usecs, qh->c_usecs);
 
 #ifdef CONFIG_CPU_FREQ
@@ -603,7 +607,7 @@
 	 */
 	if (ehci->cpufreq_changing)
-		if (!(qh->hw_info1 & (cpu_to_le32(1 << 13))))
+		if (!(qh->hw_info1 & cpu_to_hc32(ehci, 1 << 13)))
-			qh->hw_info1 |= INACTIVATE_BIT;
+			qh->hw_info1 |= INACTIVATE_BIT(ehci);
 #endif
 
 	/* high bandwidth, or otherwise every microframe */
@@ -611,17 +615,17 @@
 		period = 1;
 
 	for (i = qh->start; i < ehci->periodic_size; i += period) {
-		union ehci_shadow	*prev = &ehci->pshadow [i];
-		__le32			*hw_p = &ehci->periodic [i];
+		union ehci_shadow	*prev = &ehci->pshadow[i];
+		__hc32			*hw_p = &ehci->periodic[i];
 		union ehci_shadow	here = *prev;
-		__le32			type = 0;
+		__hc32			type = 0;
 
 		/* skip the iso nodes at list head */
 		while (here.ptr) {
-			type = Q_NEXT_TYPE (*hw_p);
-			if (type == Q_TYPE_QH)
+			type = Q_NEXT_TYPE(ehci, *hw_p);
+			if (type == cpu_to_hc32(ehci, Q_TYPE_QH))
 				break;
-			prev = periodic_next_shadow (prev, type);
+			prev = periodic_next_shadow(ehci, prev, type);
 			hw_p = &here.qh->hw_next;
 			here = *prev;
 		}
@@ -643,7 +647,7 @@
 				qh->hw_next = *hw_p;
 			wmb ();
 			prev->qh = qh;
-			*hw_p = QH_NEXT (qh->qh_dma);
+			*hw_p = QH_NEXT (ehci, qh->qh_dma);
 		}
 	}
 	qh->qh_state = QH_STATE_LINKED;
@@ -677,7 +681,7 @@
 	//   and this qh is active in the current uframe
 	//   (and overlay token SplitXstate is false?)
 	// THEN
-	//   qh->hw_info1 |= __constant_cpu_to_le32 (1 << 7 /* "ignore" */);
+	//   qh->hw_info1 |= cpu_to_hc32(ehci, 1 << 7 /* "ignore" */);
 
 #ifdef CONFIG_CPU_FREQ
 	/* remove qh from list of low/full speed interrupt QHs */
@@ -701,7 +705,7 @@
 	dev_dbg (&qh->dev->dev,
 		"unlink qh%d-%04x/%p start %d [%d/%d us]\n",
 		qh->period,
-		le32_to_cpup (&qh->hw_info2) & (QH_CMASK | QH_SMASK),
+		hc32_to_cpup(ehci, &qh->hw_info2) & (QH_CMASK | QH_SMASK),
 		qh, qh->start, qh->usecs, qh->c_usecs);
 
 	/* qh->qh_next still "live" to HC */
@@ -727,7 +731,7 @@
 	 * active high speed queues may need bigger delays...
 	 */
 	if (list_empty (&qh->qtd_list)
-			|| (__constant_cpu_to_le32 (QH_CMASK)
+			|| (cpu_to_hc32(ehci, QH_CMASK)
 					& qh->hw_info2) != 0)
 		wait = 2;
 	else
@@ -735,7 +739,7 @@
 
 	udelay (wait);
 	qh->qh_state = QH_STATE_IDLE;
-	qh->hw_next = EHCI_LIST_END;
+	qh->hw_next = EHCI_LIST_END(ehci);
 	wmb ();
 }
 
@@ -792,7 +796,7 @@
 	unsigned		frame,
 	unsigned		uframe,
 	const struct ehci_qh	*qh,
-	__le32			*c_maskp
+	__hc32			*c_maskp
 )
 {
 	int		retval = -ENOSPC;
@@ -824,7 +828,7 @@
 
 		retval = 0;
 
-		*c_maskp = cpu_to_le32 (mask << 8);
+		*c_maskp = cpu_to_hc32(ehci, mask << 8);
 	}
 #else
 	/* Make sure this tt's buffer is also available for CSPLITs.
@@ -835,7 +839,7 @@
 	 * one smart pass...
 	 */
 	mask = 0x03 << (uframe + qh->gap_uf);
-	*c_maskp = cpu_to_le32 (mask << 8);
+	*c_maskp = cpu_to_hc32(ehci, mask << 8);
 
 	mask |= 1 << uframe;
 	if (tt_no_collision (ehci, qh->period, qh->dev, frame, mask)) {
@@ -855,20 +859,20 @@
 /* "first fit" scheduling policy used the first time through,
  * or when the previous schedule slot can't be re-used.
  */
-static int qh_schedule (struct ehci_hcd *ehci, struct ehci_qh *qh)
+static int qh_schedule(struct ehci_hcd *ehci, struct ehci_qh *qh)
 {
 	int		status;
 	unsigned	uframe;
-	__le32		c_mask;
+	__hc32		c_mask;
 	unsigned	frame;		/* 0..(qh->period - 1), or NO_FRAME */
 
 	qh_refresh(ehci, qh);
-	qh->hw_next = EHCI_LIST_END;
+	qh->hw_next = EHCI_LIST_END(ehci);
 	frame = qh->start;
 
 	/* reuse the previous schedule slots, if we can */
 	if (frame < qh->period) {
-		uframe = ffs (le32_to_cpup (&qh->hw_info2) & QH_SMASK);
+		uframe = ffs(hc32_to_cpup(ehci, &qh->hw_info2) & QH_SMASK);
 		status = check_intr_schedule (ehci, frame, --uframe,
 				qh, &c_mask);
 	} else {
@@ -904,10 +908,10 @@
 		qh->start = frame;
 
 		/* reset S-frame and (maybe) C-frame masks */
-		qh->hw_info2 &= __constant_cpu_to_le32(~(QH_CMASK | QH_SMASK));
+		qh->hw_info2 &= cpu_to_hc32(ehci, ~(QH_CMASK | QH_SMASK));
 		qh->hw_info2 |= qh->period
-			? cpu_to_le32 (1 << uframe)
-			: __constant_cpu_to_le32 (QH_SMASK);
+			? cpu_to_hc32(ehci, 1 << uframe)
+			: cpu_to_hc32(ehci, QH_SMASK);
 		qh->hw_info2 |= c_mask;
 	} else
 		ehci_dbg (ehci, "reused qh %p schedule\n", qh);
@@ -937,7 +941,7 @@
 	spin_lock_irqsave (&ehci->lock, flags);
 
 	if (unlikely(!test_bit(HCD_FLAG_HW_ACCESSIBLE,
-			       &ehci_to_hcd(ehci)->flags))) {
+			&ehci_to_hcd(ehci)->flags))) {
 		status = -ESHUTDOWN;
 		goto done;
 	}
@@ -1027,9 +1031,9 @@
 		buf1 |= maxp;
 		maxp *= multi;
 
-		stream->buf0 = cpu_to_le32 ((epnum << 8) | dev->devnum);
-		stream->buf1 = cpu_to_le32 (buf1);
-		stream->buf2 = cpu_to_le32 (multi);
+		stream->buf0 = cpu_to_hc32(ehci, (epnum << 8) | dev->devnum);
+		stream->buf1 = cpu_to_hc32(ehci, buf1);
+		stream->buf2 = cpu_to_hc32(ehci, multi);
 
 		/* usbfs wants to report the average usecs per frame tied up
 		 * when transfers on this endpoint are scheduled ...
@@ -1072,7 +1076,7 @@
 		bandwidth /= 1 << (interval + 2);
 
 		/* stream->splits gets created from raw_mask later */
-		stream->address = cpu_to_le32 (addr);
+		stream->address = cpu_to_hc32(ehci, addr);
 	}
 	stream->bandwidth = bandwidth;
 
@@ -1206,7 +1210,8 @@
 }
 
 static inline void
-itd_sched_init (
+itd_sched_init(
+	struct ehci_hcd		*ehci,
 	struct ehci_iso_sched	*iso_sched,
 	struct ehci_iso_stream	*stream,
 	struct urb		*urb
@@ -1236,7 +1241,7 @@
 				&& !(urb->transfer_flags & URB_NO_INTERRUPT))
 			trans |= EHCI_ITD_IOC;
 		trans |= length << 16;
-		uframe->transaction = cpu_to_le32 (trans);
+		uframe->transaction = cpu_to_hc32(ehci, trans);
 
 		/* might need to cross a buffer page within a uframe */
 		uframe->bufp = (buf & ~(u64)0x0fff);
@@ -1278,7 +1283,7 @@
 	if (unlikely (sched == NULL))
 		return -ENOMEM;
 
-	itd_sched_init (sched, stream, urb);
+	itd_sched_init(ehci, sched, stream, urb);
 
 	if (urb->interval < 8)
 		num_itds = 1 + (sched->span + 7) / 8;
@@ -1296,7 +1301,7 @@
 		/* prefer previously-allocated itds */
 		if (likely (!list_empty(&stream->free_list))) {
 			itd = list_entry (stream->free_list.prev,
-					 struct ehci_itd, itd_list);
+					struct ehci_itd, itd_list);
 			list_del (&itd->itd_list);
 			itd_dma = itd->itd_dma;
 		} else
@@ -1423,7 +1428,7 @@
 		uframe += period_uframes;
 	} while (uframe < mod);
 
-	stream->splits = cpu_to_le32(stream->raw_mask << (uframe & 7));
+	stream->splits = cpu_to_hc32(ehci, stream->raw_mask << (uframe & 7));
 	return 1;
 }
 
@@ -1544,12 +1549,13 @@
 /*-------------------------------------------------------------------------*/
 
 static inline void
-itd_init (struct ehci_iso_stream *stream, struct ehci_itd *itd)
+itd_init(struct ehci_hcd *ehci, struct ehci_iso_stream *stream,
+		struct ehci_itd *itd)
 {
 	int i;
 
 	/* it's been recently zeroed */
-	itd->hw_next = EHCI_LIST_END;
+	itd->hw_next = EHCI_LIST_END(ehci);
 	itd->hw_bufp [0] = stream->buf0;
 	itd->hw_bufp [1] = stream->buf1;
 	itd->hw_bufp [2] = stream->buf2;
@@ -1561,7 +1567,8 @@
 }
 
 static inline void
-itd_patch (
+itd_patch(
+	struct ehci_hcd		*ehci,
 	struct ehci_itd		*itd,
 	struct ehci_iso_sched	*iso_sched,
 	unsigned		index,
@@ -1576,17 +1583,18 @@
 	uframe &= 0x07;
 	itd->index [uframe] = index;
 
-	itd->hw_transaction [uframe] = uf->transaction;
-	itd->hw_transaction [uframe] |= cpu_to_le32 (pg << 12);
-	itd->hw_bufp [pg] |= cpu_to_le32 (uf->bufp & ~(u32)0);
-	itd->hw_bufp_hi [pg] |= cpu_to_le32 ((u32)(uf->bufp >> 32));
+	itd->hw_transaction[uframe] = uf->transaction;
+	itd->hw_transaction[uframe] |= cpu_to_hc32(ehci, pg << 12);
+	itd->hw_bufp[pg] |= cpu_to_hc32(ehci, uf->bufp & ~(u32)0);
+	itd->hw_bufp_hi[pg] |= cpu_to_hc32(ehci, (u32)(uf->bufp >> 32));
 
 	/* iso_frame_desc[].offset must be strictly increasing */
 	if (unlikely (uf->cross)) {
 		u64	bufp = uf->bufp + 4096;
+
 		itd->pg = ++pg;
-		itd->hw_bufp [pg] |= cpu_to_le32 (bufp & ~(u32)0);
-		itd->hw_bufp_hi [pg] |= cpu_to_le32 ((u32)(bufp >> 32));
+		itd->hw_bufp[pg] |= cpu_to_hc32(ehci, bufp & ~(u32)0);
+		itd->hw_bufp_hi[pg] |= cpu_to_hc32(ehci, (u32)(bufp >> 32));
 	}
 }
 
@@ -1599,7 +1607,7 @@
 	ehci->pshadow [frame].itd = itd;
 	itd->frame = frame;
 	wmb ();
-	ehci->periodic [frame] = cpu_to_le32 (itd->itd_dma) | Q_TYPE_ITD;
+	ehci->periodic[frame] = cpu_to_hc32(ehci, itd->itd_dma | Q_TYPE_ITD);
 }
 
 /* fit urb's itds into the selected schedule slot; activate as needed */
@@ -1644,14 +1652,14 @@
 			list_move_tail (&itd->itd_list, &stream->td_list);
 			itd->stream = iso_stream_get (stream);
 			itd->urb = usb_get_urb (urb);
-			itd_init (stream, itd);
+			itd_init (ehci, stream, itd);
 		}
 
 		uframe = next_uframe & 0x07;
 		frame = next_uframe >> 3;
 
 		itd->usecs [uframe] = stream->usecs;
-		itd_patch (itd, iso_sched, packet, uframe);
+		itd_patch(ehci, itd, iso_sched, packet, uframe);
 
 		next_uframe += stream->interval;
 		stream->depth += stream->interval;
@@ -1699,7 +1707,7 @@
 		urb_index = itd->index[uframe];
 		desc = &urb->iso_frame_desc [urb_index];
 
-		t = le32_to_cpup (&itd->hw_transaction [uframe]);
+		t = hc32_to_cpup(ehci, &itd->hw_transaction [uframe]);
 		itd->hw_transaction [uframe] = 0;
 		stream->depth -= stream->interval;
 
@@ -1829,7 +1837,8 @@
  */
 
 static inline void
-sitd_sched_init (
+sitd_sched_init(
+	struct ehci_hcd		*ehci,
 	struct ehci_iso_sched	*iso_sched,
 	struct ehci_iso_stream	*stream,
 	struct urb		*urb
@@ -1858,7 +1867,7 @@
 				&& !(urb->transfer_flags & URB_NO_INTERRUPT))
 			trans |= SITD_IOC;
 		trans |= length << 16;
-		packet->transaction = cpu_to_le32 (trans);
+		packet->transaction = cpu_to_hc32(ehci, trans);
 
 		/* might need to cross a buffer page within a td */
 		packet->bufp = buf;
@@ -1894,7 +1903,7 @@
 	if (iso_sched == NULL)
 		return -ENOMEM;
 
-	sitd_sched_init (iso_sched, stream, urb);
+	sitd_sched_init(ehci, iso_sched, stream, urb);
 
 	/* allocate/init sITDs */
 	spin_lock_irqsave (&ehci->lock, flags);
@@ -1946,7 +1955,8 @@
 /*-------------------------------------------------------------------------*/
 
 static inline void
-sitd_patch (
+sitd_patch(
+	struct ehci_hcd		*ehci,
 	struct ehci_iso_stream	*stream,
 	struct ehci_sitd	*sitd,
 	struct ehci_iso_sched	*iso_sched,
@@ -1956,20 +1966,20 @@
 	struct ehci_iso_packet	*uf = &iso_sched->packet [index];
 	u64			bufp = uf->bufp;
 
-	sitd->hw_next = EHCI_LIST_END;
+	sitd->hw_next = EHCI_LIST_END(ehci);
 	sitd->hw_fullspeed_ep = stream->address;
 	sitd->hw_uframe = stream->splits;
 	sitd->hw_results = uf->transaction;
-	sitd->hw_backpointer = EHCI_LIST_END;
+	sitd->hw_backpointer = EHCI_LIST_END(ehci);
 
 	bufp = uf->bufp;
-	sitd->hw_buf [0] = cpu_to_le32 (bufp);
-	sitd->hw_buf_hi [0] = cpu_to_le32 (bufp >> 32);
+	sitd->hw_buf[0] = cpu_to_hc32(ehci, bufp);
+	sitd->hw_buf_hi[0] = cpu_to_hc32(ehci, bufp >> 32);
 
-	sitd->hw_buf [1] = cpu_to_le32 (uf->buf1);
+	sitd->hw_buf[1] = cpu_to_hc32(ehci, uf->buf1);
 	if (uf->cross)
 		bufp += 4096;
-	sitd->hw_buf_hi [1] = cpu_to_le32 (bufp >> 32);
+	sitd->hw_buf_hi[1] = cpu_to_hc32(ehci, bufp >> 32);
 	sitd->index = index;
 }
 
@@ -1982,7 +1992,7 @@
 	ehci->pshadow [frame].sitd = sitd;
 	sitd->frame = frame;
 	wmb ();
-	ehci->periodic [frame] = cpu_to_le32 (sitd->sitd_dma) | Q_TYPE_SITD;
+	ehci->periodic[frame] = cpu_to_hc32(ehci, sitd->sitd_dma | Q_TYPE_SITD);
 }
 
 /* fit urb's sitds into the selected schedule slot; activate as needed */
@@ -2010,7 +2020,7 @@
 			urb->dev->devpath, stream->bEndpointAddress & 0x0f,
 			(stream->bEndpointAddress & USB_DIR_IN) ? "in" : "out",
 			(next_uframe >> 3) % ehci->periodic_size,
-			stream->interval, le32_to_cpu (stream->splits));
+			stream->interval, hc32_to_cpu(ehci, stream->splits));
 		stream->start = jiffies;
 	}
 	ehci_to_hcd(ehci)->self.bandwidth_isoc_reqs++;
@@ -2031,7 +2041,7 @@
 		sitd->stream = iso_stream_get (stream);
 		sitd->urb = usb_get_urb (urb);
 
-		sitd_patch (stream, sitd, sched, packet);
+		sitd_patch(ehci, stream, sitd, sched, packet);
 		sitd_link (ehci, (next_uframe >> 3) % ehci->periodic_size,
 				sitd);
 
@@ -2069,7 +2079,7 @@
 
 	urb_index = sitd->index;
 	desc = &urb->iso_frame_desc [urb_index];
-	t = le32_to_cpup (&sitd->hw_results);
+	t = hc32_to_cpup(ehci, &sitd->hw_results);
 
 	/* report transfer status */
 	if (t & SITD_ERRS) {
@@ -2224,7 +2234,7 @@
 
 	for (;;) {
 		union ehci_shadow	q, *q_p;
-		__le32			type, *hw_p;
+		__hc32			type, *hw_p;
 		unsigned		uframes;
 
 		/* don't scan past the live uframe */
@@ -2242,7 +2252,7 @@
 		q_p = &ehci->pshadow [frame];
 		hw_p = &ehci->periodic [frame];
 		q.ptr = q_p->ptr;
-		type = Q_NEXT_TYPE (*hw_p);
+		type = Q_NEXT_TYPE(ehci, *hw_p);
 		modified = 0;
 
 		while (q.ptr != NULL) {
@@ -2251,11 +2261,11 @@
 			int			live;
 
 			live = HC_IS_RUNNING (ehci_to_hcd(ehci)->state);
-			switch (type) {
+			switch (hc32_to_cpu(ehci, type)) {
 			case Q_TYPE_QH:
 				/* handle any completions */
 				temp.qh = qh_get (q.qh);
-				type = Q_NEXT_TYPE (q.qh->hw_next);
+				type = Q_NEXT_TYPE(ehci, q.qh->hw_next);
 				q = q.qh->qh_next;
 				modified = qh_completions (ehci, temp.qh);
 				if (unlikely (list_empty (&temp.qh->qtd_list)))
@@ -2266,10 +2276,10 @@
 				/* for "save place" FSTNs, look at QH entries
 				 * in the previous frame for completions.
 				 */
-				if (q.fstn->hw_prev != EHCI_LIST_END) {
+				if (q.fstn->hw_prev != EHCI_LIST_END(ehci)) {
 					dbg ("ignoring completions from FSTNs");
 				}
-				type = Q_NEXT_TYPE (q.fstn->hw_next);
+				type = Q_NEXT_TYPE(ehci, q.fstn->hw_next);
 				q = q.fstn->fstn_next;
 				break;
 			case Q_TYPE_ITD:
@@ -2277,11 +2287,12 @@
 				rmb ();
 				for (uf = live ? uframes : 8; uf < 8; uf++) {
 					if (0 == (q.itd->hw_transaction [uf]
-							& ITD_ACTIVE))
+							& ITD_ACTIVE(ehci)))
 						continue;
 					q_p = &q.itd->itd_next;
 					hw_p = &q.itd->hw_next;
-					type = Q_NEXT_TYPE (q.itd->hw_next);
+					type = Q_NEXT_TYPE(ehci,
+							q.itd->hw_next);
 					q = *q_p;
 					break;
 				}
@@ -2293,23 +2304,24 @@
 				 */
 				*q_p = q.itd->itd_next;
 				*hw_p = q.itd->hw_next;
-				type = Q_NEXT_TYPE (q.itd->hw_next);
+				type = Q_NEXT_TYPE(ehci, q.itd->hw_next);
 				wmb();
 				modified = itd_complete (ehci, q.itd);
 				q = *q_p;
 				break;
 			case Q_TYPE_SITD:
-				if ((q.sitd->hw_results & SITD_ACTIVE)
+				if ((q.sitd->hw_results & SITD_ACTIVE(ehci))
 						&& live) {
 					q_p = &q.sitd->sitd_next;
 					hw_p = &q.sitd->hw_next;
-					type = Q_NEXT_TYPE (q.sitd->hw_next);
+					type = Q_NEXT_TYPE(ehci,
+							q.sitd->hw_next);
 					q = *q_p;
 					break;
 				}
 				*q_p = q.sitd->sitd_next;
 				*hw_p = q.sitd->hw_next;
-				type = Q_NEXT_TYPE (q.sitd->hw_next);
+				type = Q_NEXT_TYPE(ehci, q.sitd->hw_next);
 				wmb();
 				modified = sitd_complete (ehci, q.sitd);
 				q = *q_p;
diff --git a/drivers/usb/host/ehci.h b/drivers/usb/host/ehci.h
index a9ba5d2..79ad2af 100644
--- a/drivers/usb/host/ehci.h
+++ b/drivers/usb/host/ehci.h
@@ -21,6 +21,22 @@
 
 /* definitions used for the EHCI driver */
 
+/*
+ * __hc32 and __hc16 are "Host Controller" types; they may be equivalent to
+ * __leXX (normally) or __beXX (given EHCI_BIG_ENDIAN_DESC), depending on
+ * the host controller implementation.
+ *
+ * To facilitate the strongest possible byte-order checking from "sparse"
+ * and so on, we use __leXX unless that's not practical.
+ */
+#ifdef CONFIG_USB_EHCI_BIG_ENDIAN_DESC
+typedef __u32 __bitwise __hc32;
+typedef __u16 __bitwise __hc16;
+#else
+#define __hc32	__le32
+#define __hc16	__le16
+#endif
+
 /* statistics can be kept for tuning/monitoring */
 struct ehci_stats {
 	/* irq usage */
@@ -70,7 +86,7 @@
 	/* periodic schedule support */
 #define	DEFAULT_I_TDPS		1024		/* some HCs can do less */
 	unsigned		periodic_size;
-	__le32			*periodic;	/* hw periodic table */
+	__hc32			*periodic;	/* hw periodic table */
 	dma_addr_t		periodic_dma;
 	unsigned		i_thresh;	/* uframes HC might cache */
 
@@ -103,6 +119,7 @@
 	unsigned		no_selective_suspend:1;
 	unsigned		has_fsl_port_bug:1; /* FreeScale */
 	unsigned		big_endian_mmio:1;
+	unsigned		big_endian_desc:1;
 
 	u8			sbrn;		/* packed release number */
 
@@ -309,7 +326,7 @@
 
 /*-------------------------------------------------------------------------*/
 
-#define	QTD_NEXT(dma)	cpu_to_le32((u32)dma)
+#define	QTD_NEXT(ehci, dma)	cpu_to_hc32(ehci, (u32)dma)
 
 /*
  * EHCI Specification 0.95 Section 3.5
@@ -321,9 +338,9 @@
  */
 struct ehci_qtd {
 	/* first part defined by EHCI spec */
-	__le32			hw_next;	  /* see EHCI 3.5.1 */
-	__le32			hw_alt_next;      /* see EHCI 3.5.2 */
-	__le32			hw_token;         /* see EHCI 3.5.3 */
+	__hc32			hw_next;	/* see EHCI 3.5.1 */
+	__hc32			hw_alt_next;    /* see EHCI 3.5.2 */
+	__hc32			hw_token;       /* see EHCI 3.5.3 */
 #define	QTD_TOGGLE	(1 << 31)	/* data toggle */
 #define	QTD_LENGTH(tok)	(((tok)>>16) & 0x7fff)
 #define	QTD_IOC		(1 << 15)	/* interrupt on complete */
@@ -337,8 +354,13 @@
 #define	QTD_STS_MMF	(1 << 2)	/* incomplete split transaction */
 #define	QTD_STS_STS	(1 << 1)	/* split transaction state */
 #define	QTD_STS_PING	(1 << 0)	/* issue PING? */
-	__le32			hw_buf [5];        /* see EHCI 3.5.4 */
-	__le32			hw_buf_hi [5];        /* Appendix B */
+
+#define ACTIVE_BIT(ehci)	cpu_to_hc32(ehci, QTD_STS_ACTIVE)
+#define HALT_BIT(ehci)		cpu_to_hc32(ehci, QTD_STS_HALT)
+#define STATUS_BIT(ehci)	cpu_to_hc32(ehci, QTD_STS_STS)
+
+	__hc32			hw_buf [5];        /* see EHCI 3.5.4 */
+	__hc32			hw_buf_hi [5];        /* Appendix B */
 
 	/* the rest is HCD-private */
 	dma_addr_t		qtd_dma;		/* qtd address */
@@ -348,26 +370,33 @@
 } __attribute__ ((aligned (32)));
 
 /* mask NakCnt+T in qh->hw_alt_next */
-#define QTD_MASK __constant_cpu_to_le32 (~0x1f)
+#define QTD_MASK(ehci)	cpu_to_hc32 (ehci, ~0x1f)
 
 #define IS_SHORT_READ(token) (QTD_LENGTH (token) != 0 && QTD_PID (token) == 1)
 
 /*-------------------------------------------------------------------------*/
 
 /* type tag from {qh,itd,sitd,fstn}->hw_next */
-#define Q_NEXT_TYPE(dma) ((dma) & __constant_cpu_to_le32 (3 << 1))
+#define Q_NEXT_TYPE(ehci,dma)	((dma) & cpu_to_hc32(ehci, 3 << 1))
 
+/*
+ * The following defines are no longer wrapped with the
+ * __constant_cpu_to_le32() macro, since we must support "dynamic"
+ * switching between big-endian and little-endian descriptors: one
+ * system may pair an SoC EHCI controller using big-endian descriptors
+ * with a normal little-endian PCI EHCI controller.
+ */
 /* values for that type tag */
-#define Q_TYPE_ITD	__constant_cpu_to_le32 (0 << 1)
-#define Q_TYPE_QH	__constant_cpu_to_le32 (1 << 1)
-#define Q_TYPE_SITD	__constant_cpu_to_le32 (2 << 1)
-#define Q_TYPE_FSTN	__constant_cpu_to_le32 (3 << 1)
+#define Q_TYPE_ITD	(0 << 1)
+#define Q_TYPE_QH	(1 << 1)
+#define Q_TYPE_SITD	(2 << 1)
+#define Q_TYPE_FSTN	(3 << 1)
 
 /* next async queue entry, or pointer to interrupt/periodic QH */
-#define	QH_NEXT(dma)	(cpu_to_le32(((u32)dma)&~0x01f)|Q_TYPE_QH)
+#define QH_NEXT(ehci,dma)	(cpu_to_hc32(ehci, (((u32)dma)&~0x01f)|Q_TYPE_QH))
 
 /* for periodic/async schedules and qtd lists, mark end of list */
-#define	EHCI_LIST_END	__constant_cpu_to_le32(1) /* "null pointer" to hw */
+#define EHCI_LIST_END(ehci)	cpu_to_hc32(ehci, 1) /* "null pointer" to hw */
 
 /*
  * Entries in periodic shadow table are pointers to one of four kinds
@@ -382,7 +411,7 @@
 	struct ehci_itd		*itd;		/* Q_TYPE_ITD */
 	struct ehci_sitd	*sitd;		/* Q_TYPE_SITD */
 	struct ehci_fstn	*fstn;		/* Q_TYPE_FSTN */
-	__le32			*hw_next;	/* (all types) */
+	__hc32			*hw_next;	/* (all types) */
 	void			*ptr;
 };
 
@@ -398,24 +427,27 @@
 
 struct ehci_qh {
 	/* first part defined by EHCI spec */
-	__le32			hw_next;	 /* see EHCI 3.6.1 */
-	__le32			hw_info1;        /* see EHCI 3.6.2 */
+	__hc32			hw_next;	/* see EHCI 3.6.1 */
+	__hc32			hw_info1;       /* see EHCI 3.6.2 */
 #define	QH_HEAD		0x00008000
 #define	QH_INACTIVATE	0x00000080
-	__le32			hw_info2;        /* see EHCI 3.6.2 */
+
+#define INACTIVATE_BIT(ehci)	cpu_to_hc32(ehci, QH_INACTIVATE)
+
+	__hc32			hw_info2;        /* see EHCI 3.6.2 */
 #define	QH_SMASK	0x000000ff
 #define	QH_CMASK	0x0000ff00
 #define	QH_HUBADDR	0x007f0000
 #define	QH_HUBPORT	0x3f800000
 #define	QH_MULT		0xc0000000
-	__le32			hw_current;	 /* qtd list - see EHCI 3.6.4 */
+	__hc32			hw_current;	/* qtd list - see EHCI 3.6.4 */
 
 	/* qtd overlay (hardware parts of a struct ehci_qtd) */
-	__le32			hw_qtd_next;
-	__le32			hw_alt_next;
-	__le32			hw_token;
-	__le32			hw_buf [5];
-	__le32			hw_buf_hi [5];
+	__hc32			hw_qtd_next;
+	__hc32			hw_alt_next;
+	__hc32			hw_token;
+	__hc32			hw_buf [5];
+	__hc32			hw_buf_hi [5];
 
 	/* the rest is HCD-private */
 	dma_addr_t		qh_dma;		/* address of qh */
@@ -456,7 +488,7 @@
 struct ehci_iso_packet {
 	/* These will be copied to iTD when scheduling */
 	u64			bufp;		/* itd->hw_bufp{,_hi}[pg] |= */
-	__le32			transaction;	/* itd->hw_transaction[i] |= */
+	__hc32			transaction;	/* itd->hw_transaction[i] |= */
 	u8			cross;		/* buf crosses pages */
 	/* for full speed OUT splits */
 	u32			buf1;
@@ -478,8 +510,8 @@
  */
 struct ehci_iso_stream {
 	/* first two fields match QH, but info1 == 0 */
-	__le32			hw_next;
-	__le32			hw_info1;
+	__hc32			hw_next;
+	__hc32			hw_info1;
 
 	u32			refcount;
 	u8			bEndpointAddress;
@@ -494,7 +526,7 @@
 	unsigned long		start;		/* jiffies */
 	unsigned long		rescheduled;
 	int			next_uframe;
-	__le32			splits;
+	__hc32			splits;
 
 	/* the rest is derived from the endpoint descriptor,
 	 * trusting urb->interval == f(epdesc->bInterval) and
@@ -508,12 +540,12 @@
 	unsigned		bandwidth;
 
 	/* This is used to initialize iTD's hw_bufp fields */
-	__le32			buf0;
-	__le32			buf1;
-	__le32			buf2;
+	__hc32			buf0;
+	__hc32			buf1;
+	__hc32			buf2;
 
 	/* this is used to initialize sITD's tt info */
-	__le32			address;
+	__hc32			address;
 };
 
 /*-------------------------------------------------------------------------*/
@@ -526,8 +558,8 @@
  */
 struct ehci_itd {
 	/* first part defined by EHCI spec */
-	__le32			hw_next;           /* see EHCI 3.3.1 */
-	__le32			hw_transaction [8]; /* see EHCI 3.3.2 */
+	__hc32			hw_next;           /* see EHCI 3.3.1 */
+	__hc32			hw_transaction [8]; /* see EHCI 3.3.2 */
 #define EHCI_ISOC_ACTIVE        (1<<31)        /* activate transfer this slot */
 #define EHCI_ISOC_BUF_ERR       (1<<30)        /* Data buffer error */
 #define EHCI_ISOC_BABBLE        (1<<29)        /* babble detected */
@@ -535,10 +567,10 @@
 #define	EHCI_ITD_LENGTH(tok)	(((tok)>>16) & 0x0fff)
 #define	EHCI_ITD_IOC		(1 << 15)	/* interrupt on complete */
 
-#define ITD_ACTIVE	__constant_cpu_to_le32(EHCI_ISOC_ACTIVE)
+#define ITD_ACTIVE(ehci)	cpu_to_hc32(ehci, EHCI_ISOC_ACTIVE)
 
-	__le32			hw_bufp [7];	/* see EHCI 3.3.3 */
-	__le32			hw_bufp_hi [7];	/* Appendix B */
+	__hc32			hw_bufp [7];	/* see EHCI 3.3.3 */
+	__hc32			hw_bufp_hi [7];	/* Appendix B */
 
 	/* the rest is HCD-private */
 	dma_addr_t		itd_dma;	/* for this itd */
@@ -565,11 +597,11 @@
  */
 struct ehci_sitd {
 	/* first part defined by EHCI spec */
-	__le32			hw_next;
+	__hc32			hw_next;
 /* uses bit field macros above - see EHCI 0.95 Table 3-8 */
-	__le32			hw_fullspeed_ep;	/* EHCI table 3-9 */
-	__le32			hw_uframe;		/* EHCI table 3-10 */
-	__le32			hw_results;		/* EHCI table 3-11 */
+	__hc32			hw_fullspeed_ep;	/* EHCI table 3-9 */
+	__hc32			hw_uframe;		/* EHCI table 3-10 */
+	__hc32			hw_results;		/* EHCI table 3-11 */
 #define	SITD_IOC	(1 << 31)	/* interrupt on completion */
 #define	SITD_PAGE	(1 << 30)	/* buffer 0/1 */
 #define	SITD_LENGTH(x)	(0x3ff & ((x)>>16))
@@ -581,11 +613,11 @@
 #define	SITD_STS_MMF	(1 << 2)	/* incomplete split transaction */
 #define	SITD_STS_STS	(1 << 1)	/* split transaction state */
 
-#define SITD_ACTIVE	__constant_cpu_to_le32(SITD_STS_ACTIVE)
+#define SITD_ACTIVE(ehci)	cpu_to_hc32(ehci, SITD_STS_ACTIVE)
 
-	__le32			hw_buf [2];		/* EHCI table 3-12 */
-	__le32			hw_backpointer;		/* EHCI table 3-13 */
-	__le32			hw_buf_hi [2];		/* Appendix B */
+	__hc32			hw_buf [2];		/* EHCI table 3-12 */
+	__hc32			hw_backpointer;		/* EHCI table 3-13 */
+	__hc32			hw_buf_hi [2];		/* Appendix B */
 
 	/* the rest is HCD-private */
 	dma_addr_t		sitd_dma;
@@ -610,8 +642,8 @@
  * it hits a "restore" FSTN; then it returns to finish other uframe 0/1 work.
  */
 struct ehci_fstn {
-	__le32			hw_next;	/* any periodic q entry */
-	__le32			hw_prev;	/* qh or EHCI_LIST_END */
+	__hc32			hw_next;	/* any periodic q entry */
+	__hc32			hw_prev;	/* qh or EHCI_LIST_END */
 
 	/* the rest is HCD-private */
 	dma_addr_t		fstn_dma;
@@ -683,8 +715,21 @@
 #define ehci_big_endian_mmio(e)		0
 #endif
 
-static inline unsigned int ehci_readl (const struct ehci_hcd *ehci,
-				       __u32 __iomem * regs)
+/*
+ * Big-endian read/write functions are arch-specific.
+ * Other arches can be added if/when they're needed.
+ *
+ * REVISIT: arch/powerpc now has readl/writel_be, so the
+ * definition below can die once the 4xx support is
+ * finally ported over.
+ */
+#if defined(CONFIG_PPC)
+#define readl_be(addr)		in_be32((__force unsigned *)addr)
+#define writel_be(val, addr)	out_be32((__force unsigned *)addr, val)
+#endif
+
+static inline unsigned int ehci_readl(const struct ehci_hcd *ehci,
+		__u32 __iomem * regs)
 {
 #ifdef CONFIG_USB_EHCI_BIG_ENDIAN_MMIO
 	return ehci_big_endian_mmio(ehci) ?
@@ -695,8 +740,8 @@
 #endif
 }
 
-static inline void ehci_writel (const struct ehci_hcd *ehci,
-				const unsigned int val, __u32 __iomem *regs)
+static inline void ehci_writel(const struct ehci_hcd *ehci,
+		const unsigned int val, __u32 __iomem *regs)
 {
 #ifdef CONFIG_USB_EHCI_BIG_ENDIAN_MMIO
 	ehci_big_endian_mmio(ehci) ?
@@ -709,6 +754,62 @@
 
 /*-------------------------------------------------------------------------*/
 
+/*
+ * The AMCC 440EPx not only implements its EHCI registers in big-endian
+ * format, but also its DMA data structures (descriptors).
+ *
+ * EHCI controllers accessed through PCI work normally (little-endian
+ * everywhere), so we won't bother supporting a BE-only mode for now.
+ */
+#ifdef CONFIG_USB_EHCI_BIG_ENDIAN_DESC
+#define ehci_big_endian_desc(e)		((e)->big_endian_desc)
+
+/* cpu to ehci */
+static inline __hc32 cpu_to_hc32 (const struct ehci_hcd *ehci, const u32 x)
+{
+	return ehci_big_endian_desc(ehci)
+		? (__force __hc32)cpu_to_be32(x)
+		: (__force __hc32)cpu_to_le32(x);
+}
+
+/* ehci to cpu */
+static inline u32 hc32_to_cpu (const struct ehci_hcd *ehci, const __hc32 x)
+{
+	return ehci_big_endian_desc(ehci)
+		? be32_to_cpu((__force __be32)x)
+		: le32_to_cpu((__force __le32)x);
+}
+
+static inline u32 hc32_to_cpup (const struct ehci_hcd *ehci, const __hc32 *x)
+{
+	return ehci_big_endian_desc(ehci)
+		? be32_to_cpup((__force __be32 *)x)
+		: le32_to_cpup((__force __le32 *)x);
+}
+
+#else
+
+/* cpu to ehci */
+static inline __hc32 cpu_to_hc32 (const struct ehci_hcd *ehci, const u32 x)
+{
+	return cpu_to_le32(x);
+}
+
+/* ehci to cpu */
+static inline u32 hc32_to_cpu (const struct ehci_hcd *ehci, const __hc32 x)
+{
+	return le32_to_cpu(x);
+}
+
+static inline u32 hc32_to_cpup (const struct ehci_hcd *ehci, const __hc32 *x)
+{
+	return le32_to_cpup(x);
+}
+
+#endif
+
+/*-------------------------------------------------------------------------*/
+
 #ifndef DEBUG
 #define STUB_DEBUG_FILES
 #endif	/* DEBUG */