xhci: Make xHCI driver endian-safe

This patch changes the struct members defining access to xHCI device-visible
memory to use __le32/__le64 where appropriate, and then adds swaps where
required.  Checked with sparse that all accesses are correct.

MMIO accesses use readl/writel so already are performed LE, but prototypes
now reflect this with __le*.

There were a couple of (debug) instances of DMA pointers being truncated to
32bits which have been fixed too.

Signed-off-by: Matt Evans <matt@ozlabs.org>
Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 7437386..9b1eeb0 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -100,7 +100,7 @@
 		return (trb == &seg->trbs[TRBS_PER_SEGMENT]) &&
 			(seg->next == xhci->event_ring->first_seg);
 	else
-		return trb->link.control & LINK_TOGGLE;
+		return le32_to_cpu(trb->link.control) & LINK_TOGGLE;
 }
 
 /* Is this TRB a link TRB or was the last TRB the last TRB in this event ring
@@ -113,13 +113,15 @@
 	if (ring == xhci->event_ring)
 		return trb == &seg->trbs[TRBS_PER_SEGMENT];
 	else
-		return (trb->link.control & TRB_TYPE_BITMASK) == TRB_TYPE(TRB_LINK);
+		return (le32_to_cpu(trb->link.control) & TRB_TYPE_BITMASK)
+			== TRB_TYPE(TRB_LINK);
 }
 
 static int enqueue_is_link_trb(struct xhci_ring *ring)
 {
 	struct xhci_link_trb *link = &ring->enqueue->link;
-	return ((link->control & TRB_TYPE_BITMASK) == TRB_TYPE(TRB_LINK));
+	return ((le32_to_cpu(link->control) & TRB_TYPE_BITMASK) ==
+		TRB_TYPE(TRB_LINK));
 }
 
 /* Updates trb to point to the next TRB in the ring, and updates seg if the next
@@ -197,7 +199,7 @@
 	union xhci_trb *next;
 	unsigned long long addr;
 
-	chain = ring->enqueue->generic.field[3] & TRB_CHAIN;
+	chain = le32_to_cpu(ring->enqueue->generic.field[3]) & TRB_CHAIN;
 	next = ++(ring->enqueue);
 
 	ring->enq_updates++;
@@ -223,12 +225,14 @@
 				 * (which may mean the chain bit is cleared).
 				 */
 				if (!xhci_link_trb_quirk(xhci)) {
-					next->link.control &= ~TRB_CHAIN;
-					next->link.control |= chain;
+					next->link.control &=
+						cpu_to_le32(~TRB_CHAIN);
+					next->link.control |=
+						cpu_to_le32(chain);
 				}
 				/* Give this link TRB to the hardware */
 				wmb();
-				next->link.control ^= TRB_CYCLE;
+				next->link.control ^= cpu_to_le32(TRB_CYCLE);
 			}
 			/* Toggle the cycle bit after the last ring segment. */
 			if (last_trb_on_last_seg(xhci, ring, ring->enq_seg, next)) {
@@ -319,7 +323,7 @@
 		unsigned int ep_index,
 		unsigned int stream_id)
 {
-	__u32 __iomem *db_addr = &xhci->dba->doorbell[slot_id];
+	__le32 __iomem *db_addr = &xhci->dba->doorbell[slot_id];
 	struct xhci_virt_ep *ep = &xhci->devs[slot_id]->eps[ep_index];
 	unsigned int ep_state = ep->ep_state;
 
@@ -380,7 +384,7 @@
 	while (cur_seg->trbs > trb ||
 			&cur_seg->trbs[TRBS_PER_SEGMENT - 1] < trb) {
 		generic_trb = &cur_seg->trbs[TRBS_PER_SEGMENT - 1].generic;
-		if (generic_trb->field[3] & LINK_TOGGLE)
+		if (le32_to_cpu(generic_trb->field[3]) & LINK_TOGGLE)
 			*cycle_state ^= 0x1;
 		cur_seg = cur_seg->next;
 		if (cur_seg == start_seg)
@@ -447,6 +451,10 @@
  *    any link TRBs with the toggle cycle bit set.
  *  - Finally we move the dequeue state one TRB further, toggling the cycle bit
  *    if we've moved it past a link TRB with the toggle cycle bit set.
+ *
+ * Some of the uses of xhci_generic_trb are grotty, but if they're done
+ * with correct __le32 accesses they should work fine.  Only users of this are
+ * in here.
  */
 void xhci_find_new_dequeue_state(struct xhci_hcd *xhci,
 		unsigned int slot_id, unsigned int ep_index,
@@ -480,7 +488,7 @@
 	/* Dig out the cycle state saved by the xHC during the stop ep cmd */
 	xhci_dbg(xhci, "Finding endpoint context\n");
 	ep_ctx = xhci_get_ep_ctx(xhci, dev->out_ctx, ep_index);
-	state->new_cycle_state = 0x1 & ep_ctx->deq;
+	state->new_cycle_state = 0x1 & le64_to_cpu(ep_ctx->deq);
 
 	state->new_deq_ptr = cur_td->last_trb;
 	xhci_dbg(xhci, "Finding segment containing last TRB in TD.\n");
@@ -493,8 +501,8 @@
 	}
 
 	trb = &state->new_deq_ptr->generic;
-	if ((trb->field[3] & TRB_TYPE_BITMASK) == TRB_TYPE(TRB_LINK) &&
-				(trb->field[3] & LINK_TOGGLE))
+	if ((le32_to_cpu(trb->field[3]) & TRB_TYPE_BITMASK) ==
+	    TRB_TYPE(TRB_LINK) && (le32_to_cpu(trb->field[3]) & LINK_TOGGLE))
 		state->new_cycle_state ^= 0x1;
 	next_trb(xhci, ep_ring, &state->new_deq_seg, &state->new_deq_ptr);
 
@@ -529,12 +537,12 @@
 	for (cur_seg = cur_td->start_seg, cur_trb = cur_td->first_trb;
 			true;
 			next_trb(xhci, ep_ring, &cur_seg, &cur_trb)) {
-		if ((cur_trb->generic.field[3] & TRB_TYPE_BITMASK) ==
-				TRB_TYPE(TRB_LINK)) {
+		if ((le32_to_cpu(cur_trb->generic.field[3]) & TRB_TYPE_BITMASK)
+		    == TRB_TYPE(TRB_LINK)) {
 			/* Unchain any chained Link TRBs, but
 			 * leave the pointers intact.
 			 */
-			cur_trb->generic.field[3] &= ~TRB_CHAIN;
+			cur_trb->generic.field[3] &= cpu_to_le32(~TRB_CHAIN);
 			xhci_dbg(xhci, "Cancel (unchain) link TRB\n");
 			xhci_dbg(xhci, "Address = %p (0x%llx dma); "
 					"in seg %p (0x%llx dma)\n",
@@ -547,8 +555,9 @@
 			cur_trb->generic.field[1] = 0;
 			cur_trb->generic.field[2] = 0;
 			/* Preserve only the cycle bit of this TRB */
-			cur_trb->generic.field[3] &= TRB_CYCLE;
-			cur_trb->generic.field[3] |= TRB_TYPE(TRB_TR_NOOP);
+			cur_trb->generic.field[3] &= cpu_to_le32(TRB_CYCLE);
+			cur_trb->generic.field[3] |= cpu_to_le32(
+				TRB_TYPE(TRB_TR_NOOP));
 			xhci_dbg(xhci, "Cancel TRB %p (0x%llx dma) "
 					"in seg %p (0x%llx dma)\n",
 					cur_trb,
@@ -662,9 +671,9 @@
 	struct xhci_dequeue_state deq_state;
 
 	if (unlikely(TRB_TO_SUSPEND_PORT(
-			xhci->cmd_ring->dequeue->generic.field[3]))) {
+			     le32_to_cpu(xhci->cmd_ring->dequeue->generic.field[3])))) {
 		slot_id = TRB_TO_SLOT_ID(
-			xhci->cmd_ring->dequeue->generic.field[3]);
+			le32_to_cpu(xhci->cmd_ring->dequeue->generic.field[3]));
 		virt_dev = xhci->devs[slot_id];
 		if (virt_dev)
 			handle_cmd_in_cmd_wait_list(xhci, virt_dev,
@@ -677,8 +686,8 @@
 	}
 
 	memset(&deq_state, 0, sizeof(deq_state));
-	slot_id = TRB_TO_SLOT_ID(trb->generic.field[3]);
-	ep_index = TRB_TO_EP_INDEX(trb->generic.field[3]);
+	slot_id = TRB_TO_SLOT_ID(le32_to_cpu(trb->generic.field[3]));
+	ep_index = TRB_TO_EP_INDEX(le32_to_cpu(trb->generic.field[3]));
 	ep = &xhci->devs[slot_id]->eps[ep_index];
 
 	if (list_empty(&ep->cancelled_td_list)) {
@@ -910,9 +919,9 @@
 	struct xhci_ep_ctx *ep_ctx;
 	struct xhci_slot_ctx *slot_ctx;
 
-	slot_id = TRB_TO_SLOT_ID(trb->generic.field[3]);
-	ep_index = TRB_TO_EP_INDEX(trb->generic.field[3]);
-	stream_id = TRB_TO_STREAM_ID(trb->generic.field[2]);
+	slot_id = TRB_TO_SLOT_ID(le32_to_cpu(trb->generic.field[3]));
+	ep_index = TRB_TO_EP_INDEX(le32_to_cpu(trb->generic.field[3]));
+	stream_id = TRB_TO_STREAM_ID(le32_to_cpu(trb->generic.field[2]));
 	dev = xhci->devs[slot_id];
 
 	ep_ring = xhci_stream_id_to_ring(dev, ep_index, stream_id);
@@ -928,11 +937,11 @@
 	ep_ctx = xhci_get_ep_ctx(xhci, dev->out_ctx, ep_index);
 	slot_ctx = xhci_get_slot_ctx(xhci, dev->out_ctx);
 
-	if (GET_COMP_CODE(event->status) != COMP_SUCCESS) {
+	if (GET_COMP_CODE(le32_to_cpu(event->status)) != COMP_SUCCESS) {
 		unsigned int ep_state;
 		unsigned int slot_state;
 
-		switch (GET_COMP_CODE(event->status)) {
+		switch (GET_COMP_CODE(le32_to_cpu(event->status))) {
 		case COMP_TRB_ERR:
 			xhci_warn(xhci, "WARN Set TR Deq Ptr cmd invalid because "
 					"of stream ID configuration\n");
@@ -940,9 +949,9 @@
 		case COMP_CTX_STATE:
 			xhci_warn(xhci, "WARN Set TR Deq Ptr cmd failed due "
 					"to incorrect slot or ep state.\n");
-			ep_state = ep_ctx->ep_info;
+			ep_state = le32_to_cpu(ep_ctx->ep_info);
 			ep_state &= EP_STATE_MASK;
-			slot_state = slot_ctx->dev_state;
+			slot_state = le32_to_cpu(slot_ctx->dev_state);
 			slot_state = GET_SLOT_STATE(slot_state);
 			xhci_dbg(xhci, "Slot state = %u, EP state = %u\n",
 					slot_state, ep_state);
@@ -954,7 +963,7 @@
 		default:
 			xhci_warn(xhci, "WARN Set TR Deq Ptr cmd with unknown "
 					"completion code of %u.\n",
-					GET_COMP_CODE(event->status));
+				  GET_COMP_CODE(le32_to_cpu(event->status)));
 			break;
 		}
 		/* OK what do we do now?  The endpoint state is hosed, and we
@@ -965,10 +974,10 @@
 		 */
 	} else {
 		xhci_dbg(xhci, "Successful Set TR Deq Ptr cmd, deq = @%08llx\n",
-				ep_ctx->deq);
+			 le64_to_cpu(ep_ctx->deq));
 		if (xhci_trb_virt_to_dma(dev->eps[ep_index].queued_deq_seg,
-					dev->eps[ep_index].queued_deq_ptr) ==
-				(ep_ctx->deq & ~(EP_CTX_CYCLE_MASK))) {
+					 dev->eps[ep_index].queued_deq_ptr) ==
+		    (le64_to_cpu(ep_ctx->deq) & ~(EP_CTX_CYCLE_MASK))) {
 			/* Update the ring's dequeue segment and dequeue pointer
 			 * to reflect the new position.
 			 */
@@ -997,13 +1006,13 @@
 	int slot_id;
 	unsigned int ep_index;
 
-	slot_id = TRB_TO_SLOT_ID(trb->generic.field[3]);
-	ep_index = TRB_TO_EP_INDEX(trb->generic.field[3]);
+	slot_id = TRB_TO_SLOT_ID(le32_to_cpu(trb->generic.field[3]));
+	ep_index = TRB_TO_EP_INDEX(le32_to_cpu(trb->generic.field[3]));
 	/* This command will only fail if the endpoint wasn't halted,
 	 * but we don't care.
 	 */
 	xhci_dbg(xhci, "Ignoring reset ep completion code of %u\n",
-			(unsigned int) GET_COMP_CODE(event->status));
+		 (unsigned int) GET_COMP_CODE(le32_to_cpu(event->status)));
 
 	/* HW with the reset endpoint quirk needs to have a configure endpoint
 	 * command complete before the endpoint can be used.  Queue that here
@@ -1040,8 +1049,7 @@
 	if (xhci->cmd_ring->dequeue != command->command_trb)
 		return 0;
 
-	command->status =
-		GET_COMP_CODE(event->status);
+	command->status = GET_COMP_CODE(le32_to_cpu(event->status));
 	list_del(&command->cmd_list);
 	if (command->completion)
 		complete(command->completion);
@@ -1053,7 +1061,7 @@
 static void handle_cmd_completion(struct xhci_hcd *xhci,
 		struct xhci_event_cmd *event)
 {
-	int slot_id = TRB_TO_SLOT_ID(event->flags);
+	int slot_id = TRB_TO_SLOT_ID(le32_to_cpu(event->flags));
 	u64 cmd_dma;
 	dma_addr_t cmd_dequeue_dma;
 	struct xhci_input_control_ctx *ctrl_ctx;
@@ -1062,7 +1070,7 @@
 	struct xhci_ring *ep_ring;
 	unsigned int ep_state;
 
-	cmd_dma = event->cmd_trb;
+	cmd_dma = le64_to_cpu(event->cmd_trb);
 	cmd_dequeue_dma = xhci_trb_virt_to_dma(xhci->cmd_ring->deq_seg,
 			xhci->cmd_ring->dequeue);
 	/* Is the command ring deq ptr out of sync with the deq seg ptr? */
@@ -1075,9 +1083,10 @@
 		xhci->error_bitmask |= 1 << 5;
 		return;
 	}
-	switch (xhci->cmd_ring->dequeue->generic.field[3] & TRB_TYPE_BITMASK) {
+	switch (le32_to_cpu(xhci->cmd_ring->dequeue->generic.field[3])
+		& TRB_TYPE_BITMASK) {
 	case TRB_TYPE(TRB_ENABLE_SLOT):
-		if (GET_COMP_CODE(event->status) == COMP_SUCCESS)
+		if (GET_COMP_CODE(le32_to_cpu(event->status)) == COMP_SUCCESS)
 			xhci->slot_id = slot_id;
 		else
 			xhci->slot_id = 0;
@@ -1102,7 +1111,7 @@
 		ctrl_ctx = xhci_get_input_control_ctx(xhci,
 				virt_dev->in_ctx);
 		/* Input ctx add_flags are the endpoint index plus one */
-		ep_index = xhci_last_valid_endpoint(ctrl_ctx->add_flags) - 1;
+		ep_index = xhci_last_valid_endpoint(le32_to_cpu(ctrl_ctx->add_flags)) - 1;
 		/* A usb_set_interface() call directly after clearing a halted
 		 * condition may race on this quirky hardware.  Not worth
 		 * worrying about, since this is prototype hardware.  Not sure
@@ -1111,8 +1120,8 @@
 		 */
 		if (xhci->quirks & XHCI_RESET_EP_QUIRK &&
 				ep_index != (unsigned int) -1 &&
-				ctrl_ctx->add_flags - SLOT_FLAG ==
-					ctrl_ctx->drop_flags) {
+		    le32_to_cpu(ctrl_ctx->add_flags) - SLOT_FLAG ==
+		    le32_to_cpu(ctrl_ctx->drop_flags)) {
 			ep_ring = xhci->devs[slot_id]->eps[ep_index].ring;
 			ep_state = xhci->devs[slot_id]->eps[ep_index].ep_state;
 			if (!(ep_state & EP_HALTED))
@@ -1129,18 +1138,18 @@
 bandwidth_change:
 		xhci_dbg(xhci, "Completed config ep cmd\n");
 		xhci->devs[slot_id]->cmd_status =
-			GET_COMP_CODE(event->status);
+			GET_COMP_CODE(le32_to_cpu(event->status));
 		complete(&xhci->devs[slot_id]->cmd_completion);
 		break;
 	case TRB_TYPE(TRB_EVAL_CONTEXT):
 		virt_dev = xhci->devs[slot_id];
 		if (handle_cmd_in_cmd_wait_list(xhci, virt_dev, event))
 			break;
-		xhci->devs[slot_id]->cmd_status = GET_COMP_CODE(event->status);
+		xhci->devs[slot_id]->cmd_status = GET_COMP_CODE(le32_to_cpu(event->status));
 		complete(&xhci->devs[slot_id]->cmd_completion);
 		break;
 	case TRB_TYPE(TRB_ADDR_DEV):
-		xhci->devs[slot_id]->cmd_status = GET_COMP_CODE(event->status);
+		xhci->devs[slot_id]->cmd_status = GET_COMP_CODE(le32_to_cpu(event->status));
 		complete(&xhci->addr_dev);
 		break;
 	case TRB_TYPE(TRB_STOP_RING):
@@ -1157,7 +1166,7 @@
 	case TRB_TYPE(TRB_RESET_DEV):
 		xhci_dbg(xhci, "Completed reset device command.\n");
 		slot_id = TRB_TO_SLOT_ID(
-				xhci->cmd_ring->dequeue->generic.field[3]);
+			le32_to_cpu(xhci->cmd_ring->dequeue->generic.field[3]));
 		virt_dev = xhci->devs[slot_id];
 		if (virt_dev)
 			handle_cmd_in_cmd_wait_list(xhci, virt_dev, event);
@@ -1171,8 +1180,8 @@
 			break;
 		}
 		xhci_dbg(xhci, "NEC firmware version %2x.%02x\n",
-				NEC_FW_MAJOR(event->status),
-				NEC_FW_MINOR(event->status));
+			 NEC_FW_MAJOR(le32_to_cpu(event->status)),
+			 NEC_FW_MINOR(le32_to_cpu(event->status)));
 		break;
 	default:
 		/* Skip over unknown commands on the event ring */
@@ -1187,7 +1196,7 @@
 {
 	u32 trb_type;
 
-	trb_type = TRB_FIELD_TO_TYPE(event->generic.field[3]);
+	trb_type = TRB_FIELD_TO_TYPE(le32_to_cpu(event->generic.field[3]));
 	xhci_dbg(xhci, "Vendor specific event TRB type = %u\n", trb_type);
 	if (trb_type == TRB_NEC_CMD_COMP && (xhci->quirks & XHCI_NEC_HOST))
 		handle_cmd_completion(xhci, &event->event_cmd);
@@ -1241,15 +1250,15 @@
 	unsigned int faked_port_index;
 	u8 major_revision;
 	struct xhci_bus_state *bus_state;
-	u32 __iomem **port_array;
+	__le32 __iomem **port_array;
 	bool bogus_port_status = false;
 
 	/* Port status change events always have a successful completion code */
-	if (GET_COMP_CODE(event->generic.field[2]) != COMP_SUCCESS) {
+	if (GET_COMP_CODE(le32_to_cpu(event->generic.field[2])) != COMP_SUCCESS) {
 		xhci_warn(xhci, "WARN: xHC returned failed port status event\n");
 		xhci->error_bitmask |= 1 << 8;
 	}
-	port_id = GET_PORT_ID(event->generic.field[0]);
+	port_id = GET_PORT_ID(le32_to_cpu(event->generic.field[0]));
 	xhci_dbg(xhci, "Port Status Change Event for port %d\n", port_id);
 
 	max_ports = HCS_MAX_PORTS(xhci->hcs_params1);
@@ -1456,7 +1465,7 @@
 		 * endpoint anyway.  Check if a babble halted the
 		 * endpoint.
 		 */
-		if ((ep_ctx->ep_info & EP_STATE_MASK) == EP_STATE_HALTED)
+		if ((le32_to_cpu(ep_ctx->ep_info) & EP_STATE_MASK) == EP_STATE_HALTED)
 			return 1;
 
 	return 0;
@@ -1494,12 +1503,12 @@
 	struct urb_priv	*urb_priv;
 	u32 trb_comp_code;
 
-	slot_id = TRB_TO_SLOT_ID(event->flags);
+	slot_id = TRB_TO_SLOT_ID(le32_to_cpu(event->flags));
 	xdev = xhci->devs[slot_id];
-	ep_index = TRB_TO_EP_ID(event->flags) - 1;
-	ep_ring = xhci_dma_to_transfer_ring(ep, event->buffer);
+	ep_index = TRB_TO_EP_ID(le32_to_cpu(event->flags)) - 1;
+	ep_ring = xhci_dma_to_transfer_ring(ep, le64_to_cpu(event->buffer));
 	ep_ctx = xhci_get_ep_ctx(xhci, xdev->out_ctx, ep_index);
-	trb_comp_code = GET_COMP_CODE(event->transfer_len);
+	trb_comp_code = GET_COMP_CODE(le32_to_cpu(event->transfer_len));
 
 	if (skip)
 		goto td_cleanup;
@@ -1602,12 +1611,12 @@
 	struct xhci_ep_ctx *ep_ctx;
 	u32 trb_comp_code;
 
-	slot_id = TRB_TO_SLOT_ID(event->flags);
+	slot_id = TRB_TO_SLOT_ID(le32_to_cpu(event->flags));
 	xdev = xhci->devs[slot_id];
-	ep_index = TRB_TO_EP_ID(event->flags) - 1;
-	ep_ring = xhci_dma_to_transfer_ring(ep, event->buffer);
+	ep_index = TRB_TO_EP_ID(le32_to_cpu(event->flags)) - 1;
+	ep_ring = xhci_dma_to_transfer_ring(ep, le64_to_cpu(event->buffer));
 	ep_ctx = xhci_get_ep_ctx(xhci, xdev->out_ctx, ep_index);
-	trb_comp_code = GET_COMP_CODE(event->transfer_len);
+	trb_comp_code = GET_COMP_CODE(le32_to_cpu(event->transfer_len));
 
 	xhci_debug_trb(xhci, xhci->event_ring->dequeue);
 	switch (trb_comp_code) {
@@ -1646,7 +1655,7 @@
 				event_trb != td->last_trb)
 			td->urb->actual_length =
 				td->urb->transfer_buffer_length
-				- TRB_LEN(event->transfer_len);
+				- TRB_LEN(le32_to_cpu(event->transfer_len));
 		else
 			td->urb->actual_length = 0;
 
@@ -1680,7 +1689,7 @@
 				/* We didn't stop on a link TRB in the middle */
 				td->urb->actual_length =
 					td->urb->transfer_buffer_length -
-					TRB_LEN(event->transfer_len);
+					TRB_LEN(le32_to_cpu(event->transfer_len));
 				xhci_dbg(xhci, "Waiting for status "
 						"stage event\n");
 				return 0;
@@ -1708,8 +1717,8 @@
 	u32 trb_comp_code;
 	bool skip_td = false;
 
-	ep_ring = xhci_dma_to_transfer_ring(ep, event->buffer);
-	trb_comp_code = GET_COMP_CODE(event->transfer_len);
+	ep_ring = xhci_dma_to_transfer_ring(ep, le64_to_cpu(event->buffer));
+	trb_comp_code = GET_COMP_CODE(le32_to_cpu(event->transfer_len));
 	urb_priv = td->urb->hcpriv;
 	idx = urb_priv->td_cnt;
 	frame = &td->urb->iso_frame_desc[idx];
@@ -1752,15 +1761,14 @@
 		for (cur_trb = ep_ring->dequeue,
 		     cur_seg = ep_ring->deq_seg; cur_trb != event_trb;
 		     next_trb(xhci, ep_ring, &cur_seg, &cur_trb)) {
-			if ((cur_trb->generic.field[3] &
+			if ((le32_to_cpu(cur_trb->generic.field[3]) &
 			 TRB_TYPE_BITMASK) != TRB_TYPE(TRB_TR_NOOP) &&
-			    (cur_trb->generic.field[3] &
+			    (le32_to_cpu(cur_trb->generic.field[3]) &
 			 TRB_TYPE_BITMASK) != TRB_TYPE(TRB_LINK))
-				len +=
-				    TRB_LEN(cur_trb->generic.field[2]);
+				len += TRB_LEN(le32_to_cpu(cur_trb->generic.field[2]));
 		}
-		len += TRB_LEN(cur_trb->generic.field[2]) -
-			TRB_LEN(event->transfer_len);
+		len += TRB_LEN(le32_to_cpu(cur_trb->generic.field[2])) -
+			TRB_LEN(le32_to_cpu(event->transfer_len));
 
 		if (trb_comp_code != COMP_STOP_INVAL) {
 			frame->actual_length = len;
@@ -1815,8 +1823,8 @@
 	struct xhci_segment *cur_seg;
 	u32 trb_comp_code;
 
-	ep_ring = xhci_dma_to_transfer_ring(ep, event->buffer);
-	trb_comp_code = GET_COMP_CODE(event->transfer_len);
+	ep_ring = xhci_dma_to_transfer_ring(ep, le64_to_cpu(event->buffer));
+	trb_comp_code = GET_COMP_CODE(le32_to_cpu(event->transfer_len));
 
 	switch (trb_comp_code) {
 	case COMP_SUCCESS:
@@ -1852,18 +1860,18 @@
 			"%d bytes untransferred\n",
 			td->urb->ep->desc.bEndpointAddress,
 			td->urb->transfer_buffer_length,
-			TRB_LEN(event->transfer_len));
+		 TRB_LEN(le32_to_cpu(event->transfer_len)));
 	/* Fast path - was this the last TRB in the TD for this URB? */
 	if (event_trb == td->last_trb) {
-		if (TRB_LEN(event->transfer_len) != 0) {
+		if (TRB_LEN(le32_to_cpu(event->transfer_len)) != 0) {
 			td->urb->actual_length =
 				td->urb->transfer_buffer_length -
-				TRB_LEN(event->transfer_len);
+				TRB_LEN(le32_to_cpu(event->transfer_len));
 			if (td->urb->transfer_buffer_length <
 					td->urb->actual_length) {
 				xhci_warn(xhci, "HC gave bad length "
 						"of %d bytes left\n",
-						TRB_LEN(event->transfer_len));
+					  TRB_LEN(le32_to_cpu(event->transfer_len)));
 				td->urb->actual_length = 0;
 				if (td->urb->transfer_flags & URB_SHORT_NOT_OK)
 					*status = -EREMOTEIO;
@@ -1894,20 +1902,20 @@
 		for (cur_trb = ep_ring->dequeue, cur_seg = ep_ring->deq_seg;
 				cur_trb != event_trb;
 				next_trb(xhci, ep_ring, &cur_seg, &cur_trb)) {
-			if ((cur_trb->generic.field[3] &
+			if ((le32_to_cpu(cur_trb->generic.field[3]) &
 			 TRB_TYPE_BITMASK) != TRB_TYPE(TRB_TR_NOOP) &&
-			    (cur_trb->generic.field[3] &
+			    (le32_to_cpu(cur_trb->generic.field[3]) &
 			 TRB_TYPE_BITMASK) != TRB_TYPE(TRB_LINK))
 				td->urb->actual_length +=
-					TRB_LEN(cur_trb->generic.field[2]);
+					TRB_LEN(le32_to_cpu(cur_trb->generic.field[2]));
 		}
 		/* If the ring didn't stop on a Link or No-op TRB, add
 		 * in the actual bytes transferred from the Normal TRB
 		 */
 		if (trb_comp_code != COMP_STOP_INVAL)
 			td->urb->actual_length +=
-				TRB_LEN(cur_trb->generic.field[2]) -
-				TRB_LEN(event->transfer_len);
+				TRB_LEN(le32_to_cpu(cur_trb->generic.field[2])) -
+				TRB_LEN(le32_to_cpu(event->transfer_len));
 	}
 
 	return finish_td(xhci, td, event_trb, event, ep, status, false);
@@ -1937,7 +1945,7 @@
 	u32 trb_comp_code;
 	int ret = 0;
 
-	slot_id = TRB_TO_SLOT_ID(event->flags);
+	slot_id = TRB_TO_SLOT_ID(le32_to_cpu(event->flags));
 	xdev = xhci->devs[slot_id];
 	if (!xdev) {
 		xhci_err(xhci, "ERROR Transfer event pointed to bad slot\n");
@@ -1945,20 +1953,21 @@
 	}
 
 	/* Endpoint ID is 1 based, our index is zero based */
-	ep_index = TRB_TO_EP_ID(event->flags) - 1;
+	ep_index = TRB_TO_EP_ID(le32_to_cpu(event->flags)) - 1;
 	xhci_dbg(xhci, "%s - ep index = %d\n", __func__, ep_index);
 	ep = &xdev->eps[ep_index];
-	ep_ring = xhci_dma_to_transfer_ring(ep, event->buffer);
+	ep_ring = xhci_dma_to_transfer_ring(ep, le64_to_cpu(event->buffer));
 	ep_ctx = xhci_get_ep_ctx(xhci, xdev->out_ctx, ep_index);
 	if (!ep_ring ||
-		(ep_ctx->ep_info & EP_STATE_MASK) == EP_STATE_DISABLED) {
+	    (le32_to_cpu(ep_ctx->ep_info) & EP_STATE_MASK) ==
+	    EP_STATE_DISABLED) {
 		xhci_err(xhci, "ERROR Transfer event for disabled endpoint "
 				"or incorrect stream ring\n");
 		return -ENODEV;
 	}
 
-	event_dma = event->buffer;
-	trb_comp_code = GET_COMP_CODE(event->transfer_len);
+	event_dma = le64_to_cpu(event->buffer);
+	trb_comp_code = GET_COMP_CODE(le32_to_cpu(event->transfer_len));
 	/* Look for common error cases */
 	switch (trb_comp_code) {
 	/* Skip codes that require special handling depending on
@@ -2011,14 +2020,16 @@
 		if (!list_empty(&ep_ring->td_list))
 			xhci_dbg(xhci, "Underrun Event for slot %d ep %d "
 					"still with TDs queued?\n",
-				TRB_TO_SLOT_ID(event->flags), ep_index);
+				 TRB_TO_SLOT_ID(le32_to_cpu(event->flags)),
+				 ep_index);
 		goto cleanup;
 	case COMP_OVERRUN:
 		xhci_dbg(xhci, "overrun event on endpoint\n");
 		if (!list_empty(&ep_ring->td_list))
 			xhci_dbg(xhci, "Overrun Event for slot %d ep %d "
 					"still with TDs queued?\n",
-				TRB_TO_SLOT_ID(event->flags), ep_index);
+				 TRB_TO_SLOT_ID(le32_to_cpu(event->flags)),
+				 ep_index);
 		goto cleanup;
 	case COMP_MISSED_INT:
 		/*
@@ -2047,9 +2058,11 @@
 		if (list_empty(&ep_ring->td_list)) {
 			xhci_warn(xhci, "WARN Event TRB for slot %d ep %d "
 					"with no TDs queued?\n",
-				  TRB_TO_SLOT_ID(event->flags), ep_index);
+				  TRB_TO_SLOT_ID(le32_to_cpu(event->flags)),
+				  ep_index);
 			xhci_dbg(xhci, "Event TRB with TRB type ID %u\n",
-			  (unsigned int) (event->flags & TRB_TYPE_BITMASK)>>10);
+				 (unsigned int) (le32_to_cpu(event->flags)
+						 & TRB_TYPE_BITMASK)>>10);
 			xhci_print_trb_offsets(xhci, (union xhci_trb *) event);
 			if (ep->skip) {
 				ep->skip = false;
@@ -2092,7 +2105,8 @@
 		 * corresponding TD has been cancelled. Just ignore
 		 * the TD.
 		 */
-		if ((event_trb->generic.field[3] & TRB_TYPE_BITMASK)
+		if ((le32_to_cpu(event_trb->generic.field[3])
+			     & TRB_TYPE_BITMASK)
 				 == TRB_TYPE(TRB_TR_NOOP)) {
 			xhci_dbg(xhci,
 				 "event_trb is a no-op TRB. Skip it\n");
@@ -2172,15 +2186,15 @@
 
 	event = xhci->event_ring->dequeue;
 	/* Does the HC or OS own the TRB? */
-	if ((event->event_cmd.flags & TRB_CYCLE) !=
-			xhci->event_ring->cycle_state) {
+	if ((le32_to_cpu(event->event_cmd.flags) & TRB_CYCLE) !=
+	    xhci->event_ring->cycle_state) {
 		xhci->error_bitmask |= 1 << 2;
 		return;
 	}
 	xhci_dbg(xhci, "%s - OS owns TRB\n", __func__);
 
 	/* FIXME: Handle more event types. */
-	switch ((event->event_cmd.flags & TRB_TYPE_BITMASK)) {
+	switch ((le32_to_cpu(event->event_cmd.flags) & TRB_TYPE_BITMASK)) {
 	case TRB_TYPE(TRB_COMPLETION):
 		xhci_dbg(xhci, "%s - calling handle_cmd_completion\n", __func__);
 		handle_cmd_completion(xhci, &event->event_cmd);
@@ -2202,7 +2216,8 @@
 			update_ptrs = 0;
 		break;
 	default:
-		if ((event->event_cmd.flags & TRB_TYPE_BITMASK) >= TRB_TYPE(48))
+		if ((le32_to_cpu(event->event_cmd.flags) & TRB_TYPE_BITMASK) >=
+		    TRB_TYPE(48))
 			handle_vendor_event(xhci, event);
 		else
 			xhci->error_bitmask |= 1 << 3;
@@ -2252,12 +2267,12 @@
 	xhci_dbg(xhci, "op reg status = %08x\n", status);
 	xhci_dbg(xhci, "Event ring dequeue ptr:\n");
 	xhci_dbg(xhci, "@%llx %08x %08x %08x %08x\n",
-			(unsigned long long)
-			xhci_trb_virt_to_dma(xhci->event_ring->deq_seg, trb),
-			lower_32_bits(trb->link.segment_ptr),
-			upper_32_bits(trb->link.segment_ptr),
-			(unsigned int) trb->link.intr_target,
-			(unsigned int) trb->link.control);
+		 (unsigned long long)
+		 xhci_trb_virt_to_dma(xhci->event_ring->deq_seg, trb),
+		 lower_32_bits(le64_to_cpu(trb->link.segment_ptr)),
+		 upper_32_bits(le64_to_cpu(trb->link.segment_ptr)),
+		 (unsigned int) le32_to_cpu(trb->link.intr_target),
+		 (unsigned int) le32_to_cpu(trb->link.control));
 
 	if (status & STS_FATAL) {
 		xhci_warn(xhci, "WARNING: Host System Error\n");
@@ -2358,10 +2373,10 @@
 	struct xhci_generic_trb *trb;
 
 	trb = &ring->enqueue->generic;
-	trb->field[0] = field1;
-	trb->field[1] = field2;
-	trb->field[2] = field3;
-	trb->field[3] = field4;
+	trb->field[0] = cpu_to_le32(field1);
+	trb->field[1] = cpu_to_le32(field2);
+	trb->field[2] = cpu_to_le32(field3);
+	trb->field[3] = cpu_to_le32(field4);
 	inc_enq(xhci, ring, consumer, more_trbs_coming);
 }
 
@@ -2414,17 +2429,16 @@
 		next = ring->enqueue;
 
 		while (last_trb(xhci, ring, ring->enq_seg, next)) {
-
 			/* If we're not dealing with 0.95 hardware,
 			 * clear the chain bit.
 			 */
 			if (!xhci_link_trb_quirk(xhci))
-				next->link.control &= ~TRB_CHAIN;
+				next->link.control &= cpu_to_le32(~TRB_CHAIN);
 			else
-				next->link.control |= TRB_CHAIN;
+				next->link.control |= cpu_to_le32(TRB_CHAIN);
 
 			wmb();
-			next->link.control ^= (u32) TRB_CYCLE;
+			next->link.control ^= cpu_to_le32((u32) TRB_CYCLE);
 
 			/* Toggle the cycle bit after the last ring segment. */
 			if (last_trb_on_last_seg(xhci, ring, ring->enq_seg, next)) {
@@ -2467,8 +2481,8 @@
 	}
 
 	ret = prepare_ring(xhci, ep_ring,
-			ep_ctx->ep_info & EP_STATE_MASK,
-			num_trbs, mem_flags);
+			   le32_to_cpu(ep_ctx->ep_info) & EP_STATE_MASK,
+			   num_trbs, mem_flags);
 	if (ret)
 		return ret;
 
@@ -2570,9 +2584,9 @@
 	 */
 	wmb();
 	if (start_cycle)
-		start_trb->field[3] |= start_cycle;
+		start_trb->field[3] |= cpu_to_le32(start_cycle);
 	else
-		start_trb->field[3] &= ~0x1;
+		start_trb->field[3] &= cpu_to_le32(~TRB_CYCLE);
 	xhci_ring_ep_doorbell(xhci, slot_id, ep_index, stream_id);
 }
 
@@ -2590,7 +2604,7 @@
 	int xhci_interval;
 	int ep_interval;
 
-	xhci_interval = EP_INTERVAL_TO_UFRAMES(ep_ctx->ep_info);
+	xhci_interval = EP_INTERVAL_TO_UFRAMES(le32_to_cpu(ep_ctx->ep_info));
 	ep_interval = urb->interval;
 	/* Convert to microframes */
 	if (urb->dev->speed == USB_SPEED_LOW ||
@@ -2979,12 +2993,11 @@
 	if (start_cycle == 0)
 		field |= 0x1;
 	queue_trb(xhci, ep_ring, false, true,
-			/* FIXME endianness is probably going to bite my ass here. */
-			setup->bRequestType | setup->bRequest << 8 | setup->wValue << 16,
-			setup->wIndex | setup->wLength << 16,
-			TRB_LEN(8) | TRB_INTR_TARGET(0),
-			/* Immediate data in pointer */
-			field);
+		  setup->bRequestType | setup->bRequest << 8 | le16_to_cpu(setup->wValue) << 16,
+		  le16_to_cpu(setup->wIndex) | le16_to_cpu(setup->wLength) << 16,
+		  TRB_LEN(8) | TRB_INTR_TARGET(0),
+		  /* Immediate data in pointer */
+		  field);
 
 	/* If there's data, queue data TRBs */
 	field = 0;
@@ -3211,8 +3224,8 @@
 	/* Check the ring to guarantee there is enough room for the whole urb.
 	 * Do not insert any td of the urb to the ring if the check failed.
 	 */
-	ret = prepare_ring(xhci, ep_ring, ep_ctx->ep_info & EP_STATE_MASK,
-				num_trbs, mem_flags);
+	ret = prepare_ring(xhci, ep_ring, le32_to_cpu(ep_ctx->ep_info) & EP_STATE_MASK,
+			   num_trbs, mem_flags);
 	if (ret)
 		return ret;
 
@@ -3224,7 +3237,7 @@
 			urb->dev->speed == USB_SPEED_FULL)
 		urb->start_frame >>= 3;
 
-	xhci_interval = EP_INTERVAL_TO_UFRAMES(ep_ctx->ep_info);
+	xhci_interval = EP_INTERVAL_TO_UFRAMES(le32_to_cpu(ep_ctx->ep_info));
 	ep_interval = urb->interval;
 	/* Convert to microframes */
 	if (urb->dev->speed == USB_SPEED_LOW ||