USB: xhci: Correct assumptions about number of rings per endpoint.

Much of the xHCI driver code assumes that endpoints only have one ring.
Now an endpoint can have one ring per enabled stream ID, so correct that
assumption.  Use functions that translate the stream_id field in the URB
or the DMA address of a TRB into the correct stream ring.

Correct the polling loop to print out all enabled stream rings.  Make the
URB cancellation routine find the correct stream ring if the URB has
stream_id set.  Make sure the URB enqueueing routine does the same.  Also
correct the code that handles stalled/halted endpoints.

Check that commands and registers that can take stream IDs handle them
properly.  That includes ringing an endpoint doorbell, resetting a
stalled/halted endpoint, and setting a transfer ring dequeue pointer
(since that command can set the dequeue pointer in a stream context or an
endpoint context).

Correct the transfer event handler to translate a TRB DMA address into the
stream ring it was enqueued to.  Make the code to allocate and prepare TD
structures adds the TD to the right td_list for the stream ring.  Make
sure the code to give the first TRB in a TD to the hardware manipulates
the correct stream ring.

When an endpoint stalls, store the stream ID of the stream ring that
stalled in the xhci_virt_ep structure.  Use that instead of the stream ID
in the URB, since an URB may be re-used after it is given back after a
non-control endpoint stall.

Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/host/xhci-dbg.c b/drivers/usb/host/xhci-dbg.c
index 105fa8b..fcbf4ab 100644
--- a/drivers/usb/host/xhci-dbg.c
+++ b/drivers/usb/host/xhci-dbg.c
@@ -364,6 +364,30 @@
 		xhci_debug_segment(xhci, seg);
 }
 
+void xhci_dbg_ep_rings(struct xhci_hcd *xhci,
+		unsigned int slot_id, unsigned int ep_index,
+		struct xhci_virt_ep *ep)
+{
+	int i;
+	struct xhci_ring *ring;
+
+	if (ep->ep_state & EP_HAS_STREAMS) {
+		for (i = 1; i < ep->stream_info->num_streams; i++) {
+			ring = ep->stream_info->stream_rings[i];
+			xhci_dbg(xhci, "Dev %d endpoint %d stream ID %d:\n",
+				slot_id, ep_index, i);
+			xhci_debug_segment(xhci, ring->deq_seg);
+		}
+	} else {
+		ring = ep->ring;
+		if (!ring)
+			return;
+		xhci_dbg(xhci, "Dev %d endpoint ring %d:\n",
+				slot_id, ep_index);
+		xhci_debug_segment(xhci, ring->deq_seg);
+	}
+}
+
 void xhci_dbg_erst(struct xhci_hcd *xhci, struct xhci_erst *erst)
 {
 	u32 addr = (u32) erst->erst_dma_addr;
diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
index d299ffa..5711048 100644
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -353,8 +353,19 @@
 				mem_flags, dma);
 }
 
+struct xhci_ring *xhci_dma_to_transfer_ring(
+		struct xhci_virt_ep *ep,
+		u64 address)
+{
+	if (ep->ep_state & EP_HAS_STREAMS)
+		return radix_tree_lookup(&ep->stream_info->trb_address_map,
+				address >> SEGMENT_SHIFT);
+	return ep->ring;
+}
+
+/* Only use this when you know stream_info is valid */
 #ifdef CONFIG_USB_XHCI_HCD_DEBUGGING
-struct xhci_ring *dma_to_stream_ring(
+static struct xhci_ring *dma_to_stream_ring(
 		struct xhci_stream_info *stream_info,
 		u64 address)
 {
@@ -363,6 +374,66 @@
 }
 #endif	/* CONFIG_USB_XHCI_HCD_DEBUGGING */
 
+struct xhci_ring *xhci_stream_id_to_ring(
+		struct xhci_virt_device *dev,
+		unsigned int ep_index,
+		unsigned int stream_id)
+{
+	struct xhci_virt_ep *ep = &dev->eps[ep_index];
+
+	if (stream_id == 0)
+		return ep->ring;
+	if (!ep->stream_info)
+		return NULL;
+
+	if (stream_id > ep->stream_info->num_streams)
+		return NULL;
+	return ep->stream_info->stream_rings[stream_id];
+}
+
+struct xhci_ring *xhci_triad_to_transfer_ring(struct xhci_hcd *xhci,
+		unsigned int slot_id, unsigned int ep_index,
+		unsigned int stream_id)
+{
+	struct xhci_virt_ep *ep;
+
+	ep = &xhci->devs[slot_id]->eps[ep_index];
+	/* Common case: no streams */
+	if (!(ep->ep_state & EP_HAS_STREAMS))
+		return ep->ring;
+
+	if (stream_id == 0) {
+		xhci_warn(xhci,
+				"WARN: Slot ID %u, ep index %u has streams, "
+				"but URB has no stream ID.\n",
+				slot_id, ep_index);
+		return NULL;
+	}
+
+	if (stream_id < ep->stream_info->num_streams)
+		return ep->stream_info->stream_rings[stream_id];
+
+	xhci_warn(xhci,
+			"WARN: Slot ID %u, ep index %u has "
+			"stream IDs 1 to %u allocated, "
+			"but stream ID %u is requested.\n",
+			slot_id, ep_index,
+			ep->stream_info->num_streams - 1,
+			stream_id);
+	return NULL;
+}
+
+/* Get the right ring for the given URB.
+ * If the endpoint supports streams, boundary check the URB's stream ID.
+ * If the endpoint doesn't support streams, return the singular endpoint ring.
+ */
+struct xhci_ring *xhci_urb_to_transfer_ring(struct xhci_hcd *xhci,
+		struct urb *urb)
+{
+	return xhci_triad_to_transfer_ring(xhci, urb->dev->slot_id,
+		xhci_get_endpoint_index(&urb->ep->desc), urb->stream_id);
+}
+
 #ifdef CONFIG_USB_XHCI_HCD_DEBUGGING
 static int xhci_test_radix_tree(struct xhci_hcd *xhci,
 		unsigned int num_streams,
@@ -515,6 +586,7 @@
 		cur_ring = stream_info->stream_rings[cur_stream];
 		if (!cur_ring)
 			goto cleanup_rings;
+		cur_ring->stream_id = cur_stream;
 		/* Set deq ptr, cycle bit, and stream context type */
 		addr = cur_ring->first_seg->dma |
 			SCT_FOR_CTX(SCT_PRI_TR) |
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index a14f657..16ef5fd 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -312,7 +312,8 @@
 
 static void ring_ep_doorbell(struct xhci_hcd *xhci,
 		unsigned int slot_id,
-		unsigned int ep_index)
+		unsigned int ep_index,
+		unsigned int stream_id)
 {
 	struct xhci_virt_ep *ep;
 	unsigned int ep_state;
@@ -331,7 +332,8 @@
 	if (!(ep_state & EP_HALT_PENDING) && !(ep_state & SET_DEQ_PENDING)
 			&& !(ep_state & EP_HALTED)) {
 		field = xhci_readl(xhci, db_addr) & DB_MASK;
-		xhci_writel(xhci, field | EPI_TO_DB(ep_index), db_addr);
+		field |= EPI_TO_DB(ep_index) | STREAM_ID_TO_DB(stream_id);
+		xhci_writel(xhci, field, db_addr);
 		/* Flush PCI posted writes - FIXME Matthew Wilcox says this
 		 * isn't time-critical and we shouldn't make the CPU wait for
 		 * the flush.
@@ -340,6 +342,31 @@
 	}
 }
 
+/* Ring the doorbell for any rings with pending URBs */
+static void ring_doorbell_for_active_rings(struct xhci_hcd *xhci,
+		unsigned int slot_id,
+		unsigned int ep_index)
+{
+	unsigned int stream_id;
+	struct xhci_virt_ep *ep;
+
+	ep = &xhci->devs[slot_id]->eps[ep_index];
+
+	/* A ring has pending URBs if its TD list is not empty */
+	if (!(ep->ep_state & EP_HAS_STREAMS)) {
+		if (!(list_empty(&ep->ring->td_list)))
+			ring_ep_doorbell(xhci, slot_id, ep_index, 0);
+		return;
+	}
+
+	for (stream_id = 1; stream_id < ep->stream_info->num_streams;
+			stream_id++) {
+		struct xhci_stream_info *stream_info = ep->stream_info;
+		if (!list_empty(&stream_info->stream_rings[stream_id]->td_list))
+			ring_ep_doorbell(xhci, slot_id, ep_index, stream_id);
+	}
+}
+
 /*
  * Find the segment that trb is in.  Start searching in start_seg.
  * If we must move past a segment that has a link TRB with a toggle cycle state
@@ -382,14 +409,23 @@
  */
 void xhci_find_new_dequeue_state(struct xhci_hcd *xhci,
 		unsigned int slot_id, unsigned int ep_index,
-		struct xhci_td *cur_td, struct xhci_dequeue_state *state)
+		unsigned int stream_id, struct xhci_td *cur_td,
+		struct xhci_dequeue_state *state)
 {
 	struct xhci_virt_device *dev = xhci->devs[slot_id];
-	struct xhci_ring *ep_ring = dev->eps[ep_index].ring;
+	struct xhci_ring *ep_ring;
 	struct xhci_generic_trb *trb;
 	struct xhci_ep_ctx *ep_ctx;
 	dma_addr_t addr;
 
+	ep_ring = xhci_triad_to_transfer_ring(xhci, slot_id,
+			ep_index, stream_id);
+	if (!ep_ring) {
+		xhci_warn(xhci, "WARN can't find new dequeue state "
+				"for invalid stream ID %u.\n",
+				stream_id);
+		return;
+	}
 	state->new_cycle_state = 0;
 	xhci_dbg(xhci, "Finding segment containing stopped TRB.\n");
 	state->new_deq_seg = find_trb_seg(cur_td->start_seg,
@@ -469,11 +505,13 @@
 }
 
 static int queue_set_tr_deq(struct xhci_hcd *xhci, int slot_id,
-		unsigned int ep_index, struct xhci_segment *deq_seg,
+		unsigned int ep_index, unsigned int stream_id,
+		struct xhci_segment *deq_seg,
 		union xhci_trb *deq_ptr, u32 cycle_state);
 
 void xhci_queue_new_dequeue_state(struct xhci_hcd *xhci,
 		unsigned int slot_id, unsigned int ep_index,
+		unsigned int stream_id,
 		struct xhci_dequeue_state *deq_state)
 {
 	struct xhci_virt_ep *ep = &xhci->devs[slot_id]->eps[ep_index];
@@ -485,7 +523,7 @@
 			deq_state->new_deq_ptr,
 			(unsigned long long)xhci_trb_virt_to_dma(deq_state->new_deq_seg, deq_state->new_deq_ptr),
 			deq_state->new_cycle_state);
-	queue_set_tr_deq(xhci, slot_id, ep_index,
+	queue_set_tr_deq(xhci, slot_id, ep_index, stream_id,
 			deq_state->new_deq_seg,
 			deq_state->new_deq_ptr,
 			(u32) deq_state->new_cycle_state);
@@ -553,11 +591,10 @@
 	slot_id = TRB_TO_SLOT_ID(trb->generic.field[3]);
 	ep_index = TRB_TO_EP_INDEX(trb->generic.field[3]);
 	ep = &xhci->devs[slot_id]->eps[ep_index];
-	ep_ring = ep->ring;
 
 	if (list_empty(&ep->cancelled_td_list)) {
 		xhci_stop_watchdog_timer_in_irq(xhci, ep);
-		ring_ep_doorbell(xhci, slot_id, ep_index);
+		ring_doorbell_for_active_rings(xhci, slot_id, ep_index);
 		return;
 	}
 
@@ -571,15 +608,36 @@
 		xhci_dbg(xhci, "Cancelling TD starting at %p, 0x%llx (dma).\n",
 				cur_td->first_trb,
 				(unsigned long long)xhci_trb_virt_to_dma(cur_td->start_seg, cur_td->first_trb));
+		ep_ring = xhci_urb_to_transfer_ring(xhci, cur_td->urb);
+		if (!ep_ring) {
+			/* This shouldn't happen unless a driver is mucking
+			 * with the stream ID after submission.  This will
+			 * leave the TD on the hardware ring, and the hardware
+			 * will try to execute it, and may access a buffer
+			 * that has already been freed.  In the best case, the
+			 * hardware will execute it, and the event handler will
+			 * ignore the completion event for that TD, since it was
+			 * removed from the td_list for that endpoint.  In
+			 * short, don't muck with the stream ID after
+			 * submission.
+			 */
+			xhci_warn(xhci, "WARN Cancelled URB %p "
+					"has invalid stream ID %u.\n",
+					cur_td->urb,
+					cur_td->urb->stream_id);
+			goto remove_finished_td;
+		}
 		/*
 		 * If we stopped on the TD we need to cancel, then we have to
 		 * move the xHC endpoint ring dequeue pointer past this TD.
 		 */
 		if (cur_td == ep->stopped_td)
-			xhci_find_new_dequeue_state(xhci, slot_id, ep_index, cur_td,
-					&deq_state);
+			xhci_find_new_dequeue_state(xhci, slot_id, ep_index,
+					cur_td->urb->stream_id,
+					cur_td, &deq_state);
 		else
 			td_to_noop(xhci, ep_ring, cur_td);
+remove_finished_td:
 		/*
 		 * The event handler won't see a completion for this TD anymore,
 		 * so remove it from the endpoint ring's TD list.  Keep it in
@@ -593,11 +651,13 @@
 	/* If necessary, queue a Set Transfer Ring Dequeue Pointer command */
 	if (deq_state.new_deq_ptr && deq_state.new_deq_seg) {
 		xhci_queue_new_dequeue_state(xhci,
-				slot_id, ep_index, &deq_state);
+				slot_id, ep_index,
+				ep->stopped_td->urb->stream_id,
+				&deq_state);
 		xhci_ring_cmd_db(xhci);
 	} else {
-		/* Otherwise just ring the doorbell to restart the ring */
-		ring_ep_doorbell(xhci, slot_id, ep_index);
+		/* Otherwise ring the doorbell(s) to restart queued transfers */
+		ring_doorbell_for_active_rings(xhci, slot_id, ep_index);
 	}
 	ep->stopped_td = NULL;
 	ep->stopped_trb = NULL;
@@ -757,6 +817,7 @@
 {
 	unsigned int slot_id;
 	unsigned int ep_index;
+	unsigned int stream_id;
 	struct xhci_ring *ep_ring;
 	struct xhci_virt_device *dev;
 	struct xhci_ep_ctx *ep_ctx;
@@ -764,8 +825,19 @@
 
 	slot_id = TRB_TO_SLOT_ID(trb->generic.field[3]);
 	ep_index = TRB_TO_EP_INDEX(trb->generic.field[3]);
+	stream_id = TRB_TO_STREAM_ID(trb->generic.field[2]);
 	dev = xhci->devs[slot_id];
-	ep_ring = dev->eps[ep_index].ring;
+
+	ep_ring = xhci_stream_id_to_ring(dev, ep_index, stream_id);
+	if (!ep_ring) {
+		xhci_warn(xhci, "WARN Set TR deq ptr command for "
+				"freed stream ID %u\n",
+				stream_id);
+		/* XXX: Harmless??? */
+		dev->eps[ep_index].ep_state &= ~SET_DEQ_PENDING;
+		return;
+	}
+
 	ep_ctx = xhci_get_ep_ctx(xhci, dev->out_ctx, ep_index);
 	slot_ctx = xhci_get_slot_ctx(xhci, dev->out_ctx);
 
@@ -810,7 +882,8 @@
 	}
 
 	dev->eps[ep_index].ep_state &= ~SET_DEQ_PENDING;
-	ring_ep_doorbell(xhci, slot_id, ep_index);
+	/* Restart any rings with pending URBs */
+	ring_doorbell_for_active_rings(xhci, slot_id, ep_index);
 }
 
 static void handle_reset_ep_completion(struct xhci_hcd *xhci,
@@ -819,11 +892,9 @@
 {
 	int slot_id;
 	unsigned int ep_index;
-	struct xhci_ring *ep_ring;
 
 	slot_id = TRB_TO_SLOT_ID(trb->generic.field[3]);
 	ep_index = TRB_TO_EP_INDEX(trb->generic.field[3]);
-	ep_ring = xhci->devs[slot_id]->eps[ep_index].ring;
 	/* This command will only fail if the endpoint wasn't halted,
 	 * but we don't care.
 	 */
@@ -841,9 +912,9 @@
 				false);
 		xhci_ring_cmd_db(xhci);
 	} else {
-		/* Clear our internal halted state and restart the ring */
+		/* Clear our internal halted state and restart the ring(s) */
 		xhci->devs[slot_id]->eps[ep_index].ep_state &= ~EP_HALTED;
-		ring_ep_doorbell(xhci, slot_id, ep_index);
+		ring_doorbell_for_active_rings(xhci, slot_id, ep_index);
 	}
 }
 
@@ -929,8 +1000,10 @@
 		/* Input ctx add_flags are the endpoint index plus one */
 		ep_index = xhci_last_valid_endpoint(ctrl_ctx->add_flags) - 1;
 		/* A usb_set_interface() call directly after clearing a halted
-		 * condition may race on this quirky hardware.
-		 * Not worth worrying about, since this is prototype hardware.
+		 * condition may race on this quirky hardware.  Not worth
+		 * worrying about, since this is prototype hardware.  Not sure
+		 * if this will work for streams, but streams support was
+		 * untested on this prototype.
 		 */
 		if (xhci->quirks & XHCI_RESET_EP_QUIRK &&
 				ep_index != (unsigned int) -1 &&
@@ -943,10 +1016,10 @@
 			xhci_dbg(xhci, "Completed config ep cmd - "
 					"last ep index = %d, state = %d\n",
 					ep_index, ep_state);
-			/* Clear our internal halted state and restart ring */
+			/* Clear internal halted state and restart ring(s) */
 			xhci->devs[slot_id]->eps[ep_index].ep_state &=
 				~EP_HALTED;
-			ring_ep_doorbell(xhci, slot_id, ep_index);
+			ring_doorbell_for_active_rings(xhci, slot_id, ep_index);
 			break;
 		}
 bandwidth_change:
@@ -1079,12 +1152,14 @@
 
 static void xhci_cleanup_halted_endpoint(struct xhci_hcd *xhci,
 		unsigned int slot_id, unsigned int ep_index,
+		unsigned int stream_id,
 		struct xhci_td *td, union xhci_trb *event_trb)
 {
 	struct xhci_virt_ep *ep = &xhci->devs[slot_id]->eps[ep_index];
 	ep->ep_state |= EP_HALTED;
 	ep->stopped_td = td;
 	ep->stopped_trb = event_trb;
+	ep->stopped_stream = stream_id;
 
 	xhci_queue_reset_ep(xhci, slot_id, ep_index);
 	xhci_cleanup_stalled_ring(xhci, td->urb->dev, ep_index);
@@ -1169,10 +1244,11 @@
 	ep_index = TRB_TO_EP_ID(event->flags) - 1;
 	xhci_dbg(xhci, "%s - ep index = %d\n", __func__, ep_index);
 	ep = &xdev->eps[ep_index];
-	ep_ring = ep->ring;
+	ep_ring = xhci_dma_to_transfer_ring(ep, event->buffer);
 	ep_ctx = xhci_get_ep_ctx(xhci, xdev->out_ctx, ep_index);
 	if (!ep_ring || (ep_ctx->ep_info & EP_STATE_MASK) == EP_STATE_DISABLED) {
-		xhci_err(xhci, "ERROR Transfer event pointed to disabled endpoint\n");
+		xhci_err(xhci, "ERROR Transfer event for disabled endpoint "
+				"or incorrect stream ring\n");
 		return -ENODEV;
 	}
 
@@ -1303,7 +1379,7 @@
 				td->urb->actual_length = 0;
 
 			xhci_cleanup_halted_endpoint(xhci,
-					slot_id, ep_index, td, event_trb);
+					slot_id, ep_index, 0, td, event_trb);
 			goto td_cleanup;
 		}
 		/*
@@ -1452,6 +1528,7 @@
 			 */
 			ep->stopped_td = td;
 			ep->stopped_trb = event_trb;
+			ep->stopped_stream = ep_ring->stream_id;
 		} else if (xhci_requires_manual_halt_cleanup(xhci,
 					ep_ctx, trb_comp_code)) {
 			/* Other types of errors halt the endpoint, but the
@@ -1460,7 +1537,7 @@
 			 * xHCI hardware manually.
 			 */
 			xhci_cleanup_halted_endpoint(xhci,
-					slot_id, ep_index, td, event_trb);
+					slot_id, ep_index, ep_ring->stream_id, td, event_trb);
 		} else {
 			/* Update ring dequeue pointer */
 			while (ep_ring->dequeue != td->last_trb)
@@ -1656,14 +1733,24 @@
 static int prepare_transfer(struct xhci_hcd *xhci,
 		struct xhci_virt_device *xdev,
 		unsigned int ep_index,
+		unsigned int stream_id,
 		unsigned int num_trbs,
 		struct urb *urb,
 		struct xhci_td **td,
 		gfp_t mem_flags)
 {
 	int ret;
+	struct xhci_ring *ep_ring;
 	struct xhci_ep_ctx *ep_ctx = xhci_get_ep_ctx(xhci, xdev->out_ctx, ep_index);
-	ret = prepare_ring(xhci, xdev->eps[ep_index].ring,
+
+	ep_ring = xhci_stream_id_to_ring(xdev, ep_index, stream_id);
+	if (!ep_ring) {
+		xhci_dbg(xhci, "Can't prepare ring for bad stream ID %u\n",
+				stream_id);
+		return -EINVAL;
+	}
+
+	ret = prepare_ring(xhci, ep_ring,
 			ep_ctx->ep_info & EP_STATE_MASK,
 			num_trbs, mem_flags);
 	if (ret)
@@ -1683,9 +1770,9 @@
 	(*td)->urb = urb;
 	urb->hcpriv = (void *) (*td);
 	/* Add this TD to the tail of the endpoint ring's TD list */
-	list_add_tail(&(*td)->td_list, &xdev->eps[ep_index].ring->td_list);
-	(*td)->start_seg = xdev->eps[ep_index].ring->enq_seg;
-	(*td)->first_trb = xdev->eps[ep_index].ring->enqueue;
+	list_add_tail(&(*td)->td_list, &ep_ring->td_list);
+	(*td)->start_seg = ep_ring->enq_seg;
+	(*td)->first_trb = ep_ring->enqueue;
 
 	return 0;
 }
@@ -1751,7 +1838,7 @@
 }
 
 static void giveback_first_trb(struct xhci_hcd *xhci, int slot_id,
-		unsigned int ep_index, int start_cycle,
+		unsigned int ep_index, unsigned int stream_id, int start_cycle,
 		struct xhci_generic_trb *start_trb, struct xhci_td *td)
 {
 	/*
@@ -1760,7 +1847,7 @@
 	 */
 	wmb();
 	start_trb->field[3] |= start_cycle;
-	ring_ep_doorbell(xhci, slot_id, ep_index);
+	ring_ep_doorbell(xhci, slot_id, ep_index, stream_id);
 }
 
 /*
@@ -1834,12 +1921,16 @@
 	struct xhci_generic_trb *start_trb;
 	int start_cycle;
 
-	ep_ring = xhci->devs[slot_id]->eps[ep_index].ring;
+	ep_ring = xhci_urb_to_transfer_ring(xhci, urb);
+	if (!ep_ring)
+		return -EINVAL;
+
 	num_trbs = count_sg_trbs_needed(xhci, urb);
 	num_sgs = urb->num_sgs;
 
 	trb_buff_len = prepare_transfer(xhci, xhci->devs[slot_id],
-			ep_index, num_trbs, urb, &td, mem_flags);
+			ep_index, urb->stream_id,
+			num_trbs, urb, &td, mem_flags);
 	if (trb_buff_len < 0)
 		return trb_buff_len;
 	/*
@@ -1948,7 +2039,8 @@
 	} while (running_total < urb->transfer_buffer_length);
 
 	check_trb_math(urb, num_trbs, running_total);
-	giveback_first_trb(xhci, slot_id, ep_index, start_cycle, start_trb, td);
+	giveback_first_trb(xhci, slot_id, ep_index, urb->stream_id,
+			start_cycle, start_trb, td);
 	return 0;
 }
 
@@ -1970,7 +2062,9 @@
 	if (urb->num_sgs)
 		return queue_bulk_sg_tx(xhci, mem_flags, urb, slot_id, ep_index);
 
-	ep_ring = xhci->devs[slot_id]->eps[ep_index].ring;
+	ep_ring = xhci_urb_to_transfer_ring(xhci, urb);
+	if (!ep_ring)
+		return -EINVAL;
 
 	num_trbs = 0;
 	/* How much data is (potentially) left before the 64KB boundary? */
@@ -1997,7 +2091,8 @@
 				(unsigned long long)urb->transfer_dma,
 				num_trbs);
 
-	ret = prepare_transfer(xhci, xhci->devs[slot_id], ep_index,
+	ret = prepare_transfer(xhci, xhci->devs[slot_id],
+			ep_index, urb->stream_id,
 			num_trbs, urb, &td, mem_flags);
 	if (ret < 0)
 		return ret;
@@ -2067,7 +2162,8 @@
 	} while (running_total < urb->transfer_buffer_length);
 
 	check_trb_math(urb, num_trbs, running_total);
-	giveback_first_trb(xhci, slot_id, ep_index, start_cycle, start_trb, td);
+	giveback_first_trb(xhci, slot_id, ep_index, urb->stream_id,
+			start_cycle, start_trb, td);
 	return 0;
 }
 
@@ -2084,7 +2180,9 @@
 	u32 field, length_field;
 	struct xhci_td *td;
 
-	ep_ring = xhci->devs[slot_id]->eps[ep_index].ring;
+	ep_ring = xhci_urb_to_transfer_ring(xhci, urb);
+	if (!ep_ring)
+		return -EINVAL;
 
 	/*
 	 * Need to copy setup packet into setup TRB, so we can't use the setup
@@ -2105,8 +2203,9 @@
 	 */
 	if (urb->transfer_buffer_length > 0)
 		num_trbs++;
-	ret = prepare_transfer(xhci, xhci->devs[slot_id], ep_index, num_trbs,
-			urb, &td, mem_flags);
+	ret = prepare_transfer(xhci, xhci->devs[slot_id],
+			ep_index, urb->stream_id,
+			num_trbs, urb, &td, mem_flags);
 	if (ret < 0)
 		return ret;
 
@@ -2161,7 +2260,8 @@
 			/* Event on completion */
 			field | TRB_IOC | TRB_TYPE(TRB_STATUS) | ep_ring->cycle_state);
 
-	giveback_first_trb(xhci, slot_id, ep_index, start_cycle, start_trb, td);
+	giveback_first_trb(xhci, slot_id, ep_index, 0,
+			start_cycle, start_trb, td);
 	return 0;
 }
 
@@ -2273,12 +2373,14 @@
  * This should not be used for endpoints that have streams enabled.
  */
 static int queue_set_tr_deq(struct xhci_hcd *xhci, int slot_id,
-		unsigned int ep_index, struct xhci_segment *deq_seg,
+		unsigned int ep_index, unsigned int stream_id,
+		struct xhci_segment *deq_seg,
 		union xhci_trb *deq_ptr, u32 cycle_state)
 {
 	dma_addr_t addr;
 	u32 trb_slot_id = SLOT_ID_FOR_TRB(slot_id);
 	u32 trb_ep_index = EP_ID_FOR_TRB(ep_index);
+	u32 trb_stream_id = STREAM_ID_FOR_TRB(stream_id);
 	u32 type = TRB_TYPE(TRB_SET_DEQ);
 
 	addr = xhci_trb_virt_to_dma(deq_seg, deq_ptr);
@@ -2289,7 +2391,7 @@
 		return 0;
 	}
 	return queue_command(xhci, lower_32_bits(addr) | cycle_state,
-			upper_32_bits(addr), 0,
+			upper_32_bits(addr), trb_stream_id,
 			trb_slot_id | trb_ep_index | type, false);
 }
 
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 2e370fe..3cac2ff 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -353,11 +353,7 @@
 		if (!xhci->devs[i])
 			continue;
 		for (j = 0; j < 31; ++j) {
-			struct xhci_ring *ring = xhci->devs[i]->eps[j].ring;
-			if (!ring)
-				continue;
-			xhci_dbg(xhci, "Dev %d endpoint ring %d:\n", i, j);
-			xhci_debug_segment(xhci, ring->deq_seg);
+			xhci_dbg_ep_rings(xhci, i, j, &xhci->devs[i]->eps[j]);
 		}
 	}
 
@@ -839,7 +835,12 @@
 	xhci_debug_ring(xhci, xhci->event_ring);
 	ep_index = xhci_get_endpoint_index(&urb->ep->desc);
 	ep = &xhci->devs[urb->dev->slot_id]->eps[ep_index];
-	ep_ring = ep->ring;
+	ep_ring = xhci_urb_to_transfer_ring(xhci, urb);
+	if (!ep_ring) {
+		ret = -EINVAL;
+		goto done;
+	}
+
 	xhci_dbg(xhci, "Endpoint ring:\n");
 	xhci_debug_ring(xhci, ep_ring);
 	td = (struct xhci_td *) urb->hcpriv;
@@ -1383,7 +1384,7 @@
 	 * or it will attempt to resend it on the next doorbell ring.
 	 */
 	xhci_find_new_dequeue_state(xhci, udev->slot_id,
-			ep_index, ep->stopped_td,
+			ep_index, ep->stopped_stream, ep->stopped_td,
 			&deq_state);
 
 	/* HW with the reset endpoint quirk will use the saved dequeue state to
@@ -1392,10 +1393,12 @@
 	if (!(xhci->quirks & XHCI_RESET_EP_QUIRK)) {
 		xhci_dbg(xhci, "Queueing new dequeue state\n");
 		xhci_queue_new_dequeue_state(xhci, udev->slot_id,
-				ep_index, &deq_state);
+				ep_index, ep->stopped_stream, &deq_state);
 	} else {
 		/* Better hope no one uses the input context between now and the
 		 * reset endpoint completion!
+		 * XXX: No idea how this hardware will react when stream rings
+		 * are enabled.
 		 */
 		xhci_dbg(xhci, "Setting up input context for "
 				"configure endpoint command\n");
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index 7a9447c..dada2fb 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -444,6 +444,7 @@
 
 /* Endpoint Target - bits 0:7 */
 #define EPI_TO_DB(p)		(((p) + 1) & 0xff)
+#define STREAM_ID_TO_DB(p)	(((p) & 0xffff) << 16)
 
 
 /**
@@ -714,6 +715,7 @@
 	/* The TRB that was last reported in a stopped endpoint ring */
 	union xhci_trb		*stopped_trb;
 	struct xhci_td		*stopped_td;
+	unsigned int		stopped_stream;
 	/* Watchdog timer for stop endpoint command to cancel URBs */
 	struct timer_list	stop_cmd_timer;
 	int			stop_cmds_pending;
@@ -871,6 +873,10 @@
 #define TRB_TO_EP_INDEX(p)		((((p) & (0x1f << 16)) >> 16) - 1)
 #define	EP_ID_FOR_TRB(p)		((((p) + 1) & 0x1f) << 16)
 
+/* Set TR Dequeue Pointer command TRB fields */
+#define TRB_TO_STREAM_ID(p)		((((p) & (0xffff << 16)) >> 16))
+#define STREAM_ID_FOR_TRB(p)		((((p)) & 0xffff) << 16)
+
 
 /* Port Status Change Event TRB fields */
 /* Port ID - bits 31:24 */
@@ -1040,6 +1046,7 @@
 	 * if we own the TRB (if we are the consumer).  See section 4.9.1.
 	 */
 	u32			cycle_state;
+	unsigned int		stream_id;
 };
 
 struct xhci_erst_entry {
@@ -1265,6 +1272,9 @@
 void xhci_dbg_ctx(struct xhci_hcd *xhci, struct xhci_container_ctx *ctx, unsigned int last_ep);
 char *xhci_get_slot_state(struct xhci_hcd *xhci,
 		struct xhci_container_ctx *ctx);
+void xhci_dbg_ep_rings(struct xhci_hcd *xhci,
+		unsigned int slot_id, unsigned int ep_index,
+		struct xhci_virt_ep *ep);
 
 /* xHCI memory management */
 void xhci_mem_cleanup(struct xhci_hcd *xhci);
@@ -1302,6 +1312,18 @@
 void xhci_setup_no_streams_ep_input_ctx(struct xhci_hcd *xhci,
 		struct xhci_ep_ctx *ep_ctx,
 		struct xhci_virt_ep *ep);
+struct xhci_ring *xhci_dma_to_transfer_ring(
+		struct xhci_virt_ep *ep,
+		u64 address);
+struct xhci_ring *xhci_urb_to_transfer_ring(struct xhci_hcd *xhci,
+		struct urb *urb);
+struct xhci_ring *xhci_triad_to_transfer_ring(struct xhci_hcd *xhci,
+		unsigned int slot_id, unsigned int ep_index,
+		unsigned int stream_id);
+struct xhci_ring *xhci_stream_id_to_ring(
+		struct xhci_virt_device *dev,
+		unsigned int ep_index,
+		unsigned int stream_id);
 struct xhci_command *xhci_alloc_command(struct xhci_hcd *xhci,
 		bool allocate_in_ctx, bool allocate_completion,
 		gfp_t mem_flags);
@@ -1374,9 +1396,11 @@
 int xhci_queue_reset_device(struct xhci_hcd *xhci, u32 slot_id);
 void xhci_find_new_dequeue_state(struct xhci_hcd *xhci,
 		unsigned int slot_id, unsigned int ep_index,
-		struct xhci_td *cur_td, struct xhci_dequeue_state *state);
+		unsigned int stream_id, struct xhci_td *cur_td,
+		struct xhci_dequeue_state *state);
 void xhci_queue_new_dequeue_state(struct xhci_hcd *xhci,
 		unsigned int slot_id, unsigned int ep_index,
+		unsigned int stream_id,
 		struct xhci_dequeue_state *deq_state);
 void xhci_cleanup_stalled_ring(struct xhci_hcd *xhci,
 		struct usb_device *udev, unsigned int ep_index);