usb: dwc2: host: Get aligned DMA in a more supported way

All other host controllers who want aligned buffers for DMA do it a
certain way.  Let's do that too instead of working behind the USB core's
back.  This makes our interrupt handler not take forever and also rips
out a lot of code, simplifying things a bunch.

This also has the side effect of removing the 65535 max transfer size
limit.

NOTE: The actual code to allocate the aligned buffers is ripped almost
completely from the tegra EHCI driver.  At some point in the future we
may want to add this functionality to the USB core to share more code
everywhere.

Signed-off-by: Douglas Anderson <dianders@chromium.org>
Acked-by: John Youn <johnyoun@synopsys.com>
Tested-by: Heiko Stuebner <heiko@sntech.de>
Tested-by: John Youn <johnyoun@synopsys.com>
Tested-by: Stefan Wahren <stefan.wahren@i2se.com>
Signed-off-by: Felipe Balbi <balbi@kernel.org>
diff --git a/drivers/usb/dwc2/core.c b/drivers/usb/dwc2/core.c
index 46c4ba7..7b69c2a 100644
--- a/drivers/usb/dwc2/core.c
+++ b/drivers/usb/dwc2/core.c
@@ -1958,19 +1958,11 @@
 	}
 
 	if (hsotg->core_params->dma_enable > 0) {
-		dma_addr_t dma_addr;
-
-		if (chan->align_buf) {
-			if (dbg_hc(chan))
-				dev_vdbg(hsotg->dev, "align_buf\n");
-			dma_addr = chan->align_buf;
-		} else {
-			dma_addr = chan->xfer_dma;
-		}
-		dwc2_writel((u32)dma_addr, hsotg->regs + HCDMA(chan->hc_num));
+		dwc2_writel((u32)chan->xfer_dma,
+			    hsotg->regs + HCDMA(chan->hc_num));
 		if (dbg_hc(chan))
 			dev_vdbg(hsotg->dev, "Wrote %08lx to HCDMA(%d)\n",
-				 (unsigned long)dma_addr, chan->hc_num);
+				 (unsigned long)chan->xfer_dma, chan->hc_num);
 	}
 
 	/* Start the split */
@@ -3355,13 +3347,6 @@
 	width = (hwcfg3 & GHWCFG3_XFER_SIZE_CNTR_WIDTH_MASK) >>
 		GHWCFG3_XFER_SIZE_CNTR_WIDTH_SHIFT;
 	hw->max_transfer_size = (1 << (width + 11)) - 1;
-	/*
-	 * Clip max_transfer_size to 65535. dwc2_hc_setup_align_buf() allocates
-	 * coherent buffers with this size, and if it's too large we can
-	 * exhaust the coherent DMA pool.
-	 */
-	if (hw->max_transfer_size > 65535)
-		hw->max_transfer_size = 65535;
 	width = (hwcfg3 & GHWCFG3_PACKET_SIZE_CNTR_WIDTH_MASK) >>
 		GHWCFG3_PACKET_SIZE_CNTR_WIDTH_SHIFT;
 	hw->max_packet_count = (1 << (width + 4)) - 1;
diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c
index 8847c72..bc4bdbc 100644
--- a/drivers/usb/dwc2/hcd.c
+++ b/drivers/usb/dwc2/hcd.c
@@ -635,9 +635,9 @@
 	chan->hub_port = (u8)hub_port;
 }
 
-static void *dwc2_hc_init_xfer(struct dwc2_hsotg *hsotg,
-			       struct dwc2_host_chan *chan,
-			       struct dwc2_qtd *qtd, void *bufptr)
+static void dwc2_hc_init_xfer(struct dwc2_hsotg *hsotg,
+			      struct dwc2_host_chan *chan,
+			      struct dwc2_qtd *qtd)
 {
 	struct dwc2_hcd_urb *urb = qtd->urb;
 	struct dwc2_hcd_iso_packet_desc *frame_desc;
@@ -657,7 +657,6 @@
 			else
 				chan->xfer_buf = urb->setup_packet;
 			chan->xfer_len = 8;
-			bufptr = NULL;
 			break;
 
 		case DWC2_CONTROL_DATA:
@@ -684,7 +683,6 @@
 				chan->xfer_dma = hsotg->status_buf_dma;
 			else
 				chan->xfer_buf = hsotg->status_buf;
-			bufptr = NULL;
 			break;
 		}
 		break;
@@ -717,14 +715,6 @@
 
 		chan->xfer_len = frame_desc->length - qtd->isoc_split_offset;
 
-		/* For non-dword aligned buffers */
-		if (hsotg->core_params->dma_enable > 0 &&
-		    (chan->xfer_dma & 0x3))
-			bufptr = (u8 *)urb->buf + frame_desc->offset +
-					qtd->isoc_split_offset;
-		else
-			bufptr = NULL;
-
 		if (chan->xact_pos == DWC2_HCSPLT_XACTPOS_ALL) {
 			if (chan->xfer_len <= 188)
 				chan->xact_pos = DWC2_HCSPLT_XACTPOS_ALL;
@@ -733,63 +723,93 @@
 		}
 		break;
 	}
-
-	return bufptr;
 }
 
-static int dwc2_hc_setup_align_buf(struct dwc2_hsotg *hsotg, struct dwc2_qh *qh,
-				   struct dwc2_host_chan *chan,
-				   struct dwc2_hcd_urb *urb, void *bufptr)
+#define DWC2_USB_DMA_ALIGN 4
+
+struct dma_aligned_buffer {
+	void *kmalloc_ptr;
+	void *old_xfer_buffer;
+	u8 data[0];
+};
+
+static void dwc2_free_dma_aligned_buffer(struct urb *urb)
 {
-	u32 buf_size;
-	struct urb *usb_urb;
-	struct usb_hcd *hcd;
+	struct dma_aligned_buffer *temp;
 
-	if (!qh->dw_align_buf) {
-		if (chan->ep_type != USB_ENDPOINT_XFER_ISOC)
-			buf_size = hsotg->core_params->max_transfer_size;
-		else
-			/* 3072 = 3 max-size Isoc packets */
-			buf_size = 3072;
+	if (!(urb->transfer_flags & URB_ALIGNED_TEMP_BUFFER))
+		return;
 
-		qh->dw_align_buf = kmalloc(buf_size, GFP_ATOMIC | GFP_DMA);
-		if (!qh->dw_align_buf)
-			return -ENOMEM;
-		qh->dw_align_buf_size = buf_size;
-	}
+	temp = container_of(urb->transfer_buffer,
+		struct dma_aligned_buffer, data);
 
-	if (chan->xfer_len) {
-		dev_vdbg(hsotg->dev, "%s(): non-aligned buffer\n", __func__);
-		usb_urb = urb->priv;
+	if (usb_urb_dir_in(urb))
+		memcpy(temp->old_xfer_buffer, temp->data,
+		       urb->transfer_buffer_length);
+	urb->transfer_buffer = temp->old_xfer_buffer;
+	kfree(temp->kmalloc_ptr);
 
-		if (usb_urb) {
-			if (usb_urb->transfer_flags &
-			    (URB_SETUP_MAP_SINGLE | URB_DMA_MAP_SG |
-			     URB_DMA_MAP_PAGE | URB_DMA_MAP_SINGLE)) {
-				hcd = dwc2_hsotg_to_hcd(hsotg);
-				usb_hcd_unmap_urb_for_dma(hcd, usb_urb);
-			}
-			if (!chan->ep_is_in)
-				memcpy(qh->dw_align_buf, bufptr,
-				       chan->xfer_len);
-		} else {
-			dev_warn(hsotg->dev, "no URB in dwc2_urb\n");
-		}
-	}
+	urb->transfer_flags &= ~URB_ALIGNED_TEMP_BUFFER;
+}
 
-	qh->dw_align_buf_dma = dma_map_single(hsotg->dev,
-			qh->dw_align_buf, qh->dw_align_buf_size,
-			chan->ep_is_in ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
-	if (dma_mapping_error(hsotg->dev, qh->dw_align_buf_dma)) {
-		dev_err(hsotg->dev, "can't map align_buf\n");
-		chan->align_buf = 0;
-		return -EINVAL;
-	}
+static int dwc2_alloc_dma_aligned_buffer(struct urb *urb, gfp_t mem_flags)
+{
+	struct dma_aligned_buffer *temp, *kmalloc_ptr;
+	size_t kmalloc_size;
 
-	chan->align_buf = qh->dw_align_buf_dma;
+	if (urb->num_sgs || urb->sg ||
+	    urb->transfer_buffer_length == 0 ||
+	    !((uintptr_t)urb->transfer_buffer & (DWC2_USB_DMA_ALIGN - 1)))
+		return 0;
+
+	/* Allocate a buffer with enough padding for alignment */
+	kmalloc_size = urb->transfer_buffer_length +
+		sizeof(struct dma_aligned_buffer) + DWC2_USB_DMA_ALIGN - 1;
+
+	kmalloc_ptr = kmalloc(kmalloc_size, mem_flags);
+	if (!kmalloc_ptr)
+		return -ENOMEM;
+
+	/* Position our struct dma_aligned_buffer such that data is aligned */
+	temp = PTR_ALIGN(kmalloc_ptr + 1, DWC2_USB_DMA_ALIGN) - 1;
+	temp->kmalloc_ptr = kmalloc_ptr;
+	temp->old_xfer_buffer = urb->transfer_buffer;
+	if (usb_urb_dir_out(urb))
+		memcpy(temp->data, urb->transfer_buffer,
+		       urb->transfer_buffer_length);
+	urb->transfer_buffer = temp->data;
+
+	urb->transfer_flags |= URB_ALIGNED_TEMP_BUFFER;
+
 	return 0;
 }
 
+static int dwc2_map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb,
+				      gfp_t mem_flags)
+{
+	int ret;
+
+	/* We assume setup_dma is always aligned; warn if not */
+	WARN_ON_ONCE(urb->setup_dma &&
+		     (urb->setup_dma & (DWC2_USB_DMA_ALIGN - 1)));
+
+	ret = dwc2_alloc_dma_aligned_buffer(urb, mem_flags);
+	if (ret)
+		return ret;
+
+	ret = usb_hcd_map_urb_for_dma(hcd, urb, mem_flags);
+	if (ret)
+		dwc2_free_dma_aligned_buffer(urb);
+
+	return ret;
+}
+
+static void dwc2_unmap_urb_for_dma(struct usb_hcd *hcd, struct urb *urb)
+{
+	usb_hcd_unmap_urb_for_dma(hcd, urb);
+	dwc2_free_dma_aligned_buffer(urb);
+}
+
 /**
  * dwc2_assign_and_init_hc() - Assigns transactions from a QTD to a free host
  * channel and initializes the host channel to perform the transactions. The
@@ -804,7 +824,6 @@
 	struct dwc2_host_chan *chan;
 	struct dwc2_hcd_urb *urb;
 	struct dwc2_qtd *qtd;
-	void *bufptr = NULL;
 
 	if (dbg_qh(qh))
 		dev_vdbg(hsotg->dev, "%s(%p,%p)\n", __func__, hsotg, qh);
@@ -866,16 +885,10 @@
 		!dwc2_hcd_is_pipe_in(&urb->pipe_info))
 		urb->actual_length = urb->length;
 
-	if (hsotg->core_params->dma_enable > 0) {
+	if (hsotg->core_params->dma_enable > 0)
 		chan->xfer_dma = urb->dma + urb->actual_length;
-
-		/* For non-dword aligned case */
-		if (hsotg->core_params->dma_desc_enable <= 0 &&
-		    (chan->xfer_dma & 0x3))
-			bufptr = (u8 *)urb->buf + urb->actual_length;
-	} else {
+	else
 		chan->xfer_buf = (u8 *)urb->buf + urb->actual_length;
-	}
 
 	chan->xfer_len = urb->length - urb->actual_length;
 	chan->xfer_count = 0;
@@ -887,27 +900,7 @@
 		chan->do_split = 0;
 
 	/* Set the transfer attributes */
-	bufptr = dwc2_hc_init_xfer(hsotg, chan, qtd, bufptr);
-
-	/* Non DWORD-aligned buffer case */
-	if (bufptr) {
-		dev_vdbg(hsotg->dev, "Non-aligned buffer\n");
-		if (dwc2_hc_setup_align_buf(hsotg, qh, chan, urb, bufptr)) {
-			dev_err(hsotg->dev,
-				"%s: Failed to allocate memory to handle non-dword aligned buffer\n",
-				__func__);
-			/* Add channel back to free list */
-			chan->align_buf = 0;
-			chan->multi_count = 0;
-			list_add_tail(&chan->hc_list_entry,
-				      &hsotg->free_hc_list);
-			qtd->in_process = 0;
-			qh->channel = NULL;
-			return -ENOMEM;
-		}
-	} else {
-		chan->align_buf = 0;
-	}
+	dwc2_hc_init_xfer(hsotg, chan, qtd);
 
 	if (chan->ep_type == USB_ENDPOINT_XFER_INT ||
 	    chan->ep_type == USB_ENDPOINT_XFER_ISOC)
@@ -2971,6 +2964,9 @@
 
 	.bus_suspend = _dwc2_hcd_suspend,
 	.bus_resume = _dwc2_hcd_resume,
+
+	.map_urb_for_dma	= dwc2_map_urb_for_dma,
+	.unmap_urb_for_dma	= dwc2_unmap_urb_for_dma,
 };
 
 /*
diff --git a/drivers/usb/dwc2/hcd.h b/drivers/usb/dwc2/hcd.h
index 8f0a29c..42f2e4e 100644
--- a/drivers/usb/dwc2/hcd.h
+++ b/drivers/usb/dwc2/hcd.h
@@ -75,8 +75,6 @@
  *                      (micro)frame
  * @xfer_buf:           Pointer to current transfer buffer position
  * @xfer_dma:           DMA address of xfer_buf
- * @align_buf:          In Buffer DMA mode this will be used if xfer_buf is not
- *                      DWORD aligned
  * @xfer_len:           Total number of bytes to transfer
  * @xfer_count:         Number of bytes transferred so far
  * @start_pkt_count:    Packet count at start of transfer
@@ -133,7 +131,6 @@
 
 	u8 *xfer_buf;
 	dma_addr_t xfer_dma;
-	dma_addr_t align_buf;
 	u32 xfer_len;
 	u32 xfer_count;
 	u16 start_pkt_count;
@@ -243,10 +240,6 @@
  * @frame_usecs:        Internal variable used by the microframe scheduler
  * @start_split_frame:  (Micro)frame at which last start split was initialized
  * @ntd:                Actual number of transfer descriptors in a list
- * @dw_align_buf:       Used instead of original buffer if its physical address
- *                      is not dword-aligned
- * @dw_align_buf_size:  Size of dw_align_buf
- * @dw_align_buf_dma:   DMA address for dw_align_buf
  * @qtd_list:           List of QTDs for this QH
  * @channel:            Host channel currently processing transfers for this QH
  * @qh_list_entry:      Entry for QH in either the periodic or non-periodic
@@ -279,9 +272,6 @@
 	u16 frame_usecs[8];
 	u16 start_split_frame;
 	u16 ntd;
-	u8 *dw_align_buf;
-	int dw_align_buf_size;
-	dma_addr_t dw_align_buf_dma;
 	struct list_head qtd_list;
 	struct dwc2_host_chan *channel;
 	struct list_head qh_list_entry;
diff --git a/drivers/usb/dwc2/hcd_intr.c b/drivers/usb/dwc2/hcd_intr.c
index cadba8b..4270f6c 100644
--- a/drivers/usb/dwc2/hcd_intr.c
+++ b/drivers/usb/dwc2/hcd_intr.c
@@ -472,18 +472,6 @@
 		xfer_length = urb->length - urb->actual_length;
 	}
 
-	/* Non DWORD-aligned buffer case handling */
-	if (chan->align_buf && xfer_length) {
-		dev_vdbg(hsotg->dev, "%s(): non-aligned buffer\n", __func__);
-		dma_unmap_single(hsotg->dev, chan->qh->dw_align_buf_dma,
-				chan->qh->dw_align_buf_size,
-				chan->ep_is_in ?
-				DMA_FROM_DEVICE : DMA_TO_DEVICE);
-		if (chan->ep_is_in)
-			memcpy(urb->buf + urb->actual_length,
-					chan->qh->dw_align_buf, xfer_length);
-	}
-
 	dev_vdbg(hsotg->dev, "urb->actual_length=%d xfer_length=%d\n",
 		 urb->actual_length, xfer_length);
 	urb->actual_length += xfer_length;
@@ -573,21 +561,6 @@
 		frame_desc->status = 0;
 		frame_desc->actual_length = dwc2_get_actual_xfer_length(hsotg,
 					chan, chnum, qtd, halt_status, NULL);
-
-		/* Non DWORD-aligned buffer case handling */
-		if (chan->align_buf && frame_desc->actual_length) {
-			dev_vdbg(hsotg->dev, "%s(): non-aligned buffer\n",
-				 __func__);
-			dma_unmap_single(hsotg->dev, chan->qh->dw_align_buf_dma,
-					chan->qh->dw_align_buf_size,
-					chan->ep_is_in ?
-					DMA_FROM_DEVICE : DMA_TO_DEVICE);
-			if (chan->ep_is_in)
-				memcpy(urb->buf + frame_desc->offset +
-					qtd->isoc_split_offset,
-					chan->qh->dw_align_buf,
-					frame_desc->actual_length);
-		}
 		break;
 	case DWC2_HC_XFER_FRAME_OVERRUN:
 		urb->error_count++;
@@ -608,21 +581,6 @@
 		frame_desc->actual_length = dwc2_get_actual_xfer_length(hsotg,
 					chan, chnum, qtd, halt_status, NULL);
 
-		/* Non DWORD-aligned buffer case handling */
-		if (chan->align_buf && frame_desc->actual_length) {
-			dev_vdbg(hsotg->dev, "%s(): non-aligned buffer\n",
-				 __func__);
-			dma_unmap_single(hsotg->dev, chan->qh->dw_align_buf_dma,
-					chan->qh->dw_align_buf_size,
-					chan->ep_is_in ?
-					DMA_FROM_DEVICE : DMA_TO_DEVICE);
-			if (chan->ep_is_in)
-				memcpy(urb->buf + frame_desc->offset +
-					qtd->isoc_split_offset,
-					chan->qh->dw_align_buf,
-					frame_desc->actual_length);
-		}
-
 		/* Skip whole frame */
 		if (chan->qh->do_split &&
 		    chan->ep_type == USB_ENDPOINT_XFER_ISOC && chan->ep_is_in &&
@@ -688,8 +646,6 @@
 	}
 
 no_qtd:
-	if (qh->channel)
-		qh->channel->align_buf = 0;
 	qh->channel = NULL;
 	dwc2_hcd_qh_deactivate(hsotg, qh, continue_split);
 }
@@ -954,14 +910,6 @@
 
 	frame_desc->actual_length += len;
 
-	if (chan->align_buf) {
-		dev_vdbg(hsotg->dev, "%s(): non-aligned buffer\n", __func__);
-		dma_unmap_single(hsotg->dev, chan->qh->dw_align_buf_dma,
-				chan->qh->dw_align_buf_size, DMA_FROM_DEVICE);
-		memcpy(qtd->urb->buf + frame_desc->offset +
-		       qtd->isoc_split_offset, chan->qh->dw_align_buf, len);
-	}
-
 	qtd->isoc_split_offset += len;
 
 	if (frame_desc->actual_length >= frame_desc->length) {
@@ -1184,19 +1132,6 @@
 		xfer_length = urb->length - urb->actual_length;
 	}
 
-	/* Non DWORD-aligned buffer case handling */
-	if (chan->align_buf && xfer_length && chan->ep_is_in) {
-		dev_vdbg(hsotg->dev, "%s(): non-aligned buffer\n", __func__);
-		dma_unmap_single(hsotg->dev, chan->qh->dw_align_buf_dma,
-				chan->qh->dw_align_buf_size,
-				chan->ep_is_in ?
-				DMA_FROM_DEVICE : DMA_TO_DEVICE);
-		if (chan->ep_is_in)
-			memcpy(urb->buf + urb->actual_length,
-					chan->qh->dw_align_buf,
-					xfer_length);
-	}
-
 	urb->actual_length += xfer_length;
 
 	hctsiz = dwc2_readl(hsotg->regs + HCTSIZ(chnum));
diff --git a/drivers/usb/dwc2/hcd_queue.c b/drivers/usb/dwc2/hcd_queue.c
index 27d402f..e0933a9 100644
--- a/drivers/usb/dwc2/hcd_queue.c
+++ b/drivers/usb/dwc2/hcd_queue.c
@@ -232,13 +232,8 @@
  */
 void dwc2_hcd_qh_free(struct dwc2_hsotg *hsotg, struct dwc2_qh *qh)
 {
-	if (qh->desc_list) {
+	if (qh->desc_list)
 		dwc2_hcd_qh_free_ddma(hsotg, qh);
-	} else {
-		/* kfree(NULL) is safe */
-		kfree(qh->dw_align_buf);
-		qh->dw_align_buf_dma = (dma_addr_t)0;
-	}
 	kfree(qh);
 }