serial: fsl_lpuart: add DMA support

Add dma support for lpuart. This function depend on DMA driver.
You can turn on it by write both the dmas and dma-name properties in dts node.

Signed-off-by: Yuan Yao <yao.yuan@freescale.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c
index 8978dc9a..c5eb897 100644
--- a/drivers/tty/serial/fsl_lpuart.c
+++ b/drivers/tty/serial/fsl_lpuart.c
@@ -13,14 +13,19 @@
 #define SUPPORT_SYSRQ
 #endif
 
-#include <linux/module.h>
+#include <linux/clk.h>
+#include <linux/console.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/dmapool.h>
 #include <linux/io.h>
 #include <linux/irq.h>
-#include <linux/clk.h>
+#include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
-#include <linux/console.h>
+#include <linux/of_dma.h>
 #include <linux/serial_core.h>
+#include <linux/slab.h>
 #include <linux/tty_flip.h>
 
 /* All registers are 8-bit width */
@@ -112,6 +117,10 @@
 #define UARTSFIFO_TXOF		0x02
 #define UARTSFIFO_RXUF		0x01
 
+#define DMA_MAXBURST		16
+#define DMA_MAXBURST_MASK	(DMA_MAXBURST - 1)
+#define FSL_UART_RX_DMA_BUFFER_SIZE	64
+
 #define DRIVER_NAME	"fsl-lpuart"
 #define DEV_NAME	"ttyLP"
 #define UART_NR		6
@@ -121,6 +130,24 @@
 	struct clk		*clk;
 	unsigned int		txfifo_size;
 	unsigned int		rxfifo_size;
+
+	bool			lpuart_dma_use;
+	struct dma_chan		*dma_tx_chan;
+	struct dma_chan		*dma_rx_chan;
+	struct dma_async_tx_descriptor  *dma_tx_desc;
+	struct dma_async_tx_descriptor  *dma_rx_desc;
+	dma_addr_t		dma_tx_buf_bus;
+	dma_addr_t		dma_rx_buf_bus;
+	dma_cookie_t		dma_tx_cookie;
+	dma_cookie_t		dma_rx_cookie;
+	unsigned char		*dma_tx_buf_virt;
+	unsigned char		*dma_rx_buf_virt;
+	unsigned int		dma_tx_bytes;
+	unsigned int		dma_rx_bytes;
+	int			dma_tx_in_progress;
+	int			dma_rx_in_progress;
+	unsigned int		dma_rx_timeout;
+	struct timer_list	lpuart_timer;
 };
 
 static struct of_device_id lpuart_dt_ids[] = {
@@ -131,6 +158,10 @@
 };
 MODULE_DEVICE_TABLE(of, lpuart_dt_ids);
 
+/* Forward declare this for the dma callbacks*/
+static void lpuart_dma_tx_complete(void *arg);
+static void lpuart_dma_rx_complete(void *arg);
+
 static void lpuart_stop_tx(struct uart_port *port)
 {
 	unsigned char temp;
@@ -152,6 +183,210 @@
 {
 }
 
+static void lpuart_copy_rx_to_tty(struct lpuart_port *sport,
+		struct tty_port *tty, int count)
+{
+	int copied;
+
+	sport->port.icount.rx += count;
+
+	if (!tty) {
+		dev_err(sport->port.dev, "No tty port\n");
+		return;
+	}
+
+	dma_sync_single_for_cpu(sport->port.dev, sport->dma_rx_buf_bus,
+			FSL_UART_RX_DMA_BUFFER_SIZE, DMA_FROM_DEVICE);
+	copied = tty_insert_flip_string(tty,
+			((unsigned char *)(sport->dma_rx_buf_virt)), count);
+
+	if (copied != count) {
+		WARN_ON(1);
+		dev_err(sport->port.dev, "RxData copy to tty layer failed\n");
+	}
+
+	dma_sync_single_for_device(sport->port.dev, sport->dma_rx_buf_bus,
+			FSL_UART_RX_DMA_BUFFER_SIZE, DMA_TO_DEVICE);
+}
+
+static void lpuart_pio_tx(struct lpuart_port *sport)
+{
+	struct circ_buf *xmit = &sport->port.state->xmit;
+	unsigned long flags;
+
+	spin_lock_irqsave(&sport->port.lock, flags);
+
+	while (!uart_circ_empty(xmit) &&
+		readb(sport->port.membase + UARTTCFIFO) < sport->txfifo_size) {
+		writeb(xmit->buf[xmit->tail], sport->port.membase + UARTDR);
+		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
+		sport->port.icount.tx++;
+	}
+
+	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
+		uart_write_wakeup(&sport->port);
+
+	if (uart_circ_empty(xmit))
+		writeb(readb(sport->port.membase + UARTCR5) | UARTCR5_TDMAS,
+			sport->port.membase + UARTCR5);
+
+	spin_unlock_irqrestore(&sport->port.lock, flags);
+}
+
+static int lpuart_dma_tx(struct lpuart_port *sport, unsigned long count)
+{
+	struct circ_buf *xmit = &sport->port.state->xmit;
+	dma_addr_t tx_bus_addr;
+
+	dma_sync_single_for_device(sport->port.dev, sport->dma_tx_buf_bus,
+				UART_XMIT_SIZE, DMA_TO_DEVICE);
+	sport->dma_tx_bytes = count & ~(DMA_MAXBURST_MASK);
+	tx_bus_addr = sport->dma_tx_buf_bus + xmit->tail;
+	sport->dma_tx_desc = dmaengine_prep_slave_single(sport->dma_tx_chan,
+					tx_bus_addr, sport->dma_tx_bytes,
+					DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT);
+
+	if (!sport->dma_tx_desc) {
+		dev_err(sport->port.dev, "Not able to get desc for tx\n");
+		return -EIO;
+	}
+
+	sport->dma_tx_desc->callback = lpuart_dma_tx_complete;
+	sport->dma_tx_desc->callback_param = sport;
+	sport->dma_tx_in_progress = 1;
+	sport->dma_tx_cookie = dmaengine_submit(sport->dma_tx_desc);
+	dma_async_issue_pending(sport->dma_tx_chan);
+
+	return 0;
+}
+
+static void lpuart_prepare_tx(struct lpuart_port *sport)
+{
+	struct circ_buf *xmit = &sport->port.state->xmit;
+	unsigned long count =  CIRC_CNT_TO_END(xmit->head,
+					xmit->tail, UART_XMIT_SIZE);
+
+	if (!count)
+		return;
+
+	if (count < DMA_MAXBURST)
+		writeb(readb(sport->port.membase + UARTCR5) & ~UARTCR5_TDMAS,
+				sport->port.membase + UARTCR5);
+	else {
+		writeb(readb(sport->port.membase + UARTCR5) | UARTCR5_TDMAS,
+				sport->port.membase + UARTCR5);
+		lpuart_dma_tx(sport, count);
+	}
+}
+
+static void lpuart_dma_tx_complete(void *arg)
+{
+	struct lpuart_port *sport = arg;
+	struct circ_buf *xmit = &sport->port.state->xmit;
+	unsigned long flags;
+
+	async_tx_ack(sport->dma_tx_desc);
+
+	spin_lock_irqsave(&sport->port.lock, flags);
+
+	xmit->tail = (xmit->tail + sport->dma_tx_bytes) & (UART_XMIT_SIZE - 1);
+	sport->dma_tx_in_progress = 0;
+
+	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
+		uart_write_wakeup(&sport->port);
+
+	lpuart_prepare_tx(sport);
+
+	spin_unlock_irqrestore(&sport->port.lock, flags);
+}
+
+static int lpuart_dma_rx(struct lpuart_port *sport)
+{
+	dma_sync_single_for_device(sport->port.dev, sport->dma_rx_buf_bus,
+			FSL_UART_RX_DMA_BUFFER_SIZE, DMA_TO_DEVICE);
+	sport->dma_rx_desc = dmaengine_prep_slave_single(sport->dma_rx_chan,
+			sport->dma_rx_buf_bus, FSL_UART_RX_DMA_BUFFER_SIZE,
+			DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT);
+
+	if (!sport->dma_rx_desc) {
+		dev_err(sport->port.dev, "Not able to get desc for rx\n");
+		return -EIO;
+	}
+
+	sport->dma_rx_desc->callback = lpuart_dma_rx_complete;
+	sport->dma_rx_desc->callback_param = sport;
+	sport->dma_rx_in_progress = 1;
+	sport->dma_rx_cookie = dmaengine_submit(sport->dma_rx_desc);
+	dma_async_issue_pending(sport->dma_rx_chan);
+
+	return 0;
+}
+
+static void lpuart_dma_rx_complete(void *arg)
+{
+	struct lpuart_port *sport = arg;
+	struct tty_port *port = &sport->port.state->port;
+	unsigned long flags;
+
+	async_tx_ack(sport->dma_rx_desc);
+
+	spin_lock_irqsave(&sport->port.lock, flags);
+
+	sport->dma_rx_in_progress = 0;
+	lpuart_copy_rx_to_tty(sport, port, FSL_UART_RX_DMA_BUFFER_SIZE);
+	tty_flip_buffer_push(port);
+	lpuart_dma_rx(sport);
+
+	spin_unlock_irqrestore(&sport->port.lock, flags);
+}
+
+static void lpuart_timer_func(unsigned long data)
+{
+	struct lpuart_port *sport = (struct lpuart_port *)data;
+	struct tty_port *port = &sport->port.state->port;
+	struct dma_tx_state state;
+	unsigned long flags;
+	unsigned char temp;
+	int count;
+
+	del_timer(&sport->lpuart_timer);
+	dmaengine_pause(sport->dma_rx_chan);
+	dmaengine_tx_status(sport->dma_rx_chan, sport->dma_rx_cookie, &state);
+	dmaengine_terminate_all(sport->dma_rx_chan);
+	count = FSL_UART_RX_DMA_BUFFER_SIZE - state.residue;
+	async_tx_ack(sport->dma_rx_desc);
+
+	spin_lock_irqsave(&sport->port.lock, flags);
+
+	sport->dma_rx_in_progress = 0;
+	lpuart_copy_rx_to_tty(sport, port, count);
+	tty_flip_buffer_push(port);
+	temp = readb(sport->port.membase + UARTCR5);
+	writeb(temp & ~UARTCR5_RDMAS, sport->port.membase + UARTCR5);
+
+	spin_unlock_irqrestore(&sport->port.lock, flags);
+}
+
+static inline void lpuart_prepare_rx(struct lpuart_port *sport)
+{
+	unsigned long flags;
+	unsigned char temp;
+
+	spin_lock_irqsave(&sport->port.lock, flags);
+
+	init_timer(&sport->lpuart_timer);
+	sport->lpuart_timer.function = lpuart_timer_func;
+	sport->lpuart_timer.data = (unsigned long)sport;
+	sport->lpuart_timer.expires = jiffies + sport->dma_rx_timeout;
+	add_timer(&sport->lpuart_timer);
+
+	lpuart_dma_rx(sport);
+	temp = readb(sport->port.membase + UARTCR5);
+	writeb(temp | UARTCR5_RDMAS, sport->port.membase + UARTCR5);
+
+	spin_unlock_irqrestore(&sport->port.lock, flags);
+}
+
 static inline void lpuart_transmit_buffer(struct lpuart_port *sport)
 {
 	struct circ_buf *xmit = &sport->port.state->xmit;
@@ -172,14 +407,21 @@
 
 static void lpuart_start_tx(struct uart_port *port)
 {
-	struct lpuart_port *sport = container_of(port, struct lpuart_port, port);
+	struct lpuart_port *sport = container_of(port,
+			struct lpuart_port, port);
+	struct circ_buf *xmit = &sport->port.state->xmit;
 	unsigned char temp;
 
 	temp = readb(port->membase + UARTCR2);
 	writeb(temp | UARTCR2_TIE, port->membase + UARTCR2);
 
-	if (readb(port->membase + UARTSR1) & UARTSR1_TDRE)
-		lpuart_transmit_buffer(sport);
+	if (sport->lpuart_dma_use) {
+		if (!uart_circ_empty(xmit) && !sport->dma_tx_in_progress)
+			lpuart_prepare_tx(sport);
+	} else {
+		if (readb(port->membase + UARTSR1) & UARTSR1_TDRE)
+			lpuart_transmit_buffer(sport);
+	}
 }
 
 static irqreturn_t lpuart_txint(int irq, void *dev_id)
@@ -279,12 +521,19 @@
 
 	sts = readb(sport->port.membase + UARTSR1);
 
-	if (sts & UARTSR1_RDRF)
-		lpuart_rxint(irq, dev_id);
-
+	if (sts & UARTSR1_RDRF) {
+		if (sport->lpuart_dma_use)
+			lpuart_prepare_rx(sport);
+		else
+			lpuart_rxint(irq, dev_id);
+	}
 	if (sts & UARTSR1_TDRE &&
-		!(readb(sport->port.membase + UARTCR5) & UARTCR5_TDMAS))
-		lpuart_txint(irq, dev_id);
+		!(readb(sport->port.membase + UARTCR5) & UARTCR5_TDMAS)) {
+		if (sport->lpuart_dma_use)
+			lpuart_pio_tx(sport);
+		else
+			lpuart_txint(irq, dev_id);
+	}
 
 	return IRQ_HANDLED;
 }
@@ -366,13 +615,156 @@
 	writeb(UARTCFIFO_TXFLUSH | UARTCFIFO_RXFLUSH,
 			sport->port.membase + UARTCFIFO);
 
-	writeb(2, sport->port.membase + UARTTWFIFO);
+	writeb(0, sport->port.membase + UARTTWFIFO);
 	writeb(1, sport->port.membase + UARTRWFIFO);
 
 	/* Restore cr2 */
 	writeb(cr2_saved, sport->port.membase + UARTCR2);
 }
 
+static int lpuart_dma_tx_request(struct uart_port *port)
+{
+	struct lpuart_port *sport = container_of(port,
+					struct lpuart_port, port);
+	struct dma_chan *tx_chan;
+	struct dma_slave_config dma_tx_sconfig;
+	dma_addr_t dma_bus;
+	unsigned char *dma_buf;
+	int ret;
+
+	tx_chan  = dma_request_slave_channel(sport->port.dev, "tx");
+
+	if (!tx_chan) {
+		dev_err(sport->port.dev, "Dma tx channel request failed!\n");
+		return -ENODEV;
+	}
+
+	dma_bus = dma_map_single(tx_chan->device->dev,
+				sport->port.state->xmit.buf,
+				UART_XMIT_SIZE, DMA_TO_DEVICE);
+
+	if (dma_mapping_error(tx_chan->device->dev, dma_bus)) {
+		dev_err(sport->port.dev, "dma_map_single tx failed\n");
+		dma_release_channel(tx_chan);
+		return -ENOMEM;
+	}
+
+	dma_buf = sport->port.state->xmit.buf;
+	dma_tx_sconfig.dst_addr = sport->port.mapbase + UARTDR;
+	dma_tx_sconfig.dst_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE;
+	dma_tx_sconfig.dst_maxburst = DMA_MAXBURST;
+	dma_tx_sconfig.direction = DMA_MEM_TO_DEV;
+	ret = dmaengine_slave_config(tx_chan, &dma_tx_sconfig);
+
+	if (ret < 0) {
+		dev_err(sport->port.dev,
+				"Dma slave config failed, err = %d\n", ret);
+		dma_release_channel(tx_chan);
+		return ret;
+	}
+
+	sport->dma_tx_chan = tx_chan;
+	sport->dma_tx_buf_virt = dma_buf;
+	sport->dma_tx_buf_bus = dma_bus;
+	sport->dma_tx_in_progress = 0;
+
+	return 0;
+}
+
+static int lpuart_dma_rx_request(struct uart_port *port)
+{
+	struct lpuart_port *sport = container_of(port,
+					struct lpuart_port, port);
+	struct dma_chan *rx_chan;
+	struct dma_slave_config dma_rx_sconfig;
+	dma_addr_t dma_bus;
+	unsigned char *dma_buf;
+	int ret;
+
+	rx_chan  = dma_request_slave_channel(sport->port.dev, "rx");
+
+	if (!rx_chan) {
+		dev_err(sport->port.dev, "Dma rx channel request failed!\n");
+		return -ENODEV;
+	}
+
+	dma_buf = devm_kzalloc(sport->port.dev,
+				FSL_UART_RX_DMA_BUFFER_SIZE, GFP_KERNEL);
+
+	if (!dma_buf) {
+		dev_err(sport->port.dev, "Dma rx alloc failed\n");
+		dma_release_channel(rx_chan);
+		return -ENOMEM;
+	}
+
+	dma_bus = dma_map_single(rx_chan->device->dev, dma_buf,
+				FSL_UART_RX_DMA_BUFFER_SIZE, DMA_FROM_DEVICE);
+
+	if (dma_mapping_error(rx_chan->device->dev, dma_bus)) {
+		dev_err(sport->port.dev, "dma_map_single rx failed\n");
+		dma_release_channel(rx_chan);
+		return -ENOMEM;
+	}
+
+	dma_rx_sconfig.src_addr = sport->port.mapbase + UARTDR;
+	dma_rx_sconfig.src_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE;
+	dma_rx_sconfig.src_maxburst = 1;
+	dma_rx_sconfig.direction = DMA_DEV_TO_MEM;
+	ret = dmaengine_slave_config(rx_chan, &dma_rx_sconfig);
+
+	if (ret < 0) {
+		dev_err(sport->port.dev,
+				"Dma slave config failed, err = %d\n", ret);
+		dma_release_channel(rx_chan);
+		return ret;
+	}
+
+	sport->dma_rx_chan = rx_chan;
+	sport->dma_rx_buf_virt = dma_buf;
+	sport->dma_rx_buf_bus = dma_bus;
+	sport->dma_rx_in_progress = 0;
+
+	sport->dma_rx_timeout = (sport->port.timeout - HZ / 50) *
+				FSL_UART_RX_DMA_BUFFER_SIZE * 3 /
+				sport->rxfifo_size / 2;
+
+	if (sport->dma_rx_timeout < msecs_to_jiffies(20))
+		sport->dma_rx_timeout = msecs_to_jiffies(20);
+
+	return 0;
+}
+
+static void lpuart_dma_tx_free(struct uart_port *port)
+{
+	struct lpuart_port *sport = container_of(port,
+					struct lpuart_port, port);
+	struct dma_chan *dma_chan;
+
+	dma_unmap_single(sport->port.dev, sport->dma_tx_buf_bus,
+			UART_XMIT_SIZE, DMA_TO_DEVICE);
+	dma_chan = sport->dma_tx_chan;
+	sport->dma_tx_chan = NULL;
+	sport->dma_tx_buf_bus = 0;
+	sport->dma_tx_buf_virt = NULL;
+	dma_release_channel(dma_chan);
+}
+
+static void lpuart_dma_rx_free(struct uart_port *port)
+{
+	struct lpuart_port *sport = container_of(port,
+					struct lpuart_port, port);
+	struct dma_chan *dma_chan;
+
+	dma_unmap_single(sport->port.dev, sport->dma_rx_buf_bus,
+			FSL_UART_RX_DMA_BUFFER_SIZE, DMA_FROM_DEVICE);
+
+	dma_chan = sport->dma_rx_chan;
+	sport->dma_rx_chan = NULL;
+	sport->dma_rx_buf_bus = 0;
+	sport->dma_rx_buf_virt = NULL;
+	dma_release_channel(dma_chan);
+}
+
 static int lpuart_startup(struct uart_port *port)
 {
 	struct lpuart_port *sport = container_of(port, struct lpuart_port, port);
@@ -380,6 +772,15 @@
 	unsigned long flags;
 	unsigned char temp;
 
+	/*whether use dma support by dma request results*/
+	if (lpuart_dma_tx_request(port) || lpuart_dma_rx_request(port)) {
+		sport->lpuart_dma_use = false;
+	} else {
+		sport->lpuart_dma_use = true;
+		temp = readb(port->membase + UARTCR5);
+		writeb(temp | UARTCR5_TDMAS, port->membase + UARTCR5);
+	}
+
 	ret = devm_request_irq(port->dev, port->irq, lpuart_int, 0,
 				DRIVER_NAME, sport);
 	if (ret)
@@ -414,6 +815,11 @@
 	spin_unlock_irqrestore(&port->lock, flags);
 
 	devm_free_irq(port->dev, port->irq, sport);
+
+	if (sport->lpuart_dma_use) {
+		lpuart_dma_tx_free(port);
+		lpuart_dma_rx_free(port);
+	}
 }
 
 static void