spi: add spi_tegra driver

v2 changes:
  from Thierry Reding:
    * add "select TEGRA_SYSTEM_DMA" to Kconfig
  from Grant Likely:
    * add oneline description to header
    * inline references to DRIVER_NAME
    * inline references to BUSY_TIMEOUT
    * open coded bytes_per_word()
    * spi_readl/writel -> spi_tegra_readl/writel
    * move transfer validation to spi_tegra_transfer
    * don't request_mem_region iomem as platform bus does that for us
    * __exit -> __devexit

v3 changes:
  from Russell King:
    * put request_mem_region back in
  from Grant Likely:
    * remove #undef DEBUG
    * add SLINK_ to register bit defines
    * remove unused bytes_per_word
    * make spi_tegra_readl/writel static inline
    * various refactoring for clarity
    * mark err if BSY bit is not cleared after 1000 retries
    * move spinlock to protect setting of RDY bit
    * subsys_initcall -> module_init

v3 changes:
  from Grant Likely:
    * update spi_tegra to use PTR_ERRless dma API

v4 changes:
  from Grant Likely:
    * remove empty spi_tegra_cleanup function
    * allow device ids of -1

Signed-off-by: Erik Gilling <konkers@android.com>
Acked-by: Grant Likely <grant.likely@secretlab.ca>
Cc: Thierry Reding <thierry.reding@avionic-design.de>
Cc: Russell King <linux@arm.linux.org.uk>

spi: tegra: cleanups from upstream review

Change-Id: Icecf7e64efcb39de072a15234ba1faa4bad40d25
Signed-off-by: Erik Gilling <konkers@android.com>
diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index 4b9eec6..78f9fd0 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -329,6 +329,13 @@
 	help
 	  SPI driver for Freescale STMP37xx/378x SoC SSP interface
 
+config SPI_TEGRA
+	tristate "Nvidia Tegra SPI controller"
+	depends on ARCH_TEGRA
+	select TEGRA_SYSTEM_DMA
+	help
+	  SPI driver for NVidia Tegra SoCs
+
 config SPI_TOPCLIFF_PCH
 	tristate "Topcliff PCH SPI Controller"
 	depends on PCI
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index 557aaad..8bc1a5a 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -39,6 +39,7 @@
 obj-$(CONFIG_SPI_S3C24XX_GPIO)		+= spi_s3c24xx_gpio.o
 obj-$(CONFIG_SPI_S3C24XX)		+= spi_s3c24xx_hw.o
 obj-$(CONFIG_SPI_S3C64XX)		+= spi_s3c64xx.o
+obj-$(CONFIG_SPI_TEGRA)			+= spi_tegra.o
 obj-$(CONFIG_SPI_TOPCLIFF_PCH)		+= spi_topcliff_pch.o
 obj-$(CONFIG_SPI_TXX9)			+= spi_txx9.o
 obj-$(CONFIG_SPI_XILINX)		+= xilinx_spi.o
diff --git a/drivers/spi/spi_tegra.c b/drivers/spi/spi_tegra.c
new file mode 100644
index 0000000..0385fde
--- /dev/null
+++ b/drivers/spi/spi_tegra.c
@@ -0,0 +1,618 @@
+/*
+ * Driver for Nvidia TEGRA spi controller.
+ *
+ * Copyright (C) 2010 Google, Inc.
+ *
+ * Author:
+ *     Erik Gilling <konkers@android.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/err.h>
+#include <linux/platform_device.h>
+#include <linux/io.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/clk.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+
+#include <linux/spi/spi.h>
+
+#include <mach/dma.h>
+
+#define SLINK_COMMAND		0x000	/* register offset; bit fields follow */
+#define   SLINK_BIT_LENGTH(x)		(((x) & 0x1f) << 0)
+#define   SLINK_WORD_SIZE(x)		(((x) & 0x1f) << 5)
+#define   SLINK_BOTH_EN			(1 << 10)
+#define   SLINK_CS_SW			(1 << 11)
+#define   SLINK_CS_VALUE		(1 << 12)
+#define   SLINK_CS_POLARITY		(1 << 13)
+#define   SLINK_IDLE_SDA_DRIVE_LOW	(0 << 16)
+#define   SLINK_IDLE_SDA_DRIVE_HIGH	(1 << 16)
+#define   SLINK_IDLE_SDA_PULL_LOW	(2 << 16)
+#define   SLINK_IDLE_SDA_PULL_HIGH	(3 << 16)
+#define   SLINK_IDLE_SDA_MASK		(3 << 16)
+#define   SLINK_CS_POLARITY1		(1 << 20)
+#define   SLINK_CK_SDA			(1 << 21)
+#define   SLINK_CS_POLARITY2		(1 << 22)
+#define   SLINK_CS_POLARITY3		(1 << 23)
+#define   SLINK_IDLE_SCLK_DRIVE_LOW	(0 << 24)
+#define   SLINK_IDLE_SCLK_DRIVE_HIGH	(1 << 24)
+#define   SLINK_IDLE_SCLK_PULL_LOW	(2 << 24)
+#define   SLINK_IDLE_SCLK_PULL_HIGH	(3 << 24)
+#define   SLINK_IDLE_SCLK_MASK		(3 << 24)
+#define   SLINK_M_S			(1 << 28)
+#define   SLINK_WAIT			(1 << 29)
+#define   SLINK_GO			(1 << 30)
+#define   SLINK_ENB			(1 << 31)
+
+#define SLINK_COMMAND2		0x004	/* register offset; bit fields follow */
+#define   SLINK_LSBFE			(1 << 0)
+#define   SLINK_SSOE			(1 << 1)
+#define   SLINK_SPIE			(1 << 4)
+#define   SLINK_BIDIROE			(1 << 6)
+#define   SLINK_MODFEN			(1 << 7)
+#define   SLINK_INT_SIZE(x)		(((x) & 0x1f) << 8)
+#define   SLINK_CS_ACTIVE_BETWEEN	(1 << 17)
+#define   SLINK_SS_EN_CS(x)		(((x) & 0x3) << 18)
+#define   SLINK_SS_SETUP(x)		(((x) & 0x3) << 20)
+#define   SLINK_FIFO_REFILLS_0		(0 << 22)
+#define   SLINK_FIFO_REFILLS_1		(1 << 22)
+#define   SLINK_FIFO_REFILLS_2		(2 << 22)
+#define   SLINK_FIFO_REFILLS_3		(3 << 22)
+#define   SLINK_FIFO_REFILLS_MASK	(3 << 22)
+#define   SLINK_WAIT_PACK_INT(x)	(((x) & 0x7) << 26)
+#define   SLINK_SPC0			(1 << 29)
+#define   SLINK_TXEN			(1 << 30)
+#define   SLINK_RXEN			(1 << 31)
+
+#define SLINK_STATUS		0x008	/* register offset; bit fields follow */
+#define   SLINK_COUNT(val)		(((val) >> 0) & 0x1f)
+#define   SLINK_WORD(val)		(((val) >> 5) & 0x1f)
+#define   SLINK_BLK_CNT(val)		(((val) >> 0) & 0xffff)	/* overlaps COUNT/WORD bits */
+#define   SLINK_MODF			(1 << 16)
+#define   SLINK_RX_UNF			(1 << 18)
+#define   SLINK_TX_OVF			(1 << 19)
+#define   SLINK_TX_FULL			(1 << 20)
+#define   SLINK_TX_EMPTY		(1 << 21)
+#define   SLINK_RX_FULL			(1 << 22)
+#define   SLINK_RX_EMPTY		(1 << 23)
+#define   SLINK_TX_UNF			(1 << 24)
+#define   SLINK_RX_OVF			(1 << 25)
+#define   SLINK_TX_FLUSH		(1 << 26)
+#define   SLINK_RX_FLUSH		(1 << 27)
+#define   SLINK_SCLK			(1 << 28)
+#define   SLINK_ERR			(1 << 29)
+#define   SLINK_RDY			(1 << 30)
+#define   SLINK_BSY			(1 << 31)
+
+#define SLINK_MAS_DATA		0x010	/* register offset */
+#define SLINK_SLAVE_DATA	0x014	/* register offset */
+
+#define SLINK_DMA_CTL		0x018	/* register offset; bit fields follow */
+#define   SLINK_DMA_BLOCK_SIZE(x)	(((x) & 0xffff) << 0)
+#define   SLINK_TX_TRIG_1		(0 << 16)
+#define   SLINK_TX_TRIG_4		(1 << 16)
+#define   SLINK_TX_TRIG_8		(2 << 16)
+#define   SLINK_TX_TRIG_16		(3 << 16)
+#define   SLINK_TX_TRIG_MASK		(3 << 16)
+#define   SLINK_RX_TRIG_1		(0 << 18)
+#define   SLINK_RX_TRIG_4		(1 << 18)
+#define   SLINK_RX_TRIG_8		(2 << 18)
+#define   SLINK_RX_TRIG_16		(3 << 18)
+#define   SLINK_RX_TRIG_MASK		(3 << 18)
+#define   SLINK_PACKED			(1 << 20)
+#define   SLINK_PACK_SIZE_4		(0 << 21)
+#define   SLINK_PACK_SIZE_8		(1 << 21)
+#define   SLINK_PACK_SIZE_16		(2 << 21)
+#define   SLINK_PACK_SIZE_32		(3 << 21)
+#define   SLINK_PACK_SIZE_MASK		(3 << 21)
+#define   SLINK_IE_TXC			(1 << 26)
+#define   SLINK_IE_RXC			(1 << 27)
+#define   SLINK_DMA_EN			(1 << 31)
+
+#define SLINK_STATUS2		0x01c	/* counts: shift down first, then mask */
+#define   SLINK_TX_FIFO_EMPTY_COUNT(val)	(((val) >> 0) & 0x3f)
+#define   SLINK_RX_FIFO_FULL_COUNT(val)		(((val) >> 16) & 0x3f)
+
+#define SLINK_TX_FIFO		0x100	/* TX FIFO register offset */
+#define SLINK_RX_FIFO		0x180	/* RX FIFO register offset */
+
+static const unsigned long spi_tegra_req_sels[] = {	/* indexed by pdev->id */
+	TEGRA_DMA_REQ_SEL_SL2B1,
+	TEGRA_DMA_REQ_SEL_SL2B2,
+	TEGRA_DMA_REQ_SEL_SL2B3,
+	TEGRA_DMA_REQ_SEL_SL2B4,
+};
+
+#define BB_LEN			32	/* bounce buffer size in 32-bit words */
+
+struct spi_tegra_data {	/* per-controller state (spi_master devdata) */
+	struct spi_master	*master;
+	struct platform_device	*pdev;
+	spinlock_t		lock;	/* guards queue and COMMAND updates */
+
+	struct clk		*clk;
+	void __iomem		*base;	/* ioremap()ed register window */
+	unsigned long		phys;	/* physical start of that window */
+
+	u32			cur_speed;	/* Hz; 0 while clk is disabled */
+
+	struct list_head	queue;	/* pending messages; head is active */
+	struct spi_transfer	*cur;	/* transfer in progress */
+	unsigned		cur_pos;	/* bytes of cur already done */
+	unsigned		cur_len;	/* bytes in the chunk in flight */
+	unsigned		cur_bytes_per_word;
+
+	/* The tegra spi controller has a bug which causes the first word
+	 * in PIO transactions to be garbage.  Since packed DMA transactions
+	 * require transfers to be 4 byte aligned we need a bounce buffer
+	 * for the generic case.
+	 */
+	struct tegra_dma_req	rx_dma_req;
+	struct tegra_dma_channel *rx_dma;
+	u32			*rx_bb;	/* rx bounce buffer, BB_LEN words */
+	dma_addr_t		rx_bb_phys;	/* DMA address of rx_bb */
+};
+
+/* Read the controller register at byte offset @reg from the mapped base. */
+static inline unsigned long
+spi_tegra_readl(struct spi_tegra_data *tspi, unsigned long reg)
+{
+	return readl(tspi->base + reg);
+}
+
+/* Write @val to the controller register at byte offset @reg. */
+static inline void spi_tegra_writel(struct spi_tegra_data *tspi,
+				    unsigned long val, unsigned long reg)
+{
+	writel(val, tspi->base + reg);
+}
+
+/* Program the DMA block size, queue the rx DMA request and enable the DMA. */
+static void spi_tegra_go(struct spi_tegra_data *tspi)
+{
+	unsigned long dma_ctl;
+	unsigned words = tspi->rx_dma_req.size / 4;	/* 4 bytes per word */
+
+	wmb();	/* write barrier ahead of the DMA register writes */
+	dma_ctl = spi_tegra_readl(tspi, SLINK_DMA_CTL);
+	dma_ctl &= ~(SLINK_DMA_BLOCK_SIZE(~0) | SLINK_DMA_EN);
+	dma_ctl |= SLINK_DMA_BLOCK_SIZE(words - 1);
+	spi_tegra_writel(tspi, dma_ctl, SLINK_DMA_CTL);
+
+	/* The request is queued before SLINK_DMA_EN is set. */
+	tegra_dma_enqueue_req(tspi->rx_dma, &tspi->rx_dma_req);
+	spi_tegra_writel(tspi, dma_ctl | SLINK_DMA_EN, SLINK_DMA_CTL);
+}
+
+/*
+ * Load the next chunk of @t (up to BB_LEN words from cur_pos) into the TX FIFO
+ * and size the rx DMA request to match.  Rx-only transfers (NULL tx_buf) load
+ * nothing.  Returns the chunk length in bytes.
+ */
+static unsigned spi_tegra_fill_tx_fifo(struct spi_tegra_data *tspi,
+				  struct spi_transfer *t)
+{
+	unsigned bpw = tspi->cur_bytes_per_word, i, j;
+	unsigned len = min(t->len - tspi->cur_pos, BB_LEN * bpw);
+	const u8 *tx_buf = t->tx_buf;
+	unsigned long val;
+
+	val = spi_tegra_readl(tspi, SLINK_COMMAND);
+	val &= ~SLINK_WORD_SIZE(~0);
+	val |= SLINK_WORD_SIZE(len / bpw - 1);
+	spi_tegra_writel(tspi, val, SLINK_COMMAND);
+
+	for (i = 0; tx_buf && i < len; i += bpw) {
+		for (val = 0, j = 0; j < bpw; j++)	/* little-endian packing */
+			val |= (unsigned long)tx_buf[tspi->cur_pos + i + j] << j * 8;
+		spi_tegra_writel(tspi, val, SLINK_TX_FIFO);
+	}
+	tspi->rx_dma_req.size = len / bpw * 4;	/* DMA moves one u32 per word */
+	return len;
+}
+
+/*
+ * Unpack the received chunk from the rx bounce buffer (one u32 per word)
+ * into @t->rx_buf at cur_pos.  Tx-only transfers (NULL rx_buf) copy nothing.
+ * Returns the chunk length in bytes.
+ */
+static unsigned spi_tegra_drain_rx_fifo(struct spi_tegra_data *tspi,
+				  struct spi_transfer *t)
+{
+	unsigned bpw = tspi->cur_bytes_per_word, len = tspi->cur_len, i, j;
+	u8 *dst = t->rx_buf ? (u8 *)t->rx_buf + tspi->cur_pos : NULL;
+
+	for (i = 0; dst && i < len; i += bpw)
+		for (j = 0; j < bpw; j++)	/* least significant byte first */
+			dst[i + j] = (tspi->rx_bb[i / bpw] >> (j * 8)) & 0xff;
+	return len;
+}
+
+/*
+ * Program the controller for transfer @t on device @spi (clock rate, word
+ * size, chip select, direction and clock mode), then load the first chunk
+ * into the TX FIFO and start the DMA.
+ */
+static void spi_tegra_start_transfer(struct spi_device *spi,
+				    struct spi_transfer *t)
+{
+	struct spi_tegra_data *tspi = spi_master_get_devdata(spi->master);
+	u32 speed;
+	u8 bits_per_word;
+	unsigned long val;
+
+	speed = t->speed_hz ? t->speed_hz : spi->max_speed_hz;
+	bits_per_word = t->bits_per_word ? t->bits_per_word  :
+		spi->bits_per_word;
+	tspi->cur_bytes_per_word = (bits_per_word - 1) / 8 + 1;
+
+	if (speed != tspi->cur_speed)
+		clk_set_rate(tspi->clk, speed);
+	if (tspi->cur_speed == 0)
+		clk_enable(tspi->clk);
+	tspi->cur_speed = speed;
+
+	/* Drop the previous chip select and both direction enables; the '~'
+	 * must span the whole mask or stale RXEN/TXEN bits are never cleared.
+	 * NOTE(review): completion relies on rx DMA - verify tx-only on hw.
+	 */
+	val = spi_tegra_readl(tspi, SLINK_COMMAND2);
+	val &= ~(SLINK_SS_EN_CS(~0) | SLINK_RXEN | SLINK_TXEN);
+	if (t->rx_buf)
+		val |= SLINK_RXEN;
+	if (t->tx_buf)
+		val |= SLINK_TXEN;
+	val |= SLINK_SS_EN_CS(spi->chip_select) | SLINK_SPIE;
+	spi_tegra_writel(tspi, val, SLINK_COMMAND2);
+
+	val = spi_tegra_readl(tspi, SLINK_COMMAND);
+	val &= ~SLINK_BIT_LENGTH(~0);
+	val |= SLINK_BIT_LENGTH(bits_per_word - 1);
+
+	/* FIXME: should probably control CS manually so that we can be sure
+	 * it does not go low between transfer and to support delay_usecs
+	 * correctly.
+	 */
+	val &= ~SLINK_IDLE_SCLK_MASK & ~SLINK_CK_SDA & ~SLINK_CS_SW;
+	if (spi->mode & SPI_CPHA)
+		val |= SLINK_CK_SDA;
+	if (spi->mode & SPI_CPOL)
+		val |= SLINK_IDLE_SCLK_DRIVE_HIGH;
+	else
+		val |= SLINK_IDLE_SCLK_DRIVE_LOW;
+	val |= SLINK_M_S;
+	spi_tegra_writel(tspi, val, SLINK_COMMAND);
+
+	spi_tegra_writel(tspi, SLINK_RX_FLUSH | SLINK_TX_FLUSH, SLINK_STATUS);
+
+	tspi->cur = t;
+	tspi->cur_pos = 0;
+	tspi->cur_len = spi_tegra_fill_tx_fifo(tspi, t);
+	spi_tegra_go(tspi);
+}
+
+/* Reset @m's result fields and start its first transfer on @spi. */
+static void spi_tegra_start_message(struct spi_device *spi,
+				    struct spi_message *m)
+{
+	struct list_head *head = &m->transfers;
+
+	m->status = 0;
+	m->actual_length = 0;
+	spi_tegra_start_transfer(spi, list_first_entry(head,
+				 struct spi_transfer, transfer_list));
+}
+
+/*
+ * rx DMA completion: account for the finished chunk, then start the next
+ * chunk, transfer or message.  m->complete() is called only after tspi->lock
+ * is dropped, so the callback may queue new messages without deadlocking.
+ */
+static void tegra_spi_rx_dma_complete(struct tegra_dma_req *req)
+{
+	struct spi_tegra_data *tspi = req->dev;
+	unsigned long flags, val;
+	struct spi_message *m, *done = NULL;
+	struct spi_device *spi;
+	unsigned chunk;
+	int timeout;
+
+	/* the SPI controller may come back with both the BSY and RDY bits
+	 * set.  In this case we need to wait for the BSY bit to clear so
+	 * that we are sure the DMA is finished.  1000 reads was empirically
+	 * determined to be long enough.
+	 */
+	for (timeout = 0; timeout < 1000; timeout++)
+		if (!(spi_tegra_readl(tspi, SLINK_STATUS) & SLINK_BSY))
+			break;
+
+	spin_lock_irqsave(&tspi->lock, flags);
+	val = spi_tegra_readl(tspi, SLINK_STATUS);
+	val |= SLINK_RDY;
+	spi_tegra_writel(tspi, val, SLINK_STATUS);
+
+	m = list_first_entry(&tspi->queue, struct spi_message, queue);
+	spi = m->state;
+	if (timeout >= 1000)	/* BSY never cleared */
+		m->status = -EIO;
+
+	/* count this chunk only: cur_pos accumulates over the transfer */
+	chunk = spi_tegra_drain_rx_fifo(tspi, tspi->cur);
+	tspi->cur_pos += chunk;
+	m->actual_length += chunk;
+
+	if (tspi->cur_pos < tspi->cur->len) {
+		tspi->cur_len = spi_tegra_fill_tx_fifo(tspi, tspi->cur);
+		spi_tegra_go(tspi);
+	} else if (!list_is_last(&tspi->cur->transfer_list, &m->transfers)) {
+		tspi->cur = list_first_entry(&tspi->cur->transfer_list,
+					struct spi_transfer, transfer_list);
+		spi_tegra_start_transfer(spi, tspi->cur);
+	} else {
+		list_del(&m->queue);
+		done = m;	/* completed below, outside the lock */
+		if (!list_empty(&tspi->queue)) {
+			m = list_first_entry(&tspi->queue, struct spi_message,
+					     queue);
+			spi_tegra_start_message(m->state, m);
+		} else {
+			clk_disable(tspi->clk);
+			tspi->cur_speed = 0;
+		}
+	}
+
+	spin_unlock_irqrestore(&tspi->lock, flags);
+	if (done)
+		done->complete(done->context);
+}
+
+/*
+ * spi_master setup hook: set or clear the chip-select polarity bit that
+ * belongs to @spi->chip_select according to SPI_CS_HIGH.  Returns 0, or
+ * -EINVAL for a chip select outside 0..3.
+ */
+static int spi_tegra_setup(struct spi_device *spi)
+{
+	struct spi_tegra_data *tspi = spi_master_get_devdata(spi->master);
+	unsigned long cs_bit;
+	unsigned long val;
+	unsigned long flags;
+
+	dev_dbg(&spi->dev, "setup %d bpw, %scpol, %scpha, %dHz\n",
+		spi->bits_per_word,
+		spi->mode & SPI_CPOL ? "" : "~",
+		spi->mode & SPI_CPHA ? "" : "~",
+		spi->max_speed_hz);
+
+	switch (spi->chip_select) {
+	case 0:
+		cs_bit = SLINK_CS_POLARITY;
+		break;
+	case 1:
+		cs_bit = SLINK_CS_POLARITY1;
+		break;
+	case 2:
+		cs_bit = SLINK_CS_POLARITY2;
+		break;
+	case 3:	/* fourth chip select; num_chipselect is 4 */
+		cs_bit = SLINK_CS_POLARITY3;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	spin_lock_irqsave(&tspi->lock, flags);
+
+	val = spi_tegra_readl(tspi, SLINK_COMMAND);
+	if (spi->mode & SPI_CS_HIGH)
+		val |= cs_bit;
+	else
+		val &= ~cs_bit;
+	spi_tegra_writel(tspi, val, SLINK_COMMAND);
+
+	spin_unlock_irqrestore(&tspi->lock, flags);
+
+	return 0;
+}
+
+/*
+ * spi_master transfer hook: validate @m, queue it and, if the queue was
+ * empty, start it immediately.  Returns 0 once queued, -EINVAL otherwise.
+ */
+static int spi_tegra_transfer(struct spi_device *spi, struct spi_message *m)
+{
+	struct spi_tegra_data *tspi = spi_master_get_devdata(spi->master);
+	struct spi_transfer *t;
+	unsigned long flags;
+	int was_empty;
+	u8 bits;
+
+	if (list_empty(&m->transfers) || !m->complete)
+		return -EINVAL;
+
+	list_for_each_entry(t, &m->transfers, transfer_list) {
+		bits = t->bits_per_word ? t->bits_per_word : spi->bits_per_word;
+		if (bits > 32 || (!t->rx_buf && !t->tx_buf))
+			return -EINVAL;
+		/* the FIFO code moves whole words: reject empty/partial ones */
+		if (t->len == 0 || t->len % ((bits - 1) / 8 + 1))
+			return -EINVAL;
+	}
+
+	m->state = spi;
+	spin_lock_irqsave(&tspi->lock, flags);
+	was_empty = list_empty(&tspi->queue);
+	list_add_tail(&m->queue, &tspi->queue);
+	if (was_empty)
+		spi_tegra_start_message(spi, m);
+	spin_unlock_irqrestore(&tspi->lock, flags);
+
+	return 0;
+}
+
+/*
+ * Probe: acquire registers, clock, rx DMA channel and bounce buffer, then
+ * register the master.  pdev->id must index spi_tegra_req_sels[].
+ */
+static int __init spi_tegra_probe(struct platform_device *pdev)
+{
+	struct spi_master	*master;
+	struct spi_tegra_data	*tspi;
+	struct resource		*r;
+	int ret;
+
+	if (pdev->id < 0 || pdev->id >= ARRAY_SIZE(spi_tegra_req_sels))
+		return -ENODEV;
+
+	master = spi_alloc_master(&pdev->dev, sizeof *tspi);
+	if (master == NULL) {
+		dev_err(&pdev->dev, "master allocation failed\n");
+		return -ENOMEM;
+	}
+
+	/* the spi->mode bits understood by this driver: */
+	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH;
+	master->bus_num = pdev->id;
+	master->setup = spi_tegra_setup;
+	master->transfer = spi_tegra_transfer;
+	master->num_chipselect = 4;
+
+	dev_set_drvdata(&pdev->dev, master);
+	tspi = spi_master_get_devdata(master);
+	tspi->master = master;
+	tspi->pdev = pdev;
+	spin_lock_init(&tspi->lock);
+	INIT_LIST_HEAD(&tspi->queue);
+
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (r == NULL) {
+		ret = -ENODEV;
+		goto err0;
+	}
+	if (!request_mem_region(r->start, (r->end - r->start) + 1,
+				dev_name(&pdev->dev))) {
+		ret = -EBUSY;
+		goto err0;
+	}
+
+	tspi->phys = r->start;
+	tspi->base = ioremap(r->start, r->end - r->start + 1);
+	if (!tspi->base) {
+		dev_err(&pdev->dev, "can't ioremap iomem\n");
+		ret = -ENOMEM;
+		goto err1;
+	}
+
+	tspi->clk = clk_get(&pdev->dev, NULL);
+	if (IS_ERR_OR_NULL(tspi->clk)) {
+		dev_err(&pdev->dev, "can not get clock\n");
+		ret = tspi->clk ? PTR_ERR(tspi->clk) : -ENODEV;	/* never 0 */
+		goto err2;
+	}
+
+	tspi->rx_dma = tegra_dma_allocate_channel(TEGRA_DMA_MODE_ONESHOT);
+	if (!tspi->rx_dma) {
+		dev_err(&pdev->dev, "can not allocate rx dma channel\n");
+		ret = -ENODEV;
+		goto err3;
+	}
+	tspi->rx_bb = dma_alloc_coherent(&pdev->dev, sizeof(u32) * BB_LEN,
+					 &tspi->rx_bb_phys, GFP_KERNEL);
+	if (!tspi->rx_bb) {
+		dev_err(&pdev->dev, "can not allocate rx bounce buffer\n");
+		ret = -ENOMEM;
+		goto err4;
+	}
+
+	tspi->rx_dma_req.complete = tegra_spi_rx_dma_complete;
+	tspi->rx_dma_req.to_memory = 1;
+	tspi->rx_dma_req.dest_addr = tspi->rx_bb_phys;
+	tspi->rx_dma_req.dest_bus_width = 32;
+	tspi->rx_dma_req.source_addr = tspi->phys + SLINK_RX_FIFO;
+	tspi->rx_dma_req.source_bus_width = 32;
+	tspi->rx_dma_req.source_wrap = 4;
+	tspi->rx_dma_req.req_sel = spi_tegra_req_sels[pdev->id];
+	tspi->rx_dma_req.dev = tspi;
+
+	ret = spi_register_master(master);
+	if (ret < 0)
+		goto err5;
+	return ret;
+
+err5:
+	dma_free_coherent(&pdev->dev, sizeof(u32) * BB_LEN,
+			  tspi->rx_bb, tspi->rx_bb_phys);
+err4:
+	tegra_dma_free_channel(tspi->rx_dma);
+err3:
+	clk_put(tspi->clk);
+err2:
+	iounmap(tspi->base);
+err1:
+	release_mem_region(r->start, (r->end - r->start) + 1);
+err0:
+	spi_master_put(master);
+	return ret;
+}
+
+/* Unregister the master, then release everything probe acquired. */
+static int __devexit spi_tegra_remove(struct platform_device *pdev)
+{
+	struct spi_master	*master;
+	struct spi_tegra_data	*tspi;
+	struct resource		*r;
+
+	/* extra reference keeps tspi (master devdata) valid until the put */
+	master = spi_master_get(dev_get_drvdata(&pdev->dev));
+	tspi = spi_master_get_devdata(master);
+	spi_unregister_master(master);
+
+	tegra_dma_free_channel(tspi->rx_dma);
+	dma_free_coherent(&pdev->dev, sizeof(u32) * BB_LEN,
+			  tspi->rx_bb, tspi->rx_bb_phys);
+	clk_put(tspi->clk);
+	iounmap(tspi->base);
+	spi_master_put(master);
+
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	release_mem_region(r->start, (r->end - r->start) + 1);
+	return 0;
+}
+
+MODULE_ALIAS("platform:spi_tegra");
+
+static struct platform_driver spi_tegra_driver = {
+	.driver = {
+		.name =		"spi_tegra",
+		.owner =	THIS_MODULE,
+	},
+	.remove =	__devexit_p(spi_tegra_remove),
+};	/* no .probe: it is passed to platform_driver_probe() at init */
+
+static int __init spi_tegra_init(void)
+{
+	return platform_driver_probe(&spi_tegra_driver, spi_tegra_probe);
+}
+module_init(spi_tegra_init);	/* registers the driver with its __init probe */
+
+static void __exit spi_tegra_exit(void)
+{
+	platform_driver_unregister(&spi_tegra_driver);
+}
+module_exit(spi_tegra_exit);	/* unregisters what spi_tegra_init() registered */
+
+MODULE_LICENSE("GPL");