Merge tag 'spi-v4.1' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/spi

Pull spi updates from Mark Brown:
 "Only one framework update this time around, a change from Lars-Peter
  to move fully to pm_ops and remove the legacy bus PM ops.  Otherwise
  it's all driver updates:

   - make the spidev driver complain loudly if registered as spidev with
     DT rather than with a compatible string, hopefully helping people
     avoid making that mistake.

   - error handling and robustness fixes for the Designware and Intel
     MID drivers from Andy Shevchenko.

   - substantial performance improvements for the Raspberry Pi driver
     from Martin Sperl.

   - several new features for spidev_test from Adrian Remonda and Ian
     Abbott"

* tag 'spi-v4.1' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/spi: (75 commits)
  spi: bcm2835: enabling polling mode for transfers shorter than 30us
  spi: bcm2835: transform native-cs to gpio-cs on first spi_setup
  spi: img-spfi: Control CS lines with GPIO
  spi: img-spfi: Reset controller after each message
  spi: img-spfi: Implement a handle_err() callback
  spi: img-spfi: Setup TRANSACTION register before CONTROL register
  spi: Make master->handle_err() callback optional to avoid crashes
  spi: img-spfi: Limit bit clock to 1/4th of input clock
  spi: img-spfi: Implement a prepare_message() callback
  spi: fsl-dspi: Add ~50ns delay between cs and sck
  spi: fsl-dspi: Add cs-sck delays
  spi: fsl-dspi: Fix clock rate scale values
  spi: signedness bug in qspi_trigger_transfer_out_int()
  spi: imx: read back the RX/TX watermark levels earlier
  spi: spi-bfin5xx: Initialize cr_width in bfin_spi_pump_transfers()
  spi: bitbang: only toggle bitchanges
  spi: pxa2xx: missing break in pxa2xx_ssp_get_clk_div()
  spi: fsl-dspi: Fix clock rate scale values
  spi: Using Trigger number to transmit/receive data
  spi: bcm2835: fill FIFO before enabling interrupts to reduce interrupts/message
  ...
diff --git a/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt b/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt
index aad527b..523341a 100644
--- a/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt
+++ b/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt
@@ -2,11 +2,21 @@
   (CSPI/eCSPI) for i.MX
 
 Required properties:
-- compatible : Should be "fsl,<soc>-cspi" or "fsl,<soc>-ecspi"
+- compatible :
+  - "fsl,imx1-cspi" for SPI compatible with the one integrated on i.MX1
+  - "fsl,imx21-cspi" for SPI compatible with the one integrated on i.MX21
+  - "fsl,imx27-cspi" for SPI compatible with the one integrated on i.MX27
+  - "fsl,imx31-cspi" for SPI compatible with the one integrated on i.MX31
+  - "fsl,imx35-cspi" for SPI compatible with the one integrated on i.MX35
+  - "fsl,imx51-ecspi" for SPI compatible with the one integrated on i.MX51
 - reg : Offset and length of the register set for the device
 - interrupts : Should contain CSPI/eCSPI interrupt
 - fsl,spi-num-chipselects : Contains the number of the chipselect
 - cs-gpios : Specifies the gpio pins to be used for chipselects.
+- clocks : Clock specifiers for both ipg and per clocks.
+- clock-names : Clock names should include both "ipg" and "per"
+See the clock consumer binding,
+	Documentation/devicetree/bindings/clock/clock-bindings.txt
 - dmas: DMA specifiers for tx and rx dma. See the DMA client binding,
 		Documentation/devicetree/bindings/dma/dma.txt
 - dma-names: DMA request names should include "tx" and "rx" if present.
diff --git a/Documentation/devicetree/bindings/spi/qcom,spi-qup.txt b/Documentation/devicetree/bindings/spi/qcom,spi-qup.txt
index e2c88df..5c09077 100644
--- a/Documentation/devicetree/bindings/spi/qcom,spi-qup.txt
+++ b/Documentation/devicetree/bindings/spi/qcom,spi-qup.txt
@@ -33,6 +33,11 @@
 		nodes.  If unspecified, a single SPI device without a chip
 		select can be used.
 
+- dmas:         Two DMA channel specifiers following the convention outlined
+                in bindings/dma/dma.txt
+- dma-names:    Names for the dma channels, if present. There must be at
+                least one channel named "tx" for transmit and named "rx" for
+                receive.
 
 SPI slave nodes must be children of the SPI master node and can contain
 properties described in Documentation/devicetree/bindings/spi/spi-bus.txt
@@ -51,6 +56,9 @@
 		clocks = <&gcc GCC_BLSP2_QUP2_SPI_APPS_CLK>, <&gcc GCC_BLSP2_AHB_CLK>;
 		clock-names = "core", "iface";
 
+		dmas = <&blsp1_bam 13>, <&blsp1_bam 12>;
+		dma-names = "rx", "tx";
+
 		pinctrl-names = "default";
 		pinctrl-0 = <&spi8_default>;
 
diff --git a/Documentation/devicetree/bindings/spi/spi-fsl-dspi.txt b/Documentation/devicetree/bindings/spi/spi-fsl-dspi.txt
index cbbe16e..70af78a 100644
--- a/Documentation/devicetree/bindings/spi/spi-fsl-dspi.txt
+++ b/Documentation/devicetree/bindings/spi/spi-fsl-dspi.txt
@@ -16,6 +16,12 @@
   in big endian mode, otherwise in native mode(same with CPU), for more
   detail please see: Documentation/devicetree/bindings/regmap/regmap.txt.
 
+Optional SPI slave node properties:
+- fsl,spi-cs-sck-delay: a delay in nanoseconds between activating chip
+  select and the start of clock signal, at the start of a transfer.
+- fsl,spi-sck-cs-delay: a delay in nanoseconds between stopping the clock
+  signal and deactivating chip select, at the end of a transfer.
+
 Example:
 
 dspi0@4002c000 {
@@ -43,6 +49,8 @@
 		reg = <0>;
 		linux,modalias = "m25p80";
 		modal = "at26df081a";
+		fsl,spi-cs-sck-delay = <100>;
+		fsl,spi-sck-cs-delay = <50>;
 	};
 };
 
diff --git a/Documentation/devicetree/bindings/spi/spi-img-spfi.txt b/Documentation/devicetree/bindings/spi/spi-img-spfi.txt
index c7dd50f..e02fbf1 100644
--- a/Documentation/devicetree/bindings/spi/spi-img-spfi.txt
+++ b/Documentation/devicetree/bindings/spi/spi-img-spfi.txt
@@ -14,6 +14,7 @@
 - dma-names: Must include the following entries:
   - rx
   - tx
+- cs-gpios: Must specify the GPIOs used for chipselect lines.
 - #address-cells: Must be 1.
 - #size-cells: Must be 0.
 
diff --git a/Documentation/devicetree/bindings/spi/spi-rockchip.txt b/Documentation/devicetree/bindings/spi/spi-rockchip.txt
index 467dec4..0c491bd 100644
--- a/Documentation/devicetree/bindings/spi/spi-rockchip.txt
+++ b/Documentation/devicetree/bindings/spi/spi-rockchip.txt
@@ -24,6 +24,9 @@
 - dmas: DMA specifiers for tx and rx dma. See the DMA client binding,
 		Documentation/devicetree/bindings/dma/dma.txt
 - dma-names: DMA request names should include "tx" and "rx" if present.
+- rx-sample-delay-ns: nanoseconds to delay after the SCLK edge before sampling
+		Rx data (may need to be fine tuned for high capacitance lines).
+		No delay (0) by default.
 
 
 Example:
@@ -33,6 +36,7 @@
 		reg = <0xff110000 0x1000>;
 		dmas = <&pdma1 11>, <&pdma1 12>;
 		dma-names = "tx", "rx";
+		rx-sample-delay-ns = <10>;
 		#address-cells = <1>;
 		#size-cells = <0>;
 		interrupts = <GIC_SPI 44 IRQ_TYPE_LEVEL_HIGH>;
diff --git a/Documentation/spi/spi-summary b/Documentation/spi/spi-summary
index d29734b..d1824b3 100644
--- a/Documentation/spi/spi-summary
+++ b/Documentation/spi/spi-summary
@@ -342,12 +342,11 @@
 		.driver = {
 			.name		= "CHIP",
 			.owner		= THIS_MODULE,
+			.pm		= &CHIP_pm_ops,
 		},
 
 		.probe		= CHIP_probe,
 		.remove		= CHIP_remove,
-		.suspend	= CHIP_suspend,
-		.resume		= CHIP_resume,
 	};
 
 The driver core will automatically attempt to bind this driver to any SPI
diff --git a/Documentation/spi/spidev_test.c b/Documentation/spi/spidev_test.c
index 3a2f9d5..94f574b 100644
--- a/Documentation/spi/spidev_test.c
+++ b/Documentation/spi/spidev_test.c
@@ -15,6 +15,7 @@
 #include <unistd.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <string.h>
 #include <getopt.h>
 #include <fcntl.h>
 #include <sys/ioctl.h>
@@ -34,24 +35,79 @@
 static uint8_t bits = 8;
 static uint32_t speed = 500000;
 static uint16_t delay;
+static int verbose;
 
-static void transfer(int fd)
+uint8_t default_tx[] = {
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0x40, 0x00, 0x00, 0x00, 0x00, 0x95,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xF0, 0x0D,
+};
+
+uint8_t default_rx[ARRAY_SIZE(default_tx)] = {0, };
+char *input_tx;
+
+static void hex_dump(const void *src, size_t length, size_t line_size, char *prefix)
+{
+	int i = 0;
+	const unsigned char *address = src;
+	const unsigned char *line = address;
+	unsigned char c;
+
+	printf("%s | ", prefix);
+	while (length-- > 0) {
+		printf("%02X ", *address++);
+		if (!(++i % line_size) || (length == 0 && i % line_size)) {
+			if (length == 0) {
+				while (i++ % line_size)
+					printf("__ ");
+			}
+			printf(" | ");  /* right close */
+			while (line < address) {
+				c = *line++;
+				printf("%c", (c < 33 || c == 255) ? 0x2E : c);
+			}
+			printf("\n");
+			if (length > 0)
+				printf("%s | ", prefix);
+		}
+	}
+}
+
+/*
+ *  Unescape - process hexadecimal escape character
+ *      converts shell input "\x23" -> 0x23
+ */
+int unespcape(char *_dst, char *_src, size_t len)
+{
+	int ret = 0;
+	char *src = _src;
+	char *dst = _dst;
+	unsigned int ch;
+
+	while (*src) {
+		if (*src == '\\' && *(src+1) == 'x') {
+			sscanf(src + 2, "%2x", &ch);
+			src += 4;
+			*dst++ = (unsigned char)ch;
+		} else {
+			*dst++ = *src++;
+		}
+		ret++;
+	}
+	return ret;
+}
+
+static void transfer(int fd, uint8_t const *tx, uint8_t const *rx, size_t len)
 {
 	int ret;
-	uint8_t tx[] = {
-		0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
-		0x40, 0x00, 0x00, 0x00, 0x00, 0x95,
-		0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
-		0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
-		0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
-		0xDE, 0xAD, 0xBE, 0xEF, 0xBA, 0xAD,
-		0xF0, 0x0D,
-	};
-	uint8_t rx[ARRAY_SIZE(tx)] = {0, };
+
 	struct spi_ioc_transfer tr = {
 		.tx_buf = (unsigned long)tx,
 		.rx_buf = (unsigned long)rx,
-		.len = ARRAY_SIZE(tx),
+		.len = len,
 		.delay_usecs = delay,
 		.speed_hz = speed,
 		.bits_per_word = bits,
@@ -76,12 +132,9 @@
 	if (ret < 1)
 		pabort("can't send spi message");
 
-	for (ret = 0; ret < ARRAY_SIZE(tx); ret++) {
-		if (!(ret % 6))
-			puts("");
-		printf("%.2X ", rx[ret]);
-	}
-	puts("");
+	if (verbose)
+		hex_dump(tx, len, 32, "TX");
+	hex_dump(rx, len, 32, "RX");
 }
 
 static void print_usage(const char *prog)
@@ -97,6 +150,8 @@
 	     "  -L --lsb      least significant bit first\n"
 	     "  -C --cs-high  chip select active high\n"
 	     "  -3 --3wire    SI/SO signals shared\n"
+	     "  -v --verbose  Verbose (show tx buffer)\n"
+	     "  -p            Send data (e.g. \"1234\\xde\\xad\")\n"
 	     "  -N --no-cs    no chip select\n"
 	     "  -R --ready    slave pulls low to pause\n"
 	     "  -2 --dual     dual transfer\n"
@@ -121,12 +176,13 @@
 			{ "no-cs",   0, 0, 'N' },
 			{ "ready",   0, 0, 'R' },
 			{ "dual",    0, 0, '2' },
+			{ "verbose", 0, 0, 'v' },
 			{ "quad",    0, 0, '4' },
 			{ NULL, 0, 0, 0 },
 		};
 		int c;
 
-		c = getopt_long(argc, argv, "D:s:d:b:lHOLC3NR24", lopts, NULL);
+		c = getopt_long(argc, argv, "D:s:d:b:lHOLC3NR24p:v", lopts, NULL);
 
 		if (c == -1)
 			break;
@@ -165,9 +221,15 @@
 		case 'N':
 			mode |= SPI_NO_CS;
 			break;
+		case 'v':
+			verbose = 1;
+			break;
 		case 'R':
 			mode |= SPI_READY;
 			break;
+		case 'p':
+			input_tx = optarg;
+			break;
 		case '2':
 			mode |= SPI_TX_DUAL;
 			break;
@@ -191,6 +253,9 @@
 {
 	int ret = 0;
 	int fd;
+	uint8_t *tx;
+	uint8_t *rx;
+	int size;
 
 	parse_opts(argc, argv);
 
@@ -235,7 +300,17 @@
 	printf("bits per word: %d\n", bits);
 	printf("max speed: %d Hz (%d KHz)\n", speed, speed/1000);
 
-	transfer(fd);
+	if (input_tx) {
+		size = strlen(input_tx+1);
+		tx = malloc(size);
+		rx = malloc(size);
+		size = unespcape((char *)tx, input_tx, size);
+		transfer(fd, tx, rx, size);
+		free(rx);
+		free(tx);
+	} else {
+		transfer(fd, default_tx, default_rx, sizeof(default_tx));
+	}
 
 	close(fd);
 
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index a874b6e..942ca54 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -51,19 +51,6 @@
 	  OS and tools for MIC to use with this driver are available from
 	  <http://software.intel.com/en-us/mic-developer>.
 
-config INTEL_MID_DMAC
-	tristate "Intel MID DMA support for Peripheral DMA controllers"
-	depends on PCI && X86
-	select DMA_ENGINE
-	default n
-	help
-	  Enable support for the Intel(R) MID DMA engine present
-	  in Intel MID chipsets.
-
-	  Say Y here if you have such a chipset.
-
-	  If unsure, say N.
-
 config ASYNC_TX_ENABLE_CHANNEL_SWITCH
 	bool
 
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index f915f61..539d482 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -6,7 +6,6 @@
 obj-$(CONFIG_DMA_ACPI) += acpi-dma.o
 obj-$(CONFIG_DMA_OF) += of-dma.o
 
-obj-$(CONFIG_INTEL_MID_DMAC) += intel_mid_dma.o
 obj-$(CONFIG_DMATEST) += dmatest.o
 obj-$(CONFIG_INTEL_IOATDMA) += ioat/
 obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o
diff --git a/drivers/dma/intel_mid_dma.c b/drivers/dma/intel_mid_dma.c
deleted file mode 100644
index 5aaead9..0000000
--- a/drivers/dma/intel_mid_dma.c
+++ /dev/null
@@ -1,1447 +0,0 @@
-/*
- *  intel_mid_dma.c - Intel Langwell DMA Drivers
- *
- *  Copyright (C) 2008-10 Intel Corp
- *  Author: Vinod Koul <vinod.koul@intel.com>
- *  The driver design is based on dw_dmac driver
- *  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; version 2 of the License.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- *
- */
-#include <linux/pci.h>
-#include <linux/interrupt.h>
-#include <linux/pm_runtime.h>
-#include <linux/intel_mid_dma.h>
-#include <linux/module.h>
-
-#include "dmaengine.h"
-
-#define MAX_CHAN	4 /*max ch across controllers*/
-#include "intel_mid_dma_regs.h"
-
-#define INTEL_MID_DMAC1_ID		0x0814
-#define INTEL_MID_DMAC2_ID		0x0813
-#define INTEL_MID_GP_DMAC2_ID		0x0827
-#define INTEL_MFLD_DMAC1_ID		0x0830
-#define LNW_PERIPHRAL_MASK_BASE		0xFFAE8008
-#define LNW_PERIPHRAL_MASK_SIZE		0x10
-#define LNW_PERIPHRAL_STATUS		0x0
-#define LNW_PERIPHRAL_MASK		0x8
-
-struct intel_mid_dma_probe_info {
-	u8 max_chan;
-	u8 ch_base;
-	u16 block_size;
-	u32 pimr_mask;
-};
-
-#define INFO(_max_chan, _ch_base, _block_size, _pimr_mask) \
-	((kernel_ulong_t)&(struct intel_mid_dma_probe_info) {	\
-		.max_chan = (_max_chan),			\
-		.ch_base = (_ch_base),				\
-		.block_size = (_block_size),			\
-		.pimr_mask = (_pimr_mask),			\
-	})
-
-/*****************************************************************************
-Utility Functions*/
-/**
- * get_ch_index	-	convert status to channel
- * @status: status mask
- * @base: dma ch base value
- *
- * Modify the status mask and return the channel index needing
- * attention (or -1 if neither)
- */
-static int get_ch_index(int *status, unsigned int base)
-{
-	int i;
-	for (i = 0; i < MAX_CHAN; i++) {
-		if (*status & (1 << (i + base))) {
-			*status = *status & ~(1 << (i + base));
-			pr_debug("MDMA: index %d New status %x\n", i, *status);
-			return i;
-		}
-	}
-	return -1;
-}
-
-/**
- * get_block_ts	-	calculates dma transaction length
- * @len: dma transfer length
- * @tx_width: dma transfer src width
- * @block_size: dma controller max block size
- *
- * Based on src width calculate the DMA trsaction length in data items
- * return data items or FFFF if exceeds max length for block
- */
-static int get_block_ts(int len, int tx_width, int block_size)
-{
-	int byte_width = 0, block_ts = 0;
-
-	switch (tx_width) {
-	case DMA_SLAVE_BUSWIDTH_1_BYTE:
-		byte_width = 1;
-		break;
-	case DMA_SLAVE_BUSWIDTH_2_BYTES:
-		byte_width = 2;
-		break;
-	case DMA_SLAVE_BUSWIDTH_4_BYTES:
-	default:
-		byte_width = 4;
-		break;
-	}
-
-	block_ts = len/byte_width;
-	if (block_ts > block_size)
-		block_ts = 0xFFFF;
-	return block_ts;
-}
-
-/*****************************************************************************
-DMAC1 interrupt Functions*/
-
-/**
- * dmac1_mask_periphral_intr -	mask the periphral interrupt
- * @mid: dma device for which masking is required
- *
- * Masks the DMA periphral interrupt
- * this is valid for DMAC1 family controllers only
- * This controller should have periphral mask registers already mapped
- */
-static void dmac1_mask_periphral_intr(struct middma_device *mid)
-{
-	u32 pimr;
-
-	if (mid->pimr_mask) {
-		pimr = readl(mid->mask_reg + LNW_PERIPHRAL_MASK);
-		pimr |= mid->pimr_mask;
-		writel(pimr, mid->mask_reg + LNW_PERIPHRAL_MASK);
-	}
-	return;
-}
-
-/**
- * dmac1_unmask_periphral_intr -	unmask the periphral interrupt
- * @midc: dma channel for which masking is required
- *
- * UnMasks the DMA periphral interrupt,
- * this is valid for DMAC1 family controllers only
- * This controller should have periphral mask registers already mapped
- */
-static void dmac1_unmask_periphral_intr(struct intel_mid_dma_chan *midc)
-{
-	u32 pimr;
-	struct middma_device *mid = to_middma_device(midc->chan.device);
-
-	if (mid->pimr_mask) {
-		pimr = readl(mid->mask_reg + LNW_PERIPHRAL_MASK);
-		pimr &= ~mid->pimr_mask;
-		writel(pimr, mid->mask_reg + LNW_PERIPHRAL_MASK);
-	}
-	return;
-}
-
-/**
- * enable_dma_interrupt -	enable the periphral interrupt
- * @midc: dma channel for which enable interrupt is required
- *
- * Enable the DMA periphral interrupt,
- * this is valid for DMAC1 family controllers only
- * This controller should have periphral mask registers already mapped
- */
-static void enable_dma_interrupt(struct intel_mid_dma_chan *midc)
-{
-	dmac1_unmask_periphral_intr(midc);
-
-	/*en ch interrupts*/
-	iowrite32(UNMASK_INTR_REG(midc->ch_id), midc->dma_base + MASK_TFR);
-	iowrite32(UNMASK_INTR_REG(midc->ch_id), midc->dma_base + MASK_ERR);
-	return;
-}
-
-/**
- * disable_dma_interrupt -	disable the periphral interrupt
- * @midc: dma channel for which disable interrupt is required
- *
- * Disable the DMA periphral interrupt,
- * this is valid for DMAC1 family controllers only
- * This controller should have periphral mask registers already mapped
- */
-static void disable_dma_interrupt(struct intel_mid_dma_chan *midc)
-{
-	/*Check LPE PISR, make sure fwd is disabled*/
-	iowrite32(MASK_INTR_REG(midc->ch_id), midc->dma_base + MASK_BLOCK);
-	iowrite32(MASK_INTR_REG(midc->ch_id), midc->dma_base + MASK_TFR);
-	iowrite32(MASK_INTR_REG(midc->ch_id), midc->dma_base + MASK_ERR);
-	return;
-}
-
-/*****************************************************************************
-DMA channel helper Functions*/
-/**
- * mid_desc_get		-	get a descriptor
- * @midc: dma channel for which descriptor is required
- *
- * Obtain a descriptor for the channel. Returns NULL if none are free.
- * Once the descriptor is returned it is private until put on another
- * list or freed
- */
-static struct intel_mid_dma_desc *midc_desc_get(struct intel_mid_dma_chan *midc)
-{
-	struct intel_mid_dma_desc *desc, *_desc;
-	struct intel_mid_dma_desc *ret = NULL;
-
-	spin_lock_bh(&midc->lock);
-	list_for_each_entry_safe(desc, _desc, &midc->free_list, desc_node) {
-		if (async_tx_test_ack(&desc->txd)) {
-			list_del(&desc->desc_node);
-			ret = desc;
-			break;
-		}
-	}
-	spin_unlock_bh(&midc->lock);
-	return ret;
-}
-
-/**
- * mid_desc_put		-	put a descriptor
- * @midc: dma channel for which descriptor is required
- * @desc: descriptor to put
- *
- * Return a descriptor from lwn_desc_get back to the free pool
- */
-static void midc_desc_put(struct intel_mid_dma_chan *midc,
-			struct intel_mid_dma_desc *desc)
-{
-	if (desc) {
-		spin_lock_bh(&midc->lock);
-		list_add_tail(&desc->desc_node, &midc->free_list);
-		spin_unlock_bh(&midc->lock);
-	}
-}
-/**
- * midc_dostart		-		begin a DMA transaction
- * @midc: channel for which txn is to be started
- * @first: first descriptor of series
- *
- * Load a transaction into the engine. This must be called with midc->lock
- * held and bh disabled.
- */
-static void midc_dostart(struct intel_mid_dma_chan *midc,
-			struct intel_mid_dma_desc *first)
-{
-	struct middma_device *mid = to_middma_device(midc->chan.device);
-
-	/*  channel is idle */
-	if (midc->busy && test_ch_en(midc->dma_base, midc->ch_id)) {
-		/*error*/
-		pr_err("ERR_MDMA: channel is busy in start\n");
-		/* The tasklet will hopefully advance the queue... */
-		return;
-	}
-	midc->busy = true;
-	/*write registers and en*/
-	iowrite32(first->sar, midc->ch_regs + SAR);
-	iowrite32(first->dar, midc->ch_regs + DAR);
-	iowrite32(first->lli_phys, midc->ch_regs + LLP);
-	iowrite32(first->cfg_hi, midc->ch_regs + CFG_HIGH);
-	iowrite32(first->cfg_lo, midc->ch_regs + CFG_LOW);
-	iowrite32(first->ctl_lo, midc->ch_regs + CTL_LOW);
-	iowrite32(first->ctl_hi, midc->ch_regs + CTL_HIGH);
-	pr_debug("MDMA:TX SAR %x,DAR %x,CFGL %x,CFGH %x,CTLH %x, CTLL %x\n",
-		(int)first->sar, (int)first->dar, first->cfg_hi,
-		first->cfg_lo, first->ctl_hi, first->ctl_lo);
-	first->status = DMA_IN_PROGRESS;
-
-	iowrite32(ENABLE_CHANNEL(midc->ch_id), mid->dma_base + DMA_CHAN_EN);
-}
-
-/**
- * midc_descriptor_complete	-	process completed descriptor
- * @midc: channel owning the descriptor
- * @desc: the descriptor itself
- *
- * Process a completed descriptor and perform any callbacks upon
- * the completion. The completion handling drops the lock during the
- * callbacks but must be called with the lock held.
- */
-static void midc_descriptor_complete(struct intel_mid_dma_chan *midc,
-		struct intel_mid_dma_desc *desc)
-		__releases(&midc->lock) __acquires(&midc->lock)
-{
-	struct dma_async_tx_descriptor	*txd = &desc->txd;
-	dma_async_tx_callback callback_txd = NULL;
-	struct intel_mid_dma_lli	*llitem;
-	void *param_txd = NULL;
-
-	dma_cookie_complete(txd);
-	callback_txd = txd->callback;
-	param_txd = txd->callback_param;
-
-	if (desc->lli != NULL) {
-		/*clear the DONE bit of completed LLI in memory*/
-		llitem = desc->lli + desc->current_lli;
-		llitem->ctl_hi &= CLEAR_DONE;
-		if (desc->current_lli < desc->lli_length-1)
-			(desc->current_lli)++;
-		else
-			desc->current_lli = 0;
-	}
-	spin_unlock_bh(&midc->lock);
-	if (callback_txd) {
-		pr_debug("MDMA: TXD callback set ... calling\n");
-		callback_txd(param_txd);
-	}
-	if (midc->raw_tfr) {
-		desc->status = DMA_COMPLETE;
-		if (desc->lli != NULL) {
-			pci_pool_free(desc->lli_pool, desc->lli,
-						desc->lli_phys);
-			pci_pool_destroy(desc->lli_pool);
-			desc->lli = NULL;
-		}
-		list_move(&desc->desc_node, &midc->free_list);
-		midc->busy = false;
-	}
-	spin_lock_bh(&midc->lock);
-
-}
-/**
- * midc_scan_descriptors -		check the descriptors in channel
- *					mark completed when tx is completete
- * @mid: device
- * @midc: channel to scan
- *
- * Walk the descriptor chain for the device and process any entries
- * that are complete.
- */
-static void midc_scan_descriptors(struct middma_device *mid,
-				struct intel_mid_dma_chan *midc)
-{
-	struct intel_mid_dma_desc *desc = NULL, *_desc = NULL;
-
-	/*tx is complete*/
-	list_for_each_entry_safe(desc, _desc, &midc->active_list, desc_node) {
-		if (desc->status == DMA_IN_PROGRESS)
-			midc_descriptor_complete(midc, desc);
-	}
-	return;
-	}
-/**
- * midc_lli_fill_sg -		Helper function to convert
- *				SG list to Linked List Items.
- *@midc: Channel
- *@desc: DMA descriptor
- *@sglist: Pointer to SG list
- *@sglen: SG list length
- *@flags: DMA transaction flags
- *
- * Walk through the SG list and convert the SG list into Linked
- * List Items (LLI).
- */
-static int midc_lli_fill_sg(struct intel_mid_dma_chan *midc,
-				struct intel_mid_dma_desc *desc,
-				struct scatterlist *sglist,
-				unsigned int sglen,
-				unsigned int flags)
-{
-	struct intel_mid_dma_slave *mids;
-	struct scatterlist  *sg;
-	dma_addr_t lli_next, sg_phy_addr;
-	struct intel_mid_dma_lli *lli_bloc_desc;
-	union intel_mid_dma_ctl_lo ctl_lo;
-	union intel_mid_dma_ctl_hi ctl_hi;
-	int i;
-
-	pr_debug("MDMA: Entered midc_lli_fill_sg\n");
-	mids = midc->mid_slave;
-
-	lli_bloc_desc = desc->lli;
-	lli_next = desc->lli_phys;
-
-	ctl_lo.ctl_lo = desc->ctl_lo;
-	ctl_hi.ctl_hi = desc->ctl_hi;
-	for_each_sg(sglist, sg, sglen, i) {
-		/*Populate CTL_LOW and LLI values*/
-		if (i != sglen - 1) {
-			lli_next = lli_next +
-				sizeof(struct intel_mid_dma_lli);
-		} else {
-		/*Check for circular list, otherwise terminate LLI to ZERO*/
-			if (flags & DMA_PREP_CIRCULAR_LIST) {
-				pr_debug("MDMA: LLI is configured in circular mode\n");
-				lli_next = desc->lli_phys;
-			} else {
-				lli_next = 0;
-				ctl_lo.ctlx.llp_dst_en = 0;
-				ctl_lo.ctlx.llp_src_en = 0;
-			}
-		}
-		/*Populate CTL_HI values*/
-		ctl_hi.ctlx.block_ts = get_block_ts(sg_dma_len(sg),
-							desc->width,
-							midc->dma->block_size);
-		/*Populate SAR and DAR values*/
-		sg_phy_addr = sg_dma_address(sg);
-		if (desc->dirn ==  DMA_MEM_TO_DEV) {
-			lli_bloc_desc->sar  = sg_phy_addr;
-			lli_bloc_desc->dar  = mids->dma_slave.dst_addr;
-		} else if (desc->dirn ==  DMA_DEV_TO_MEM) {
-			lli_bloc_desc->sar  = mids->dma_slave.src_addr;
-			lli_bloc_desc->dar  = sg_phy_addr;
-		}
-		/*Copy values into block descriptor in system memroy*/
-		lli_bloc_desc->llp = lli_next;
-		lli_bloc_desc->ctl_lo = ctl_lo.ctl_lo;
-		lli_bloc_desc->ctl_hi = ctl_hi.ctl_hi;
-
-		lli_bloc_desc++;
-	}
-	/*Copy very first LLI values to descriptor*/
-	desc->ctl_lo = desc->lli->ctl_lo;
-	desc->ctl_hi = desc->lli->ctl_hi;
-	desc->sar = desc->lli->sar;
-	desc->dar = desc->lli->dar;
-
-	return 0;
-}
-/*****************************************************************************
-DMA engine callback Functions*/
-/**
- * intel_mid_dma_tx_submit -	callback to submit DMA transaction
- * @tx: dma engine descriptor
- *
- * Submit the DMA transaction for this descriptor, start if ch idle
- */
-static dma_cookie_t intel_mid_dma_tx_submit(struct dma_async_tx_descriptor *tx)
-{
-	struct intel_mid_dma_desc	*desc = to_intel_mid_dma_desc(tx);
-	struct intel_mid_dma_chan	*midc = to_intel_mid_dma_chan(tx->chan);
-	dma_cookie_t		cookie;
-
-	spin_lock_bh(&midc->lock);
-	cookie = dma_cookie_assign(tx);
-
-	if (list_empty(&midc->active_list))
-		list_add_tail(&desc->desc_node, &midc->active_list);
-	else
-		list_add_tail(&desc->desc_node, &midc->queue);
-
-	midc_dostart(midc, desc);
-	spin_unlock_bh(&midc->lock);
-
-	return cookie;
-}
-
-/**
- * intel_mid_dma_issue_pending -	callback to issue pending txn
- * @chan: chan where pending trascation needs to be checked and submitted
- *
- * Call for scan to issue pending descriptors
- */
-static void intel_mid_dma_issue_pending(struct dma_chan *chan)
-{
-	struct intel_mid_dma_chan	*midc = to_intel_mid_dma_chan(chan);
-
-	spin_lock_bh(&midc->lock);
-	if (!list_empty(&midc->queue))
-		midc_scan_descriptors(to_middma_device(chan->device), midc);
-	spin_unlock_bh(&midc->lock);
-}
-
-/**
- * intel_mid_dma_tx_status -	Return status of txn
- * @chan: chan for where status needs to be checked
- * @cookie: cookie for txn
- * @txstate: DMA txn state
- *
- * Return status of DMA txn
- */
-static enum dma_status intel_mid_dma_tx_status(struct dma_chan *chan,
-						dma_cookie_t cookie,
-						struct dma_tx_state *txstate)
-{
-	struct intel_mid_dma_chan *midc = to_intel_mid_dma_chan(chan);
-	enum dma_status ret;
-
-	ret = dma_cookie_status(chan, cookie, txstate);
-	if (ret != DMA_COMPLETE) {
-		spin_lock_bh(&midc->lock);
-		midc_scan_descriptors(to_middma_device(chan->device), midc);
-		spin_unlock_bh(&midc->lock);
-
-		ret = dma_cookie_status(chan, cookie, txstate);
-	}
-
-	return ret;
-}
-
-static int intel_mid_dma_config(struct dma_chan *chan,
-				struct dma_slave_config *slave)
-{
-	struct intel_mid_dma_chan	*midc = to_intel_mid_dma_chan(chan);
-	struct intel_mid_dma_slave *mid_slave;
-
-	BUG_ON(!midc);
-	BUG_ON(!slave);
-	pr_debug("MDMA: slave control called\n");
-
-	mid_slave = to_intel_mid_dma_slave(slave);
-
-	BUG_ON(!mid_slave);
-
-	midc->mid_slave = mid_slave;
-	return 0;
-}
-
-static int intel_mid_dma_terminate_all(struct dma_chan *chan)
-{
-	struct intel_mid_dma_chan	*midc = to_intel_mid_dma_chan(chan);
-	struct middma_device	*mid = to_middma_device(chan->device);
-	struct intel_mid_dma_desc	*desc, *_desc;
-	union intel_mid_dma_cfg_lo cfg_lo;
-
-	spin_lock_bh(&midc->lock);
-	if (midc->busy == false) {
-		spin_unlock_bh(&midc->lock);
-		return 0;
-	}
-	/*Suspend and disable the channel*/
-	cfg_lo.cfg_lo = ioread32(midc->ch_regs + CFG_LOW);
-	cfg_lo.cfgx.ch_susp = 1;
-	iowrite32(cfg_lo.cfg_lo, midc->ch_regs + CFG_LOW);
-	iowrite32(DISABLE_CHANNEL(midc->ch_id), mid->dma_base + DMA_CHAN_EN);
-	midc->busy = false;
-	/* Disable interrupts */
-	disable_dma_interrupt(midc);
-	midc->descs_allocated = 0;
-
-	spin_unlock_bh(&midc->lock);
-	list_for_each_entry_safe(desc, _desc, &midc->active_list, desc_node) {
-		if (desc->lli != NULL) {
-			pci_pool_free(desc->lli_pool, desc->lli,
-						desc->lli_phys);
-			pci_pool_destroy(desc->lli_pool);
-			desc->lli = NULL;
-		}
-		list_move(&desc->desc_node, &midc->free_list);
-	}
-	return 0;
-}
-
-
-/**
- * intel_mid_dma_prep_memcpy -	Prep memcpy txn
- * @chan: chan for DMA transfer
- * @dest: destn address
- * @src: src address
- * @len: DMA transfer len
- * @flags: DMA flags
- *
- * Perform a DMA memcpy. Note we support slave periphral DMA transfers only
- * The periphral txn details should be filled in slave structure properly
- * Returns the descriptor for this txn
- */
-static struct dma_async_tx_descriptor *intel_mid_dma_prep_memcpy(
-			struct dma_chan *chan, dma_addr_t dest,
-			dma_addr_t src, size_t len, unsigned long flags)
-{
-	struct intel_mid_dma_chan *midc;
-	struct intel_mid_dma_desc *desc = NULL;
-	struct intel_mid_dma_slave *mids;
-	union intel_mid_dma_ctl_lo ctl_lo;
-	union intel_mid_dma_ctl_hi ctl_hi;
-	union intel_mid_dma_cfg_lo cfg_lo;
-	union intel_mid_dma_cfg_hi cfg_hi;
-	enum dma_slave_buswidth width;
-
-	pr_debug("MDMA: Prep for memcpy\n");
-	BUG_ON(!chan);
-	if (!len)
-		return NULL;
-
-	midc = to_intel_mid_dma_chan(chan);
-	BUG_ON(!midc);
-
-	mids = midc->mid_slave;
-	BUG_ON(!mids);
-
-	pr_debug("MDMA:called for DMA %x CH %d Length %zu\n",
-				midc->dma->pci_id, midc->ch_id, len);
-	pr_debug("MDMA:Cfg passed Mode %x, Dirn %x, HS %x, Width %x\n",
-			mids->cfg_mode, mids->dma_slave.direction,
-			mids->hs_mode, mids->dma_slave.src_addr_width);
-
-	/*calculate CFG_LO*/
-	if (mids->hs_mode == LNW_DMA_SW_HS) {
-		cfg_lo.cfg_lo = 0;
-		cfg_lo.cfgx.hs_sel_dst = 1;
-		cfg_lo.cfgx.hs_sel_src = 1;
-	} else if (mids->hs_mode == LNW_DMA_HW_HS)
-		cfg_lo.cfg_lo = 0x00000;
-
-	/*calculate CFG_HI*/
-	if (mids->cfg_mode == LNW_DMA_MEM_TO_MEM) {
-		/*SW HS only*/
-		cfg_hi.cfg_hi = 0;
-	} else {
-		cfg_hi.cfg_hi = 0;
-		if (midc->dma->pimr_mask) {
-			cfg_hi.cfgx.protctl = 0x0; /*default value*/
-			cfg_hi.cfgx.fifo_mode = 1;
-			if (mids->dma_slave.direction == DMA_MEM_TO_DEV) {
-				cfg_hi.cfgx.src_per = 0;
-				if (mids->device_instance == 0)
-					cfg_hi.cfgx.dst_per = 3;
-				if (mids->device_instance == 1)
-					cfg_hi.cfgx.dst_per = 1;
-			} else if (mids->dma_slave.direction == DMA_DEV_TO_MEM) {
-				if (mids->device_instance == 0)
-					cfg_hi.cfgx.src_per = 2;
-				if (mids->device_instance == 1)
-					cfg_hi.cfgx.src_per = 0;
-				cfg_hi.cfgx.dst_per = 0;
-			}
-		} else {
-			cfg_hi.cfgx.protctl = 0x1; /*default value*/
-			cfg_hi.cfgx.src_per = cfg_hi.cfgx.dst_per =
-					midc->ch_id - midc->dma->chan_base;
-		}
-	}
-
-	/*calculate CTL_HI*/
-	ctl_hi.ctlx.reser = 0;
-	ctl_hi.ctlx.done  = 0;
-	width = mids->dma_slave.src_addr_width;
-
-	ctl_hi.ctlx.block_ts = get_block_ts(len, width, midc->dma->block_size);
-	pr_debug("MDMA:calc len %d for block size %d\n",
-				ctl_hi.ctlx.block_ts, midc->dma->block_size);
-	/*calculate CTL_LO*/
-	ctl_lo.ctl_lo = 0;
-	ctl_lo.ctlx.int_en = 1;
-	ctl_lo.ctlx.dst_msize = mids->dma_slave.src_maxburst;
-	ctl_lo.ctlx.src_msize = mids->dma_slave.dst_maxburst;
-
-	/*
-	 * Here we need some translation from "enum dma_slave_buswidth"
-	 * to the format for our dma controller
-	 *		standard	intel_mid_dmac's format
-	 *		 1 Byte			0b000
-	 *		 2 Bytes		0b001
-	 *		 4 Bytes		0b010
-	 */
-	ctl_lo.ctlx.dst_tr_width = mids->dma_slave.dst_addr_width / 2;
-	ctl_lo.ctlx.src_tr_width = mids->dma_slave.src_addr_width / 2;
-
-	if (mids->cfg_mode == LNW_DMA_MEM_TO_MEM) {
-		ctl_lo.ctlx.tt_fc = 0;
-		ctl_lo.ctlx.sinc = 0;
-		ctl_lo.ctlx.dinc = 0;
-	} else {
-		if (mids->dma_slave.direction == DMA_MEM_TO_DEV) {
-			ctl_lo.ctlx.sinc = 0;
-			ctl_lo.ctlx.dinc = 2;
-			ctl_lo.ctlx.tt_fc = 1;
-		} else if (mids->dma_slave.direction == DMA_DEV_TO_MEM) {
-			ctl_lo.ctlx.sinc = 2;
-			ctl_lo.ctlx.dinc = 0;
-			ctl_lo.ctlx.tt_fc = 2;
-		}
-	}
-
-	pr_debug("MDMA:Calc CTL LO %x, CTL HI %x, CFG LO %x, CFG HI %x\n",
-		ctl_lo.ctl_lo, ctl_hi.ctl_hi, cfg_lo.cfg_lo, cfg_hi.cfg_hi);
-
-	enable_dma_interrupt(midc);
-
-	desc = midc_desc_get(midc);
-	if (desc == NULL)
-		goto err_desc_get;
-	desc->sar = src;
-	desc->dar = dest ;
-	desc->len = len;
-	desc->cfg_hi = cfg_hi.cfg_hi;
-	desc->cfg_lo = cfg_lo.cfg_lo;
-	desc->ctl_lo = ctl_lo.ctl_lo;
-	desc->ctl_hi = ctl_hi.ctl_hi;
-	desc->width = width;
-	desc->dirn = mids->dma_slave.direction;
-	desc->lli_phys = 0;
-	desc->lli = NULL;
-	desc->lli_pool = NULL;
-	return &desc->txd;
-
-err_desc_get:
-	pr_err("ERR_MDMA: Failed to get desc\n");
-	midc_desc_put(midc, desc);
-	return NULL;
-}
-/**
- * intel_mid_dma_prep_slave_sg -	Prep slave sg txn
- * @chan: chan for DMA transfer
- * @sgl: scatter gather list
- * @sg_len: length of sg txn
- * @direction: DMA transfer dirtn
- * @flags: DMA flags
- * @context: transfer context (ignored)
- *
- * Prepares LLI based periphral transfer
- */
-static struct dma_async_tx_descriptor *intel_mid_dma_prep_slave_sg(
-			struct dma_chan *chan, struct scatterlist *sgl,
-			unsigned int sg_len, enum dma_transfer_direction direction,
-			unsigned long flags, void *context)
-{
-	struct intel_mid_dma_chan *midc = NULL;
-	struct intel_mid_dma_slave *mids = NULL;
-	struct intel_mid_dma_desc *desc = NULL;
-	struct dma_async_tx_descriptor *txd = NULL;
-	union intel_mid_dma_ctl_lo ctl_lo;
-
-	pr_debug("MDMA: Prep for slave SG\n");
-
-	if (!sg_len) {
-		pr_err("MDMA: Invalid SG length\n");
-		return NULL;
-	}
-	midc = to_intel_mid_dma_chan(chan);
-	BUG_ON(!midc);
-
-	mids = midc->mid_slave;
-	BUG_ON(!mids);
-
-	if (!midc->dma->pimr_mask) {
-		/* We can still handle sg list with only one item */
-		if (sg_len == 1) {
-			txd = intel_mid_dma_prep_memcpy(chan,
-						mids->dma_slave.dst_addr,
-						mids->dma_slave.src_addr,
-						sg_dma_len(sgl),
-						flags);
-			return txd;
-		} else {
-			pr_warn("MDMA: SG list is not supported by this controller\n");
-			return  NULL;
-		}
-	}
-
-	pr_debug("MDMA: SG Length = %d, direction = %d, Flags = %#lx\n",
-			sg_len, direction, flags);
-
-	txd = intel_mid_dma_prep_memcpy(chan, 0, 0, sg_dma_len(sgl), flags);
-	if (NULL == txd) {
-		pr_err("MDMA: Prep memcpy failed\n");
-		return NULL;
-	}
-
-	desc = to_intel_mid_dma_desc(txd);
-	desc->dirn = direction;
-	ctl_lo.ctl_lo = desc->ctl_lo;
-	ctl_lo.ctlx.llp_dst_en = 1;
-	ctl_lo.ctlx.llp_src_en = 1;
-	desc->ctl_lo = ctl_lo.ctl_lo;
-	desc->lli_length = sg_len;
-	desc->current_lli = 0;
-	/* DMA coherent memory pool for LLI descriptors*/
-	desc->lli_pool = pci_pool_create("intel_mid_dma_lli_pool",
-				midc->dma->pdev,
-				(sizeof(struct intel_mid_dma_lli)*sg_len),
-				32, 0);
-	if (NULL == desc->lli_pool) {
-		pr_err("MID_DMA:LLI pool create failed\n");
-		return NULL;
-	}
-
-	desc->lli = pci_pool_alloc(desc->lli_pool, GFP_KERNEL, &desc->lli_phys);
-	if (!desc->lli) {
-		pr_err("MID_DMA: LLI alloc failed\n");
-		pci_pool_destroy(desc->lli_pool);
-		return NULL;
-	}
-
-	midc_lli_fill_sg(midc, desc, sgl, sg_len, flags);
-	if (flags & DMA_PREP_INTERRUPT) {
-		iowrite32(UNMASK_INTR_REG(midc->ch_id),
-				midc->dma_base + MASK_BLOCK);
-		pr_debug("MDMA:Enabled Block interrupt\n");
-	}
-	return &desc->txd;
-}
-
-/**
- * intel_mid_dma_free_chan_resources -	Frees dma resources
- * @chan: chan requiring attention
- *
- * Frees the allocated resources on this DMA chan
- */
-static void intel_mid_dma_free_chan_resources(struct dma_chan *chan)
-{
-	struct intel_mid_dma_chan	*midc = to_intel_mid_dma_chan(chan);
-	struct middma_device	*mid = to_middma_device(chan->device);
-	struct intel_mid_dma_desc	*desc, *_desc;
-
-	if (true == midc->busy) {
-		/*trying to free ch in use!!!!!*/
-		pr_err("ERR_MDMA: trying to free ch in use\n");
-	}
-	spin_lock_bh(&midc->lock);
-	midc->descs_allocated = 0;
-	list_for_each_entry_safe(desc, _desc, &midc->active_list, desc_node) {
-		list_del(&desc->desc_node);
-		pci_pool_free(mid->dma_pool, desc, desc->txd.phys);
-	}
-	list_for_each_entry_safe(desc, _desc, &midc->free_list, desc_node) {
-		list_del(&desc->desc_node);
-		pci_pool_free(mid->dma_pool, desc, desc->txd.phys);
-	}
-	list_for_each_entry_safe(desc, _desc, &midc->queue, desc_node) {
-		list_del(&desc->desc_node);
-		pci_pool_free(mid->dma_pool, desc, desc->txd.phys);
-	}
-	spin_unlock_bh(&midc->lock);
-	midc->in_use = false;
-	midc->busy = false;
-	/* Disable CH interrupts */
-	iowrite32(MASK_INTR_REG(midc->ch_id), mid->dma_base + MASK_BLOCK);
-	iowrite32(MASK_INTR_REG(midc->ch_id), mid->dma_base + MASK_ERR);
-	pm_runtime_put(&mid->pdev->dev);
-}
-
-/**
- * intel_mid_dma_alloc_chan_resources -	Allocate dma resources
- * @chan: chan requiring attention
- *
- * Allocates DMA resources on this chan
- * Return the descriptors allocated
- */
-static int intel_mid_dma_alloc_chan_resources(struct dma_chan *chan)
-{
-	struct intel_mid_dma_chan	*midc = to_intel_mid_dma_chan(chan);
-	struct middma_device	*mid = to_middma_device(chan->device);
-	struct intel_mid_dma_desc	*desc;
-	dma_addr_t		phys;
-	int	i = 0;
-
-	pm_runtime_get_sync(&mid->pdev->dev);
-
-	if (mid->state == SUSPENDED) {
-		if (dma_resume(&mid->pdev->dev)) {
-			pr_err("ERR_MDMA: resume failed");
-			return -EFAULT;
-		}
-	}
-
-	/* ASSERT:  channel is idle */
-	if (test_ch_en(mid->dma_base, midc->ch_id)) {
-		/*ch is not idle*/
-		pr_err("ERR_MDMA: ch not idle\n");
-		pm_runtime_put(&mid->pdev->dev);
-		return -EIO;
-	}
-	dma_cookie_init(chan);
-
-	spin_lock_bh(&midc->lock);
-	while (midc->descs_allocated < DESCS_PER_CHANNEL) {
-		spin_unlock_bh(&midc->lock);
-		desc = pci_pool_alloc(mid->dma_pool, GFP_KERNEL, &phys);
-		if (!desc) {
-			pr_err("ERR_MDMA: desc failed\n");
-			pm_runtime_put(&mid->pdev->dev);
-			return -ENOMEM;
-			/*check*/
-		}
-		dma_async_tx_descriptor_init(&desc->txd, chan);
-		desc->txd.tx_submit = intel_mid_dma_tx_submit;
-		desc->txd.flags = DMA_CTRL_ACK;
-		desc->txd.phys = phys;
-		spin_lock_bh(&midc->lock);
-		i = ++midc->descs_allocated;
-		list_add_tail(&desc->desc_node, &midc->free_list);
-	}
-	spin_unlock_bh(&midc->lock);
-	midc->in_use = true;
-	midc->busy = false;
-	pr_debug("MID_DMA: Desc alloc done ret: %d desc\n", i);
-	return i;
-}
-
-/**
- * midc_handle_error -	Handle DMA txn error
- * @mid: controller where error occurred
- * @midc: chan where error occurred
- *
- * Scan the descriptor for error
- */
-static void midc_handle_error(struct middma_device *mid,
-		struct intel_mid_dma_chan *midc)
-{
-	midc_scan_descriptors(mid, midc);
-}
-
-/**
- * dma_tasklet -	DMA interrupt tasklet
- * @data: tasklet arg (the controller structure)
- *
- * Scan the controller for interrupts for completion/error
- * Clear the interrupt and call for handling completion/error
- */
-static void dma_tasklet(unsigned long data)
-{
-	struct middma_device *mid = NULL;
-	struct intel_mid_dma_chan *midc = NULL;
-	u32 status, raw_tfr, raw_block;
-	int i;
-
-	mid = (struct middma_device *)data;
-	if (mid == NULL) {
-		pr_err("ERR_MDMA: tasklet Null param\n");
-		return;
-	}
-	pr_debug("MDMA: in tasklet for device %x\n", mid->pci_id);
-	raw_tfr = ioread32(mid->dma_base + RAW_TFR);
-	raw_block = ioread32(mid->dma_base + RAW_BLOCK);
-	status = raw_tfr | raw_block;
-	status &= mid->intr_mask;
-	while (status) {
-		/*txn interrupt*/
-		i = get_ch_index(&status, mid->chan_base);
-		if (i < 0) {
-			pr_err("ERR_MDMA:Invalid ch index %x\n", i);
-			return;
-		}
-		midc = &mid->ch[i];
-		if (midc == NULL) {
-			pr_err("ERR_MDMA:Null param midc\n");
-			return;
-		}
-		pr_debug("MDMA:Tx complete interrupt %x, Ch No %d Index %d\n",
-				status, midc->ch_id, i);
-		midc->raw_tfr = raw_tfr;
-		midc->raw_block = raw_block;
-		spin_lock_bh(&midc->lock);
-		/*clearing this interrupts first*/
-		iowrite32((1 << midc->ch_id), mid->dma_base + CLEAR_TFR);
-		if (raw_block) {
-			iowrite32((1 << midc->ch_id),
-				mid->dma_base + CLEAR_BLOCK);
-		}
-		midc_scan_descriptors(mid, midc);
-		pr_debug("MDMA:Scan of desc... complete, unmasking\n");
-		iowrite32(UNMASK_INTR_REG(midc->ch_id),
-				mid->dma_base + MASK_TFR);
-		if (raw_block) {
-			iowrite32(UNMASK_INTR_REG(midc->ch_id),
-				mid->dma_base + MASK_BLOCK);
-		}
-		spin_unlock_bh(&midc->lock);
-	}
-
-	status = ioread32(mid->dma_base + RAW_ERR);
-	status &= mid->intr_mask;
-	while (status) {
-		/*err interrupt*/
-		i = get_ch_index(&status, mid->chan_base);
-		if (i < 0) {
-			pr_err("ERR_MDMA:Invalid ch index %x\n", i);
-			return;
-		}
-		midc = &mid->ch[i];
-		if (midc == NULL) {
-			pr_err("ERR_MDMA:Null param midc\n");
-			return;
-		}
-		pr_debug("MDMA:Tx complete interrupt %x, Ch No %d Index %d\n",
-				status, midc->ch_id, i);
-
-		iowrite32((1 << midc->ch_id), mid->dma_base + CLEAR_ERR);
-		spin_lock_bh(&midc->lock);
-		midc_handle_error(mid, midc);
-		iowrite32(UNMASK_INTR_REG(midc->ch_id),
-				mid->dma_base + MASK_ERR);
-		spin_unlock_bh(&midc->lock);
-	}
-	pr_debug("MDMA:Exiting takslet...\n");
-	return;
-}
-
-static void dma_tasklet1(unsigned long data)
-{
-	pr_debug("MDMA:in takslet1...\n");
-	return dma_tasklet(data);
-}
-
-static void dma_tasklet2(unsigned long data)
-{
-	pr_debug("MDMA:in takslet2...\n");
-	return dma_tasklet(data);
-}
-
-/**
- * intel_mid_dma_interrupt -	DMA ISR
- * @irq: IRQ where interrupt occurred
- * @data: ISR cllback data (the controller structure)
- *
- * See if this is our interrupt if so then schedule the tasklet
- * otherwise ignore
- */
-static irqreturn_t intel_mid_dma_interrupt(int irq, void *data)
-{
-	struct middma_device *mid = data;
-	u32 tfr_status, err_status;
-	int call_tasklet = 0;
-
-	tfr_status = ioread32(mid->dma_base + RAW_TFR);
-	err_status = ioread32(mid->dma_base + RAW_ERR);
-	if (!tfr_status && !err_status)
-		return IRQ_NONE;
-
-	/*DMA Interrupt*/
-	pr_debug("MDMA:Got an interrupt on irq %d\n", irq);
-	pr_debug("MDMA: Status %x, Mask %x\n", tfr_status, mid->intr_mask);
-	tfr_status &= mid->intr_mask;
-	if (tfr_status) {
-		/*need to disable intr*/
-		iowrite32((tfr_status << INT_MASK_WE), mid->dma_base + MASK_TFR);
-		iowrite32((tfr_status << INT_MASK_WE), mid->dma_base + MASK_BLOCK);
-		pr_debug("MDMA: Calling tasklet %x\n", tfr_status);
-		call_tasklet = 1;
-	}
-	err_status &= mid->intr_mask;
-	if (err_status) {
-		iowrite32((err_status << INT_MASK_WE),
-			  mid->dma_base + MASK_ERR);
-		call_tasklet = 1;
-	}
-	if (call_tasklet)
-		tasklet_schedule(&mid->tasklet);
-
-	return IRQ_HANDLED;
-}
-
-static irqreturn_t intel_mid_dma_interrupt1(int irq, void *data)
-{
-	return intel_mid_dma_interrupt(irq, data);
-}
-
-static irqreturn_t intel_mid_dma_interrupt2(int irq, void *data)
-{
-	return intel_mid_dma_interrupt(irq, data);
-}
-
-/**
- * mid_setup_dma -	Setup the DMA controller
- * @pdev: Controller PCI device structure
- *
- * Initialize the DMA controller, channels, registers with DMA engine,
- * ISR. Initialize DMA controller channels.
- */
-static int mid_setup_dma(struct pci_dev *pdev)
-{
-	struct middma_device *dma = pci_get_drvdata(pdev);
-	int err, i;
-
-	/* DMA coherent memory pool for DMA descriptor allocations */
-	dma->dma_pool = pci_pool_create("intel_mid_dma_desc_pool", pdev,
-					sizeof(struct intel_mid_dma_desc),
-					32, 0);
-	if (NULL == dma->dma_pool) {
-		pr_err("ERR_MDMA:pci_pool_create failed\n");
-		err = -ENOMEM;
-		goto err_dma_pool;
-	}
-
-	INIT_LIST_HEAD(&dma->common.channels);
-	dma->pci_id = pdev->device;
-	if (dma->pimr_mask) {
-		dma->mask_reg = ioremap(LNW_PERIPHRAL_MASK_BASE,
-					LNW_PERIPHRAL_MASK_SIZE);
-		if (dma->mask_reg == NULL) {
-			pr_err("ERR_MDMA:Can't map periphral intr space !!\n");
-			err = -ENOMEM;
-			goto err_ioremap;
-		}
-	} else
-		dma->mask_reg = NULL;
-
-	pr_debug("MDMA:Adding %d channel for this controller\n", dma->max_chan);
-	/*init CH structures*/
-	dma->intr_mask = 0;
-	dma->state = RUNNING;
-	for (i = 0; i < dma->max_chan; i++) {
-		struct intel_mid_dma_chan *midch = &dma->ch[i];
-
-		midch->chan.device = &dma->common;
-		dma_cookie_init(&midch->chan);
-		midch->ch_id = dma->chan_base + i;
-		pr_debug("MDMA:Init CH %d, ID %d\n", i, midch->ch_id);
-
-		midch->dma_base = dma->dma_base;
-		midch->ch_regs = dma->dma_base + DMA_CH_SIZE * midch->ch_id;
-		midch->dma = dma;
-		dma->intr_mask |= 1 << (dma->chan_base + i);
-		spin_lock_init(&midch->lock);
-
-		INIT_LIST_HEAD(&midch->active_list);
-		INIT_LIST_HEAD(&midch->queue);
-		INIT_LIST_HEAD(&midch->free_list);
-		/*mask interrupts*/
-		iowrite32(MASK_INTR_REG(midch->ch_id),
-			dma->dma_base + MASK_BLOCK);
-		iowrite32(MASK_INTR_REG(midch->ch_id),
-			dma->dma_base + MASK_SRC_TRAN);
-		iowrite32(MASK_INTR_REG(midch->ch_id),
-			dma->dma_base + MASK_DST_TRAN);
-		iowrite32(MASK_INTR_REG(midch->ch_id),
-			dma->dma_base + MASK_ERR);
-		iowrite32(MASK_INTR_REG(midch->ch_id),
-			dma->dma_base + MASK_TFR);
-
-		disable_dma_interrupt(midch);
-		list_add_tail(&midch->chan.device_node, &dma->common.channels);
-	}
-	pr_debug("MDMA: Calc Mask as %x for this controller\n", dma->intr_mask);
-
-	/*init dma structure*/
-	dma_cap_zero(dma->common.cap_mask);
-	dma_cap_set(DMA_MEMCPY, dma->common.cap_mask);
-	dma_cap_set(DMA_SLAVE, dma->common.cap_mask);
-	dma_cap_set(DMA_PRIVATE, dma->common.cap_mask);
-	dma->common.dev = &pdev->dev;
-
-	dma->common.device_alloc_chan_resources =
-					intel_mid_dma_alloc_chan_resources;
-	dma->common.device_free_chan_resources =
-					intel_mid_dma_free_chan_resources;
-
-	dma->common.device_tx_status = intel_mid_dma_tx_status;
-	dma->common.device_prep_dma_memcpy = intel_mid_dma_prep_memcpy;
-	dma->common.device_issue_pending = intel_mid_dma_issue_pending;
-	dma->common.device_prep_slave_sg = intel_mid_dma_prep_slave_sg;
-	dma->common.device_config = intel_mid_dma_config;
-	dma->common.device_terminate_all = intel_mid_dma_terminate_all;
-
-	/*enable dma cntrl*/
-	iowrite32(REG_BIT0, dma->dma_base + DMA_CFG);
-
-	/*register irq */
-	if (dma->pimr_mask) {
-		pr_debug("MDMA:Requesting irq shared for DMAC1\n");
-		err = request_irq(pdev->irq, intel_mid_dma_interrupt1,
-			IRQF_SHARED, "INTEL_MID_DMAC1", dma);
-		if (0 != err)
-			goto err_irq;
-	} else {
-		dma->intr_mask = 0x03;
-		pr_debug("MDMA:Requesting irq for DMAC2\n");
-		err = request_irq(pdev->irq, intel_mid_dma_interrupt2,
-			IRQF_SHARED, "INTEL_MID_DMAC2", dma);
-		if (0 != err)
-			goto err_irq;
-	}
-	/*register device w/ engine*/
-	err = dma_async_device_register(&dma->common);
-	if (0 != err) {
-		pr_err("ERR_MDMA:device_register failed: %d\n", err);
-		goto err_engine;
-	}
-	if (dma->pimr_mask) {
-		pr_debug("setting up tasklet1 for DMAC1\n");
-		tasklet_init(&dma->tasklet, dma_tasklet1, (unsigned long)dma);
-	} else {
-		pr_debug("setting up tasklet2 for DMAC2\n");
-		tasklet_init(&dma->tasklet, dma_tasklet2, (unsigned long)dma);
-	}
-	return 0;
-
-err_engine:
-	free_irq(pdev->irq, dma);
-err_irq:
-	if (dma->mask_reg)
-		iounmap(dma->mask_reg);
-err_ioremap:
-	pci_pool_destroy(dma->dma_pool);
-err_dma_pool:
-	pr_err("ERR_MDMA:setup_dma failed: %d\n", err);
-	return err;
-
-}
-
-/**
- * middma_shutdown -	Shutdown the DMA controller
- * @pdev: Controller PCI device structure
- *
- * Called by remove
- * Unregister DMa controller, clear all structures and free interrupt
- */
-static void middma_shutdown(struct pci_dev *pdev)
-{
-	struct middma_device *device = pci_get_drvdata(pdev);
-
-	dma_async_device_unregister(&device->common);
-	pci_pool_destroy(device->dma_pool);
-	if (device->mask_reg)
-		iounmap(device->mask_reg);
-	if (device->dma_base)
-		iounmap(device->dma_base);
-	free_irq(pdev->irq, device);
-	return;
-}
-
-/**
- * intel_mid_dma_probe -	PCI Probe
- * @pdev: Controller PCI device structure
- * @id: pci device id structure
- *
- * Initialize the PCI device, map BARs, query driver data.
- * Call setup_dma to complete contoller and chan initilzation
- */
-static int intel_mid_dma_probe(struct pci_dev *pdev,
-					const struct pci_device_id *id)
-{
-	struct middma_device *device;
-	u32 base_addr, bar_size;
-	struct intel_mid_dma_probe_info *info;
-	int err;
-
-	pr_debug("MDMA: probe for %x\n", pdev->device);
-	info = (void *)id->driver_data;
-	pr_debug("MDMA: CH %d, base %d, block len %d, Periphral mask %x\n",
-				info->max_chan, info->ch_base,
-				info->block_size, info->pimr_mask);
-
-	err = pci_enable_device(pdev);
-	if (err)
-		goto err_enable_device;
-
-	err = pci_request_regions(pdev, "intel_mid_dmac");
-	if (err)
-		goto err_request_regions;
-
-	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
-	if (err)
-		goto err_set_dma_mask;
-
-	err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
-	if (err)
-		goto err_set_dma_mask;
-
-	device = kzalloc(sizeof(*device), GFP_KERNEL);
-	if (!device) {
-		pr_err("ERR_MDMA:kzalloc failed probe\n");
-		err = -ENOMEM;
-		goto err_kzalloc;
-	}
-	device->pdev = pci_dev_get(pdev);
-
-	base_addr = pci_resource_start(pdev, 0);
-	bar_size  = pci_resource_len(pdev, 0);
-	device->dma_base = ioremap_nocache(base_addr, DMA_REG_SIZE);
-	if (!device->dma_base) {
-		pr_err("ERR_MDMA:ioremap failed\n");
-		err = -ENOMEM;
-		goto err_ioremap;
-	}
-	pci_set_drvdata(pdev, device);
-	pci_set_master(pdev);
-	device->max_chan = info->max_chan;
-	device->chan_base = info->ch_base;
-	device->block_size = info->block_size;
-	device->pimr_mask = info->pimr_mask;
-
-	err = mid_setup_dma(pdev);
-	if (err)
-		goto err_dma;
-
-	pm_runtime_put_noidle(&pdev->dev);
-	pm_runtime_allow(&pdev->dev);
-	return 0;
-
-err_dma:
-	iounmap(device->dma_base);
-err_ioremap:
-	pci_dev_put(pdev);
-	kfree(device);
-err_kzalloc:
-err_set_dma_mask:
-	pci_release_regions(pdev);
-	pci_disable_device(pdev);
-err_request_regions:
-err_enable_device:
-	pr_err("ERR_MDMA:Probe failed %d\n", err);
-	return err;
-}
-
-/**
- * intel_mid_dma_remove -	PCI remove
- * @pdev: Controller PCI device structure
- *
- * Free up all resources and data
- * Call shutdown_dma to complete contoller and chan cleanup
- */
-static void intel_mid_dma_remove(struct pci_dev *pdev)
-{
-	struct middma_device *device = pci_get_drvdata(pdev);
-
-	pm_runtime_get_noresume(&pdev->dev);
-	pm_runtime_forbid(&pdev->dev);
-	middma_shutdown(pdev);
-	pci_dev_put(pdev);
-	kfree(device);
-	pci_release_regions(pdev);
-	pci_disable_device(pdev);
-}
-
-/* Power Management */
-/*
-* dma_suspend - PCI suspend function
-*
-* @pci: PCI device structure
-* @state: PM message
-*
-* This function is called by OS when a power event occurs
-*/
-static int dma_suspend(struct device *dev)
-{
-	struct pci_dev *pci = to_pci_dev(dev);
-	int i;
-	struct middma_device *device = pci_get_drvdata(pci);
-	pr_debug("MDMA: dma_suspend called\n");
-
-	for (i = 0; i < device->max_chan; i++) {
-		if (device->ch[i].in_use)
-			return -EAGAIN;
-	}
-	dmac1_mask_periphral_intr(device);
-	device->state = SUSPENDED;
-	pci_save_state(pci);
-	pci_disable_device(pci);
-	pci_set_power_state(pci, PCI_D3hot);
-	return 0;
-}
-
-/**
-* dma_resume - PCI resume function
-*
-* @pci:	PCI device structure
-*
-* This function is called by OS when a power event occurs
-*/
-int dma_resume(struct device *dev)
-{
-	struct pci_dev *pci = to_pci_dev(dev);
-	int ret;
-	struct middma_device *device = pci_get_drvdata(pci);
-
-	pr_debug("MDMA: dma_resume called\n");
-	pci_set_power_state(pci, PCI_D0);
-	pci_restore_state(pci);
-	ret = pci_enable_device(pci);
-	if (ret) {
-		pr_err("MDMA: device can't be enabled for %x\n", pci->device);
-		return ret;
-	}
-	device->state = RUNNING;
-	iowrite32(REG_BIT0, device->dma_base + DMA_CFG);
-	return 0;
-}
-
-static int dma_runtime_suspend(struct device *dev)
-{
-	struct pci_dev *pci_dev = to_pci_dev(dev);
-	struct middma_device *device = pci_get_drvdata(pci_dev);
-
-	device->state = SUSPENDED;
-	return 0;
-}
-
-static int dma_runtime_resume(struct device *dev)
-{
-	struct pci_dev *pci_dev = to_pci_dev(dev);
-	struct middma_device *device = pci_get_drvdata(pci_dev);
-
-	device->state = RUNNING;
-	iowrite32(REG_BIT0, device->dma_base + DMA_CFG);
-	return 0;
-}
-
-static int dma_runtime_idle(struct device *dev)
-{
-	struct pci_dev *pdev = to_pci_dev(dev);
-	struct middma_device *device = pci_get_drvdata(pdev);
-	int i;
-
-	for (i = 0; i < device->max_chan; i++) {
-		if (device->ch[i].in_use)
-			return -EAGAIN;
-	}
-
-	return 0;
-}
-
-/******************************************************************************
-* PCI stuff
-*/
-static struct pci_device_id intel_mid_dma_ids[] = {
-	{ PCI_VDEVICE(INTEL, INTEL_MID_DMAC1_ID),	INFO(2, 6, 4095, 0x200020)},
-	{ PCI_VDEVICE(INTEL, INTEL_MID_DMAC2_ID),	INFO(2, 0, 2047, 0)},
-	{ PCI_VDEVICE(INTEL, INTEL_MID_GP_DMAC2_ID),	INFO(2, 0, 2047, 0)},
-	{ PCI_VDEVICE(INTEL, INTEL_MFLD_DMAC1_ID),	INFO(4, 0, 4095, 0x400040)},
-	{ 0, }
-};
-MODULE_DEVICE_TABLE(pci, intel_mid_dma_ids);
-
-static const struct dev_pm_ops intel_mid_dma_pm = {
-	.runtime_suspend = dma_runtime_suspend,
-	.runtime_resume = dma_runtime_resume,
-	.runtime_idle = dma_runtime_idle,
-	.suspend = dma_suspend,
-	.resume = dma_resume,
-};
-
-static struct pci_driver intel_mid_dma_pci_driver = {
-	.name		=	"Intel MID DMA",
-	.id_table	=	intel_mid_dma_ids,
-	.probe		=	intel_mid_dma_probe,
-	.remove		=	intel_mid_dma_remove,
-#ifdef CONFIG_PM
-	.driver = {
-		.pm = &intel_mid_dma_pm,
-	},
-#endif
-};
-
-static int __init intel_mid_dma_init(void)
-{
-	pr_debug("INFO_MDMA: LNW DMA Driver Version %s\n",
-			INTEL_MID_DMA_DRIVER_VERSION);
-	return pci_register_driver(&intel_mid_dma_pci_driver);
-}
-fs_initcall(intel_mid_dma_init);
-
-static void __exit intel_mid_dma_exit(void)
-{
-	pci_unregister_driver(&intel_mid_dma_pci_driver);
-}
-module_exit(intel_mid_dma_exit);
-
-MODULE_AUTHOR("Vinod Koul <vinod.koul@intel.com>");
-MODULE_DESCRIPTION("Intel (R) MID DMAC Driver");
-MODULE_LICENSE("GPL v2");
-MODULE_VERSION(INTEL_MID_DMA_DRIVER_VERSION);
diff --git a/drivers/dma/intel_mid_dma_regs.h b/drivers/dma/intel_mid_dma_regs.h
deleted file mode 100644
index 17b4219..0000000
--- a/drivers/dma/intel_mid_dma_regs.h
+++ /dev/null
@@ -1,299 +0,0 @@
-/*
- *  intel_mid_dma_regs.h - Intel MID DMA Drivers
- *
- *  Copyright (C) 2008-10 Intel Corp
- *  Author: Vinod Koul <vinod.koul@intel.com>
- *  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; version 2 of the License.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- *
- */
-#ifndef __INTEL_MID_DMAC_REGS_H__
-#define __INTEL_MID_DMAC_REGS_H__
-
-#include <linux/dmaengine.h>
-#include <linux/dmapool.h>
-#include <linux/pci_ids.h>
-
-#define INTEL_MID_DMA_DRIVER_VERSION "1.1.0"
-
-#define	REG_BIT0		0x00000001
-#define	REG_BIT8		0x00000100
-#define INT_MASK_WE		0x8
-#define CLEAR_DONE		0xFFFFEFFF
-#define UNMASK_INTR_REG(chan_num) \
-	((REG_BIT0 << chan_num) | (REG_BIT8 << chan_num))
-#define MASK_INTR_REG(chan_num) (REG_BIT8 << chan_num)
-
-#define ENABLE_CHANNEL(chan_num) \
-	((REG_BIT0 << chan_num) | (REG_BIT8 << chan_num))
-
-#define DISABLE_CHANNEL(chan_num) \
-	(REG_BIT8 << chan_num)
-
-#define DESCS_PER_CHANNEL	16
-/*DMA Registers*/
-/*registers associated with channel programming*/
-#define DMA_REG_SIZE		0x400
-#define DMA_CH_SIZE		0x58
-
-/*CH X REG = (DMA_CH_SIZE)*CH_NO + REG*/
-#define SAR			0x00 /* Source Address Register*/
-#define DAR			0x08 /* Destination Address Register*/
-#define LLP			0x10 /* Linked List Pointer Register*/
-#define CTL_LOW			0x18 /* Control Register*/
-#define CTL_HIGH		0x1C /* Control Register*/
-#define CFG_LOW			0x40 /* Configuration Register Low*/
-#define CFG_HIGH		0x44 /* Configuration Register high*/
-
-#define STATUS_TFR		0x2E8
-#define STATUS_BLOCK		0x2F0
-#define STATUS_ERR		0x308
-
-#define RAW_TFR			0x2C0
-#define RAW_BLOCK		0x2C8
-#define RAW_ERR			0x2E0
-
-#define MASK_TFR		0x310
-#define MASK_BLOCK		0x318
-#define MASK_SRC_TRAN		0x320
-#define MASK_DST_TRAN		0x328
-#define MASK_ERR		0x330
-
-#define CLEAR_TFR		0x338
-#define CLEAR_BLOCK		0x340
-#define CLEAR_SRC_TRAN		0x348
-#define CLEAR_DST_TRAN		0x350
-#define CLEAR_ERR		0x358
-
-#define INTR_STATUS		0x360
-#define DMA_CFG			0x398
-#define DMA_CHAN_EN		0x3A0
-
-/*DMA channel control registers*/
-union intel_mid_dma_ctl_lo {
-	struct {
-		u32	int_en:1;	/*enable or disable interrupts*/
-					/*should be 0*/
-		u32	dst_tr_width:3;	/*destination transfer width*/
-					/*usually 32 bits = 010*/
-		u32	src_tr_width:3; /*source transfer width*/
-					/*usually 32 bits = 010*/
-		u32	dinc:2;		/*destination address inc/dec*/
-					/*For mem:INC=00, Periphral NoINC=11*/
-		u32	sinc:2;		/*source address inc or dec, as above*/
-		u32	dst_msize:3;	/*destination burst transaction length*/
-					/*always = 16 ie 011*/
-		u32	src_msize:3;	/*source burst transaction length*/
-					/*always = 16 ie 011*/
-		u32	reser1:3;
-		u32	tt_fc:3;	/*transfer type and flow controller*/
-					/*M-M = 000
-					  P-M = 010
-					  M-P = 001*/
-		u32	dms:2;		/*destination master select = 0*/
-		u32	sms:2;		/*source master select = 0*/
-		u32	llp_dst_en:1;	/*enable/disable destination LLP = 0*/
-		u32	llp_src_en:1;	/*enable/disable source LLP = 0*/
-		u32	reser2:3;
-	} ctlx;
-	u32	ctl_lo;
-};
-
-union intel_mid_dma_ctl_hi {
-	struct {
-		u32	block_ts:12;	/*block transfer size*/
-		u32	done:1;		/*Done - updated by DMAC*/
-		u32	reser:19;	/*configured by DMAC*/
-	} ctlx;
-	u32	ctl_hi;
-
-};
-
-/*DMA channel configuration registers*/
-union intel_mid_dma_cfg_lo {
-	struct {
-		u32	reser1:5;
-		u32	ch_prior:3;	/*channel priority = 0*/
-		u32	ch_susp:1;	/*channel suspend = 0*/
-		u32	fifo_empty:1;	/*FIFO empty or not R bit = 0*/
-		u32	hs_sel_dst:1;	/*select HW/SW destn handshaking*/
-					/*HW = 0, SW = 1*/
-		u32	hs_sel_src:1;	/*select HW/SW src handshaking*/
-		u32	reser2:6;
-		u32	dst_hs_pol:1;	/*dest HS interface polarity*/
-		u32	src_hs_pol:1;	/*src HS interface polarity*/
-		u32	max_abrst:10;	/*max AMBA burst len = 0 (no sw limit*/
-		u32	reload_src:1;	/*auto reload src addr =1 if src is P*/
-		u32	reload_dst:1;	/*AR destn addr =1 if dstn is P*/
-	} cfgx;
-	u32	cfg_lo;
-};
-
-union intel_mid_dma_cfg_hi {
-	struct {
-		u32	fcmode:1;	/*flow control mode = 1*/
-		u32	fifo_mode:1;	/*FIFO mode select = 1*/
-		u32	protctl:3;	/*protection control = 0*/
-		u32	rsvd:2;
-		u32	src_per:4;	/*src hw HS interface*/
-		u32	dst_per:4;	/*dstn hw HS interface*/
-		u32	reser2:17;
-	} cfgx;
-	u32	cfg_hi;
-};
-
-
-/**
- * struct intel_mid_dma_chan - internal mid representation of a DMA channel
- * @chan: dma_chan strcture represetation for mid chan
- * @ch_regs: MMIO register space pointer to channel register
- * @dma_base: MMIO register space DMA engine base pointer
- * @ch_id: DMA channel id
- * @lock: channel spinlock
- * @active_list: current active descriptors
- * @queue: current queued up descriptors
- * @free_list: current free descriptors
- * @slave: dma slave structure
- * @descs_allocated: total number of descriptors allocated
- * @dma: dma device structure pointer
- * @busy: bool representing if ch is busy (active txn) or not
- * @in_use: bool representing if ch is in use or not
- * @raw_tfr: raw trf interrupt received
- * @raw_block: raw block interrupt received
- */
-struct intel_mid_dma_chan {
-	struct dma_chan		chan;
-	void __iomem		*ch_regs;
-	void __iomem		*dma_base;
-	int			ch_id;
-	spinlock_t		lock;
-	struct list_head	active_list;
-	struct list_head	queue;
-	struct list_head	free_list;
-	unsigned int		descs_allocated;
-	struct middma_device	*dma;
-	bool			busy;
-	bool			in_use;
-	u32			raw_tfr;
-	u32			raw_block;
-	struct intel_mid_dma_slave *mid_slave;
-};
-
-static inline struct intel_mid_dma_chan *to_intel_mid_dma_chan(
-						struct dma_chan *chan)
-{
-	return container_of(chan, struct intel_mid_dma_chan, chan);
-}
-
-enum intel_mid_dma_state {
-	RUNNING = 0,
-	SUSPENDED,
-};
-/**
- * struct middma_device - internal representation of a DMA device
- * @pdev: PCI device
- * @dma_base: MMIO register space pointer of DMA
- * @dma_pool: for allocating DMA descriptors
- * @common: embedded struct dma_device
- * @tasklet: dma tasklet for processing interrupts
- * @ch: per channel data
- * @pci_id: DMA device PCI ID
- * @intr_mask: Interrupt mask to be used
- * @mask_reg: MMIO register for periphral mask
- * @chan_base: Base ch index (read from driver data)
- * @max_chan: max number of chs supported (from drv_data)
- * @block_size: Block size of DMA transfer supported (from drv_data)
- * @pimr_mask: MMIO register addr for periphral interrupt (from drv_data)
- * @state: dma PM device state
- */
-struct middma_device {
-	struct pci_dev		*pdev;
-	void __iomem		*dma_base;
-	struct pci_pool		*dma_pool;
-	struct dma_device	common;
-	struct tasklet_struct   tasklet;
-	struct intel_mid_dma_chan ch[MAX_CHAN];
-	unsigned int		pci_id;
-	unsigned int		intr_mask;
-	void __iomem		*mask_reg;
-	int			chan_base;
-	int			max_chan;
-	int			block_size;
-	unsigned int		pimr_mask;
-	enum intel_mid_dma_state state;
-};
-
-static inline struct middma_device *to_middma_device(struct dma_device *common)
-{
-	return container_of(common, struct middma_device, common);
-}
-
-struct intel_mid_dma_desc {
-	void __iomem			*block; /*ch ptr*/
-	struct list_head		desc_node;
-	struct dma_async_tx_descriptor	txd;
-	size_t				len;
-	dma_addr_t			sar;
-	dma_addr_t			dar;
-	u32				cfg_hi;
-	u32				cfg_lo;
-	u32				ctl_lo;
-	u32				ctl_hi;
-	struct pci_pool			*lli_pool;
-	struct intel_mid_dma_lli	*lli;
-	dma_addr_t			lli_phys;
-	unsigned int			lli_length;
-	unsigned int			current_lli;
-	dma_addr_t			next;
-	enum dma_transfer_direction		dirn;
-	enum dma_status			status;
-	enum dma_slave_buswidth		width; /*width of DMA txn*/
-	enum intel_mid_dma_mode		cfg_mode; /*mode configuration*/
-
-};
-
-struct intel_mid_dma_lli {
-	dma_addr_t			sar;
-	dma_addr_t			dar;
-	dma_addr_t			llp;
-	u32				ctl_lo;
-	u32				ctl_hi;
-} __attribute__ ((packed));
-
-static inline int test_ch_en(void __iomem *dma, u32 ch_no)
-{
-	u32 en_reg = ioread32(dma + DMA_CHAN_EN);
-	return (en_reg >> ch_no) & 0x1;
-}
-
-static inline struct intel_mid_dma_desc *to_intel_mid_dma_desc
-		(struct dma_async_tx_descriptor *txd)
-{
-	return container_of(txd, struct intel_mid_dma_desc, txd);
-}
-
-static inline struct intel_mid_dma_slave *to_intel_mid_dma_slave
-		(struct dma_slave_config *slave)
-{
-	return container_of(slave, struct intel_mid_dma_slave, dma_slave);
-}
-
-
-int dma_resume(struct device *dev);
-
-#endif /*__INTEL_MID_DMAC_REGS_H__*/
diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index ab8dfbe..198f96b 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -159,10 +159,9 @@
 
 config SPI_CADENCE
 	tristate "Cadence SPI controller"
-	depends on ARM
 	help
 	  This selects the Cadence SPI controller master driver
-	  used by Xilinx Zynq.
+	  used by Xilinx Zynq and ZynqMP.
 
 config SPI_CLPS711X
 	tristate "CLPS711X host SPI controller"
@@ -632,7 +631,7 @@
 
 config SPI_DW_MID_DMA
 	bool "DMA support for DW SPI controller on Intel MID platform"
-	depends on SPI_DW_PCI && INTEL_MID_DMAC
+	depends on SPI_DW_PCI && DW_DMAC_PCI
 
 config SPI_DW_MMIO
 	tristate "Memory-mapped io interface driver for DW SPI core"
diff --git a/drivers/spi/spi-atmel.c b/drivers/spi/spi-atmel.c
index 06de340..a2f40b1 100644
--- a/drivers/spi/spi-atmel.c
+++ b/drivers/spi/spi-atmel.c
@@ -180,11 +180,17 @@
 	  | SPI_BF(name, value))
 
 /* Register access macros */
+#ifdef CONFIG_AVR32
 #define spi_readl(port, reg) \
 	__raw_readl((port)->regs + SPI_##reg)
 #define spi_writel(port, reg, value) \
 	__raw_writel((value), (port)->regs + SPI_##reg)
-
+#else
+#define spi_readl(port, reg) \
+	readl_relaxed((port)->regs + SPI_##reg)
+#define spi_writel(port, reg, value) \
+	writel_relaxed((value), (port)->regs + SPI_##reg)
+#endif
 /* use PIO for small transfers, avoiding DMA setup/teardown overhead and
  * cache operations; better heuristics consider wordsize and bitrate.
  */
diff --git a/drivers/spi/spi-bcm2835.c b/drivers/spi/spi-bcm2835.c
index 419a782..f63864a 100644
--- a/drivers/spi/spi-bcm2835.c
+++ b/drivers/spi/spi-bcm2835.c
@@ -3,6 +3,7 @@
  *
  * Copyright (C) 2012 Chris Boot
  * Copyright (C) 2013 Stephen Warren
+ * Copyright (C) 2015 Martin Sperl
  *
  * This driver is inspired by:
  * spi-ath79.c, Copyright (C) 2009-2011 Gabor Juhos <juhosg@openwrt.org>
@@ -29,6 +30,7 @@
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_irq.h>
+#include <linux/of_gpio.h>
 #include <linux/of_device.h>
 #include <linux/spi/spi.h>
 
@@ -66,8 +68,10 @@
 #define BCM2835_SPI_CS_CS_10		0x00000002
 #define BCM2835_SPI_CS_CS_01		0x00000001
 
-#define BCM2835_SPI_TIMEOUT_MS	30000
-#define BCM2835_SPI_MODE_BITS	(SPI_CPOL | SPI_CPHA | SPI_CS_HIGH | SPI_NO_CS)
+#define BCM2835_SPI_POLLING_LIMIT_US	30
+#define BCM2835_SPI_TIMEOUT_MS		30000
+#define BCM2835_SPI_MODE_BITS	(SPI_CPOL | SPI_CPHA | SPI_CS_HIGH \
+				| SPI_NO_CS | SPI_3WIRE)
 
 #define DRV_NAME	"spi-bcm2835"
 
@@ -75,10 +79,10 @@
 	void __iomem *regs;
 	struct clk *clk;
 	int irq;
-	struct completion done;
 	const u8 *tx_buf;
 	u8 *rx_buf;
-	int len;
+	int tx_len;
+	int rx_len;
 };
 
 static inline u32 bcm2835_rd(struct bcm2835_spi *bs, unsigned reg)
@@ -91,205 +95,315 @@
 	writel(val, bs->regs + reg);
 }
 
-static inline void bcm2835_rd_fifo(struct bcm2835_spi *bs, int len)
+static inline void bcm2835_rd_fifo(struct bcm2835_spi *bs)
 {
 	u8 byte;
 
-	while (len--) {
+	while ((bs->rx_len) &&
+	       (bcm2835_rd(bs, BCM2835_SPI_CS) & BCM2835_SPI_CS_RXD)) {
 		byte = bcm2835_rd(bs, BCM2835_SPI_FIFO);
 		if (bs->rx_buf)
 			*bs->rx_buf++ = byte;
+		bs->rx_len--;
 	}
 }
 
-static inline void bcm2835_wr_fifo(struct bcm2835_spi *bs, int len)
+static inline void bcm2835_wr_fifo(struct bcm2835_spi *bs)
 {
 	u8 byte;
 
-	if (len > bs->len)
-		len = bs->len;
-
-	while (len--) {
+	while ((bs->tx_len) &&
+	       (bcm2835_rd(bs, BCM2835_SPI_CS) & BCM2835_SPI_CS_TXD)) {
 		byte = bs->tx_buf ? *bs->tx_buf++ : 0;
 		bcm2835_wr(bs, BCM2835_SPI_FIFO, byte);
-		bs->len--;
+		bs->tx_len--;
 	}
 }
 
+static void bcm2835_spi_reset_hw(struct spi_master *master)
+{
+	struct bcm2835_spi *bs = spi_master_get_devdata(master);
+	u32 cs = bcm2835_rd(bs, BCM2835_SPI_CS);
+
+	/* Disable SPI interrupts and transfer */
+	cs &= ~(BCM2835_SPI_CS_INTR |
+		BCM2835_SPI_CS_INTD |
+		BCM2835_SPI_CS_TA);
+	/* and reset RX/TX FIFOS */
+	cs |= BCM2835_SPI_CS_CLEAR_RX | BCM2835_SPI_CS_CLEAR_TX;
+
+	/* and reset the SPI_HW */
+	bcm2835_wr(bs, BCM2835_SPI_CS, cs);
+}
+
 static irqreturn_t bcm2835_spi_interrupt(int irq, void *dev_id)
 {
 	struct spi_master *master = dev_id;
 	struct bcm2835_spi *bs = spi_master_get_devdata(master);
-	u32 cs = bcm2835_rd(bs, BCM2835_SPI_CS);
 
-	/*
-	 * RXR - RX needs Reading. This means 12 (or more) bytes have been
-	 * transmitted and hence 12 (or more) bytes have been received.
-	 *
-	 * The FIFO is 16-bytes deep. We check for this interrupt to keep the
-	 * FIFO full; we have a 4-byte-time buffer for IRQ latency. We check
-	 * this before DONE (TX empty) just in case we delayed processing this
-	 * interrupt for some reason.
-	 *
-	 * We only check for this case if we have more bytes to TX; at the end
-	 * of the transfer, we ignore this pipelining optimization, and let
-	 * bcm2835_spi_finish_transfer() drain the RX FIFO.
-	 */
-	if (bs->len && (cs & BCM2835_SPI_CS_RXR)) {
-		/* Read 12 bytes of data */
-		bcm2835_rd_fifo(bs, 12);
+	/* Read as many bytes as possible from FIFO */
+	bcm2835_rd_fifo(bs);
+	/* Write as many bytes as possible to FIFO */
+	bcm2835_wr_fifo(bs);
 
-		/* Write up to 12 bytes */
-		bcm2835_wr_fifo(bs, 12);
-
-		/*
-		 * We must have written something to the TX FIFO due to the
-		 * bs->len check above, so cannot be DONE. Hence, return
-		 * early. Note that DONE could also be set if we serviced an
-		 * RXR interrupt really late.
-		 */
-		return IRQ_HANDLED;
+	/* based on flags decide if we can finish the transfer */
+	if (bcm2835_rd(bs, BCM2835_SPI_CS) & BCM2835_SPI_CS_DONE) {
+		/* Transfer complete - reset SPI HW */
+		bcm2835_spi_reset_hw(master);
+		/* wake up the framework */
+		complete(&master->xfer_completion);
 	}
 
-	/*
-	 * DONE - TX empty. This occurs when we first enable the transfer
-	 * since we do not pre-fill the TX FIFO. At any other time, given that
-	 * we refill the TX FIFO above based on RXR, and hence ignore DONE if
-	 * RXR is set, DONE really does mean end-of-transfer.
-	 */
-	if (cs & BCM2835_SPI_CS_DONE) {
-		if (bs->len) { /* First interrupt in a transfer */
-			bcm2835_wr_fifo(bs, 16);
-		} else { /* Transfer complete */
-			/* Disable SPI interrupts */
-			cs &= ~(BCM2835_SPI_CS_INTR | BCM2835_SPI_CS_INTD);
-			bcm2835_wr(bs, BCM2835_SPI_CS, cs);
-
-			/*
-			 * Wake up bcm2835_spi_transfer_one(), which will call
-			 * bcm2835_spi_finish_transfer(), to drain the RX FIFO.
-			 */
-			complete(&bs->done);
-		}
-
-		return IRQ_HANDLED;
-	}
-
-	return IRQ_NONE;
+	return IRQ_HANDLED;
 }
 
-static int bcm2835_spi_start_transfer(struct spi_device *spi,
-		struct spi_transfer *tfr)
+static int bcm2835_spi_transfer_one_poll(struct spi_master *master,
+					 struct spi_device *spi,
+					 struct spi_transfer *tfr,
+					 u32 cs,
+					 unsigned long xfer_time_us)
 {
-	struct bcm2835_spi *bs = spi_master_get_devdata(spi->master);
-	unsigned long spi_hz, clk_hz, cdiv;
-	u32 cs = BCM2835_SPI_CS_INTR | BCM2835_SPI_CS_INTD | BCM2835_SPI_CS_TA;
+	struct bcm2835_spi *bs = spi_master_get_devdata(master);
+	unsigned long timeout = jiffies +
+		max(4 * xfer_time_us * HZ / 1000000, 2uL);
 
-	spi_hz = tfr->speed_hz;
-	clk_hz = clk_get_rate(bs->clk);
+	/* enable HW block without interrupts */
+	bcm2835_wr(bs, BCM2835_SPI_CS, cs | BCM2835_SPI_CS_TA);
 
-	if (spi_hz >= clk_hz / 2) {
-		cdiv = 2; /* clk_hz/2 is the fastest we can go */
-	} else if (spi_hz) {
-		/* CDIV must be a power of two */
-		cdiv = roundup_pow_of_two(DIV_ROUND_UP(clk_hz, spi_hz));
-
-		if (cdiv >= 65536)
-			cdiv = 0; /* 0 is the slowest we can go */
-	} else
-		cdiv = 0; /* 0 is the slowest we can go */
-
-	if (spi->mode & SPI_CPOL)
-		cs |= BCM2835_SPI_CS_CPOL;
-	if (spi->mode & SPI_CPHA)
-		cs |= BCM2835_SPI_CS_CPHA;
-
-	if (!(spi->mode & SPI_NO_CS)) {
-		if (spi->mode & SPI_CS_HIGH) {
-			cs |= BCM2835_SPI_CS_CSPOL;
-			cs |= BCM2835_SPI_CS_CSPOL0 << spi->chip_select;
+	/* timeout was set above to 4x the expected time, minimum 2 jiffies */
+	/* loop until the transfer has finished */
+	while (bs->rx_len) {
+		/* read from fifo as much as possible */
+		bcm2835_rd_fifo(bs);
+		/* fill in tx fifo as much as possible */
+		bcm2835_wr_fifo(bs);
+		/* if we still expect some data after the read,
+		 * check for a possible timeout
+		 */
+		if (bs->rx_len && time_after(jiffies, timeout)) {
+			/* Transfer timed out - reset SPI HW */
+			bcm2835_spi_reset_hw(master);
+			/* and return timeout */
+			return -ETIMEDOUT;
 		}
-
-		cs |= spi->chip_select;
 	}
 
-	reinit_completion(&bs->done);
-	bs->tx_buf = tfr->tx_buf;
-	bs->rx_buf = tfr->rx_buf;
-	bs->len = tfr->len;
+	/* Transfer complete - reset SPI HW */
+	bcm2835_spi_reset_hw(master);
+	/* and return without waiting for completion */
+	return 0;
+}
 
-	bcm2835_wr(bs, BCM2835_SPI_CLK, cdiv);
+static int bcm2835_spi_transfer_one_irq(struct spi_master *master,
+					struct spi_device *spi,
+					struct spi_transfer *tfr,
+					u32 cs)
+{
+	struct bcm2835_spi *bs = spi_master_get_devdata(master);
+
+	/* fill in fifo if we have gpio-cs
+	 * note that there have been rare events where the native-CS
+	 * flapped for <1us which may change the behaviour
+	 * with gpio-cs this does not happen, so it is implemented
+	 * only for this case
+	 */
+	if (gpio_is_valid(spi->cs_gpio)) {
+		/* enable HW block, but without interrupts enabled
+		 * as enabling them would trigger an immediate interrupt
+		 */
+		bcm2835_wr(bs, BCM2835_SPI_CS,
+			   cs | BCM2835_SPI_CS_TA);
+		/* fill in tx fifo as much as possible */
+		bcm2835_wr_fifo(bs);
+	}
+
 	/*
 	 * Enable the HW block. This will immediately trigger a DONE (TX
 	 * empty) interrupt, upon which we will fill the TX FIFO with the
 	 * first TX bytes. Pre-filling the TX FIFO here to avoid the
 	 * interrupt doesn't work:-(
 	 */
+	cs |= BCM2835_SPI_CS_INTR | BCM2835_SPI_CS_INTD | BCM2835_SPI_CS_TA;
 	bcm2835_wr(bs, BCM2835_SPI_CS, cs);
 
-	return 0;
-}
-
-static int bcm2835_spi_finish_transfer(struct spi_device *spi,
-		struct spi_transfer *tfr, bool cs_change)
-{
-	struct bcm2835_spi *bs = spi_master_get_devdata(spi->master);
-	u32 cs = bcm2835_rd(bs, BCM2835_SPI_CS);
-
-	/* Drain RX FIFO */
-	while (cs & BCM2835_SPI_CS_RXD) {
-		bcm2835_rd_fifo(bs, 1);
-		cs = bcm2835_rd(bs, BCM2835_SPI_CS);
-	}
-
-	if (tfr->delay_usecs)
-		udelay(tfr->delay_usecs);
-
-	if (cs_change)
-		/* Clear TA flag */
-		bcm2835_wr(bs, BCM2835_SPI_CS, cs & ~BCM2835_SPI_CS_TA);
-
-	return 0;
+	/* signal that we need to wait for completion */
+	return 1;
 }
 
 static int bcm2835_spi_transfer_one(struct spi_master *master,
-		struct spi_message *mesg)
+				    struct spi_device *spi,
+				    struct spi_transfer *tfr)
 {
 	struct bcm2835_spi *bs = spi_master_get_devdata(master);
-	struct spi_transfer *tfr;
-	struct spi_device *spi = mesg->spi;
-	int err = 0;
-	unsigned int timeout;
-	bool cs_change;
+	unsigned long spi_hz, clk_hz, cdiv;
+	unsigned long spi_used_hz, xfer_time_us;
+	u32 cs = bcm2835_rd(bs, BCM2835_SPI_CS);
 
-	list_for_each_entry(tfr, &mesg->transfers, transfer_list) {
-		err = bcm2835_spi_start_transfer(spi, tfr);
-		if (err)
-			goto out;
+	/* set clock */
+	spi_hz = tfr->speed_hz;
+	clk_hz = clk_get_rate(bs->clk);
 
-		timeout = wait_for_completion_timeout(&bs->done,
-				msecs_to_jiffies(BCM2835_SPI_TIMEOUT_MS));
-		if (!timeout) {
-			err = -ETIMEDOUT;
-			goto out;
-		}
+	if (spi_hz >= clk_hz / 2) {
+		cdiv = 2; /* clk_hz/2 is the fastest we can go */
+	} else if (spi_hz) {
+		/* CDIV must be a multiple of two */
+		cdiv = DIV_ROUND_UP(clk_hz, spi_hz);
+		cdiv += (cdiv % 2);
 
-		cs_change = tfr->cs_change ||
-			list_is_last(&tfr->transfer_list, &mesg->transfers);
+		if (cdiv >= 65536)
+			cdiv = 0; /* 0 is the slowest we can go */
+	} else {
+		cdiv = 0; /* 0 is the slowest we can go */
+	}
+	spi_used_hz = cdiv ? (clk_hz / cdiv) : (clk_hz / 65536);
+	bcm2835_wr(bs, BCM2835_SPI_CLK, cdiv);
 
-		err = bcm2835_spi_finish_transfer(spi, tfr, cs_change);
-		if (err)
-			goto out;
+	/* handle all the modes */
+	if ((spi->mode & SPI_3WIRE) && (tfr->rx_buf))
+		cs |= BCM2835_SPI_CS_REN;
+	if (spi->mode & SPI_CPOL)
+		cs |= BCM2835_SPI_CS_CPOL;
+	if (spi->mode & SPI_CPHA)
+		cs |= BCM2835_SPI_CS_CPHA;
 
-		mesg->actual_length += (tfr->len - bs->len);
+	/* for gpio_cs set dummy CS so that no HW-CS get changed
+	 * we can not run this in bcm2835_spi_set_cs, as it does
+	 * not get called for cs_gpio cases, so we need to do it here
+	 */
+	if (gpio_is_valid(spi->cs_gpio) || (spi->mode & SPI_NO_CS))
+		cs |= BCM2835_SPI_CS_CS_10 | BCM2835_SPI_CS_CS_01;
+
+	/* set transmit buffers and length */
+	bs->tx_buf = tfr->tx_buf;
+	bs->rx_buf = tfr->rx_buf;
+	bs->tx_len = tfr->len;
+	bs->rx_len = tfr->len;
+
+	/* calculate the estimated time in us the transfer runs */
+	xfer_time_us = tfr->len
+		* 9 /* clocks/byte - SPI-HW waits 1 clock after each byte */
+		* 1000000 / spi_used_hz;
+
+	/* for short requests run polling */
+	if (xfer_time_us <= BCM2835_SPI_POLLING_LIMIT_US)
+		return bcm2835_spi_transfer_one_poll(master, spi, tfr,
+						     cs, xfer_time_us);
+
+	return bcm2835_spi_transfer_one_irq(master, spi, tfr, cs);
+}
+
+static void bcm2835_spi_handle_err(struct spi_master *master,
+				   struct spi_message *msg)
+{
+	bcm2835_spi_reset_hw(master);
+}
+
+static void bcm2835_spi_set_cs(struct spi_device *spi, bool gpio_level)
+{
+	/*
+	 * we can assume that we are "native" as per spi_set_cs
+	 *   calling us ONLY when cs_gpio is not set
+	 * we can also assume that we are CS < 3 as per bcm2835_spi_setup
+	 *   we would not get called because of error handling there.
+	 * the level passed is the electrical level not enabled/disabled
+	 *   so it has to get translated back to enable/disable
+	 *   see spi_set_cs in spi.c for the implementation
+	 */
+
+	struct spi_master *master = spi->master;
+	struct bcm2835_spi *bs = spi_master_get_devdata(master);
+	u32 cs = bcm2835_rd(bs, BCM2835_SPI_CS);
+	bool enable;
+
+	/* calculate the enable flag from the passed gpio_level */
+	enable = (spi->mode & SPI_CS_HIGH) ? gpio_level : !gpio_level;
+
+	/* set flags for "reverse" polarity in the registers */
+	if (spi->mode & SPI_CS_HIGH) {
+		/* set the correct CS-bits */
+		cs |= BCM2835_SPI_CS_CSPOL;
+		cs |= BCM2835_SPI_CS_CSPOL0 << spi->chip_select;
+	} else {
+		/* clean the CS-bits */
+		cs &= ~BCM2835_SPI_CS_CSPOL;
+		cs &= ~(BCM2835_SPI_CS_CSPOL0 << spi->chip_select);
 	}
 
-out:
-	/* Clear FIFOs, and disable the HW block */
-	bcm2835_wr(bs, BCM2835_SPI_CS,
-		   BCM2835_SPI_CS_CLEAR_RX | BCM2835_SPI_CS_CLEAR_TX);
-	mesg->status = err;
-	spi_finalize_current_message(master);
+	/* select the correct chip_select depending on disabled/enabled */
+	if (enable) {
+		/* set cs correctly */
+		if (spi->mode & SPI_NO_CS) {
+			/* use the "undefined" chip-select */
+			cs |= BCM2835_SPI_CS_CS_10 | BCM2835_SPI_CS_CS_01;
+		} else {
+			/* set the chip select */
+			cs &= ~(BCM2835_SPI_CS_CS_10 | BCM2835_SPI_CS_CS_01);
+			cs |= spi->chip_select;
+		}
+	} else {
+		/* disable CSPOL which puts HW-CS into deselected state */
+		cs &= ~BCM2835_SPI_CS_CSPOL;
+		/* use the "undefined" chip-select as precaution */
+		cs |= BCM2835_SPI_CS_CS_10 | BCM2835_SPI_CS_CS_01;
+	}
+
+	/* finally set the calculated flags in SPI_CS */
+	bcm2835_wr(bs, BCM2835_SPI_CS, cs);
+}
+
+static int chip_match_name(struct gpio_chip *chip, void *data)
+{
+	return !strcmp(chip->label, data);
+}
+
+static int bcm2835_spi_setup(struct spi_device *spi)
+{
+	int err;
+	struct gpio_chip *chip;
+	/*
+	 * sanity checking the native-chipselects
+	 */
+	if (spi->mode & SPI_NO_CS)
+		return 0;
+	if (gpio_is_valid(spi->cs_gpio))
+		return 0;
+	if (spi->chip_select > 1) {
+		/* error in the case of native CS requested with CS > 1
+		 * officially there is a CS2, but it is not documented
+		 * which GPIO is connected with that...
+		 */
+		dev_err(&spi->dev,
+			"setup: only two native chip-selects are supported\n");
+		return -EINVAL;
+	}
+	/* now translate native cs to GPIO */
+
+	/* get the gpio chip for the base */
+	chip = gpiochip_find("pinctrl-bcm2835", chip_match_name);
+	if (!chip)
+		return 0;
+
+	/* and calculate the real CS */
+	spi->cs_gpio = chip->base + 8 - spi->chip_select;
+
+	/* and set up the "mode" and level */
+	dev_info(&spi->dev, "setting up native-CS%i as GPIO %i\n",
+		 spi->chip_select, spi->cs_gpio);
+
+	/* set up GPIO as output and pull to the correct level */
+	err = gpio_direction_output(spi->cs_gpio,
+				    (spi->mode & SPI_CS_HIGH) ? 0 : 1);
+	if (err) {
+		dev_err(&spi->dev,
+			"could not set CS%i gpio %i as output: %i",
+			spi->chip_select, spi->cs_gpio, err);
+		return err;
+	}
+	/* the implementation of pinctrl-bcm2835 currently does not
+	 * set the GPIO value when using gpio_direction_output
+	 * so we are setting it here explicitly
+	 */
+	gpio_set_value(spi->cs_gpio, (spi->mode & SPI_CS_HIGH) ? 0 : 1);
 
 	return 0;
 }
@@ -312,13 +426,14 @@
 	master->mode_bits = BCM2835_SPI_MODE_BITS;
 	master->bits_per_word_mask = SPI_BPW_MASK(8);
 	master->num_chipselect = 3;
-	master->transfer_one_message = bcm2835_spi_transfer_one;
+	master->setup = bcm2835_spi_setup;
+	master->set_cs = bcm2835_spi_set_cs;
+	master->transfer_one = bcm2835_spi_transfer_one;
+	master->handle_err = bcm2835_spi_handle_err;
 	master->dev.of_node = pdev->dev.of_node;
 
 	bs = spi_master_get_devdata(master);
 
-	init_completion(&bs->done);
-
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	bs->regs = devm_ioremap_resource(&pdev->dev, res);
 	if (IS_ERR(bs->regs)) {
@@ -343,13 +458,13 @@
 	clk_prepare_enable(bs->clk);
 
 	err = devm_request_irq(&pdev->dev, bs->irq, bcm2835_spi_interrupt, 0,
-				dev_name(&pdev->dev), master);
+			       dev_name(&pdev->dev), master);
 	if (err) {
 		dev_err(&pdev->dev, "could not request IRQ: %d\n", err);
 		goto out_clk_disable;
 	}
 
-	/* initialise the hardware */
+	/* initialise the hardware with the default polarities */
 	bcm2835_wr(bs, BCM2835_SPI_CS,
 		   BCM2835_SPI_CS_CLEAR_RX | BCM2835_SPI_CS_CLEAR_TX);
 
diff --git a/drivers/spi/spi-bcm53xx.c b/drivers/spi/spi-bcm53xx.c
index 3fb91c8..1520554 100644
--- a/drivers/spi/spi-bcm53xx.c
+++ b/drivers/spi/spi-bcm53xx.c
@@ -44,7 +44,7 @@
 	u32 tmp;
 
 	/* SPE bit has to be 0 before we read MSPI STATUS */
-	deadline = jiffies + BCM53XXSPI_SPE_TIMEOUT_MS * HZ / 1000;
+	deadline = jiffies + msecs_to_jiffies(BCM53XXSPI_SPE_TIMEOUT_MS);
 	do {
 		tmp = bcm53xxspi_read(b53spi, B53SPI_MSPI_SPCR2);
 		if (!(tmp & B53SPI_MSPI_SPCR2_SPE))
@@ -56,7 +56,7 @@
 		goto spi_timeout;
 
 	/* Check status */
-	deadline = jiffies + timeout_ms * HZ / 1000;
+	deadline = jiffies + msecs_to_jiffies(timeout_ms);
 	do {
 		tmp = bcm53xxspi_read(b53spi, B53SPI_MSPI_MSPI_STATUS);
 		if (tmp & B53SPI_MSPI_MSPI_STATUS_SPIF) {
diff --git a/drivers/spi/spi-bfin5xx.c b/drivers/spi/spi-bfin5xx.c
index 3707993..a3d65b4 100644
--- a/drivers/spi/spi-bfin5xx.c
+++ b/drivers/spi/spi-bfin5xx.c
@@ -559,7 +559,7 @@
 	struct spi_transfer *previous = NULL;
 	struct bfin_spi_slave_data *chip = NULL;
 	unsigned int bits_per_word;
-	u16 cr, cr_width, dma_width, dma_config;
+	u16 cr, cr_width = 0, dma_width, dma_config;
 	u32 tranf_success = 1;
 	u8 full_duplex = 0;
 
@@ -648,7 +648,6 @@
 	} else if (bits_per_word == 8) {
 		drv_data->n_bytes = bits_per_word/8;
 		drv_data->len = transfer->len;
-		cr_width = 0;
 		drv_data->ops = &bfin_bfin_spi_transfer_ops_u8;
 	}
 	cr = bfin_read(&drv_data->regs->ctl) & ~(BIT_CTL_TIMOD | BIT_CTL_WORDSIZE);
diff --git a/drivers/spi/spi-bitbang-txrx.h b/drivers/spi/spi-bitbang-txrx.h
index c616e41..06b34e5 100644
--- a/drivers/spi/spi-bitbang-txrx.h
+++ b/drivers/spi/spi-bitbang-txrx.h
@@ -49,12 +49,17 @@
 {
 	/* if (cpol == 0) this is SPI_MODE_0; else this is SPI_MODE_2 */
 
+	bool oldbit = !(word & 1);
 	/* clock starts at inactive polarity */
 	for (word <<= (32 - bits); likely(bits); bits--) {
 
 		/* setup MSB (to slave) on trailing edge */
-		if ((flags & SPI_MASTER_NO_TX) == 0)
-			setmosi(spi, word & (1 << 31));
+		if ((flags & SPI_MASTER_NO_TX) == 0) {
+			if ((word & (1 << 31)) != oldbit) {
+				setmosi(spi, word & (1 << 31));
+				oldbit = word & (1 << 31);
+			}
+		}
 		spidelay(nsecs);	/* T(setup) */
 
 		setsck(spi, !cpol);
@@ -76,13 +81,18 @@
 {
 	/* if (cpol == 0) this is SPI_MODE_1; else this is SPI_MODE_3 */
 
+	bool oldbit = !(word & (1 << 31));
 	/* clock starts at inactive polarity */
 	for (word <<= (32 - bits); likely(bits); bits--) {
 
 		/* setup MSB (to slave) on leading edge */
 		setsck(spi, !cpol);
-		if ((flags & SPI_MASTER_NO_TX) == 0)
-			setmosi(spi, word & (1 << 31));
+		if ((flags & SPI_MASTER_NO_TX) == 0) {
+			if ((word & (1 << 31)) != oldbit) {
+				setmosi(spi, word & (1 << 31));
+				oldbit = word & (1 << 31);
+			}
+		}
 		spidelay(nsecs); /* T(setup) */
 
 		setsck(spi, cpol);
diff --git a/drivers/spi/spi-dw-mid.c b/drivers/spi/spi-dw-mid.c
index 4f8c798..bb1052e 100644
--- a/drivers/spi/spi-dw-mid.c
+++ b/drivers/spi/spi-dw-mid.c
@@ -23,29 +23,31 @@
 #include "spi-dw.h"
 
 #ifdef CONFIG_SPI_DW_MID_DMA
-#include <linux/intel_mid_dma.h>
 #include <linux/pci.h>
+#include <linux/platform_data/dma-dw.h>
 
 #define RX_BUSY		0
 #define TX_BUSY		1
 
-struct mid_dma {
-	struct intel_mid_dma_slave	dmas_tx;
-	struct intel_mid_dma_slave	dmas_rx;
-};
+static struct dw_dma_slave mid_dma_tx = { .dst_id = 1 };
+static struct dw_dma_slave mid_dma_rx = { .src_id = 0 };
 
 static bool mid_spi_dma_chan_filter(struct dma_chan *chan, void *param)
 {
-	struct dw_spi *dws = param;
+	struct dw_dma_slave *s = param;
 
-	return dws->dma_dev == chan->device->dev;
+	if (s->dma_dev != chan->device->dev)
+		return false;
+
+	chan->private = s;
+	return true;
 }
 
 static int mid_spi_dma_init(struct dw_spi *dws)
 {
-	struct mid_dma *dw_dma = dws->dma_priv;
 	struct pci_dev *dma_dev;
-	struct intel_mid_dma_slave *rxs, *txs;
+	struct dw_dma_slave *tx = dws->dma_tx;
+	struct dw_dma_slave *rx = dws->dma_rx;
 	dma_cap_mask_t mask;
 
 	/*
@@ -56,28 +58,22 @@
 	if (!dma_dev)
 		return -ENODEV;
 
-	dws->dma_dev = &dma_dev->dev;
-
 	dma_cap_zero(mask);
 	dma_cap_set(DMA_SLAVE, mask);
 
 	/* 1. Init rx channel */
-	dws->rxchan = dma_request_channel(mask, mid_spi_dma_chan_filter, dws);
+	rx->dma_dev = &dma_dev->dev;
+	dws->rxchan = dma_request_channel(mask, mid_spi_dma_chan_filter, rx);
 	if (!dws->rxchan)
 		goto err_exit;
-	rxs = &dw_dma->dmas_rx;
-	rxs->hs_mode = LNW_DMA_HW_HS;
-	rxs->cfg_mode = LNW_DMA_PER_TO_MEM;
-	dws->rxchan->private = rxs;
+	dws->master->dma_rx = dws->rxchan;
 
 	/* 2. Init tx channel */
-	dws->txchan = dma_request_channel(mask, mid_spi_dma_chan_filter, dws);
+	tx->dma_dev = &dma_dev->dev;
+	dws->txchan = dma_request_channel(mask, mid_spi_dma_chan_filter, tx);
 	if (!dws->txchan)
 		goto free_rxchan;
-	txs = &dw_dma->dmas_tx;
-	txs->hs_mode = LNW_DMA_HW_HS;
-	txs->cfg_mode = LNW_DMA_MEM_TO_PER;
-	dws->txchan->private = txs;
+	dws->master->dma_tx = dws->txchan;
 
 	dws->dma_inited = 1;
 	return 0;
@@ -100,6 +96,42 @@
 	dma_release_channel(dws->rxchan);
 }
 
+static irqreturn_t dma_transfer(struct dw_spi *dws)
+{
+	u16 irq_status = dw_readl(dws, DW_SPI_ISR);
+
+	if (!irq_status)
+		return IRQ_NONE;
+
+	dw_readl(dws, DW_SPI_ICR);
+	spi_reset_chip(dws);
+
+	dev_err(&dws->master->dev, "%s: FIFO overrun/underrun\n", __func__);
+	dws->master->cur_msg->status = -EIO;
+	spi_finalize_current_transfer(dws->master);
+	return IRQ_HANDLED;
+}
+
+static bool mid_spi_can_dma(struct spi_master *master, struct spi_device *spi,
+		struct spi_transfer *xfer)
+{
+	struct dw_spi *dws = spi_master_get_devdata(master);
+
+	if (!dws->dma_inited)
+		return false;
+
+	return xfer->len > dws->fifo_len;
+}
+
+static enum dma_slave_buswidth convert_dma_width(u32 dma_width) {
+	if (dma_width == 1)
+		return DMA_SLAVE_BUSWIDTH_1_BYTE;
+	else if (dma_width == 2)
+		return DMA_SLAVE_BUSWIDTH_2_BYTES;
+
+	return DMA_SLAVE_BUSWIDTH_UNDEFINED;
+}
+
 /*
  * dws->dma_chan_busy is set before the dma transfer starts, callback for tx
  * channel will clear a corresponding bit.
@@ -111,33 +143,30 @@
 	clear_bit(TX_BUSY, &dws->dma_chan_busy);
 	if (test_bit(RX_BUSY, &dws->dma_chan_busy))
 		return;
-	dw_spi_xfer_done(dws);
+	spi_finalize_current_transfer(dws->master);
 }
 
-static struct dma_async_tx_descriptor *dw_spi_dma_prepare_tx(struct dw_spi *dws)
+static struct dma_async_tx_descriptor *dw_spi_dma_prepare_tx(struct dw_spi *dws,
+		struct spi_transfer *xfer)
 {
 	struct dma_slave_config txconf;
 	struct dma_async_tx_descriptor *txdesc;
 
-	if (!dws->tx_dma)
+	if (!xfer->tx_buf)
 		return NULL;
 
 	txconf.direction = DMA_MEM_TO_DEV;
 	txconf.dst_addr = dws->dma_addr;
-	txconf.dst_maxburst = LNW_DMA_MSIZE_16;
+	txconf.dst_maxburst = 16;
 	txconf.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
-	txconf.dst_addr_width = dws->dma_width;
+	txconf.dst_addr_width = convert_dma_width(dws->dma_width);
 	txconf.device_fc = false;
 
 	dmaengine_slave_config(dws->txchan, &txconf);
 
-	memset(&dws->tx_sgl, 0, sizeof(dws->tx_sgl));
-	dws->tx_sgl.dma_address = dws->tx_dma;
-	dws->tx_sgl.length = dws->len;
-
 	txdesc = dmaengine_prep_slave_sg(dws->txchan,
-				&dws->tx_sgl,
-				1,
+				xfer->tx_sg.sgl,
+				xfer->tx_sg.nents,
 				DMA_MEM_TO_DEV,
 				DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
 	if (!txdesc)
@@ -160,33 +189,30 @@
 	clear_bit(RX_BUSY, &dws->dma_chan_busy);
 	if (test_bit(TX_BUSY, &dws->dma_chan_busy))
 		return;
-	dw_spi_xfer_done(dws);
+	spi_finalize_current_transfer(dws->master);
 }
 
-static struct dma_async_tx_descriptor *dw_spi_dma_prepare_rx(struct dw_spi *dws)
+static struct dma_async_tx_descriptor *dw_spi_dma_prepare_rx(struct dw_spi *dws,
+		struct spi_transfer *xfer)
 {
 	struct dma_slave_config rxconf;
 	struct dma_async_tx_descriptor *rxdesc;
 
-	if (!dws->rx_dma)
+	if (!xfer->rx_buf)
 		return NULL;
 
 	rxconf.direction = DMA_DEV_TO_MEM;
 	rxconf.src_addr = dws->dma_addr;
-	rxconf.src_maxburst = LNW_DMA_MSIZE_16;
+	rxconf.src_maxburst = 16;
 	rxconf.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
-	rxconf.src_addr_width = dws->dma_width;
+	rxconf.src_addr_width = convert_dma_width(dws->dma_width);
 	rxconf.device_fc = false;
 
 	dmaengine_slave_config(dws->rxchan, &rxconf);
 
-	memset(&dws->rx_sgl, 0, sizeof(dws->rx_sgl));
-	dws->rx_sgl.dma_address = dws->rx_dma;
-	dws->rx_sgl.length = dws->len;
-
 	rxdesc = dmaengine_prep_slave_sg(dws->rxchan,
-				&dws->rx_sgl,
-				1,
+				xfer->rx_sg.sgl,
+				xfer->rx_sg.nents,
 				DMA_DEV_TO_MEM,
 				DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
 	if (!rxdesc)
@@ -198,37 +224,36 @@
 	return rxdesc;
 }
 
-static void dw_spi_dma_setup(struct dw_spi *dws)
+static int mid_spi_dma_setup(struct dw_spi *dws, struct spi_transfer *xfer)
 {
 	u16 dma_ctrl = 0;
 
-	spi_enable_chip(dws, 0);
+	dw_writel(dws, DW_SPI_DMARDLR, 0xf);
+	dw_writel(dws, DW_SPI_DMATDLR, 0x10);
 
-	dw_writew(dws, DW_SPI_DMARDLR, 0xf);
-	dw_writew(dws, DW_SPI_DMATDLR, 0x10);
-
-	if (dws->tx_dma)
+	if (xfer->tx_buf)
 		dma_ctrl |= SPI_DMA_TDMAE;
-	if (dws->rx_dma)
+	if (xfer->rx_buf)
 		dma_ctrl |= SPI_DMA_RDMAE;
-	dw_writew(dws, DW_SPI_DMACR, dma_ctrl);
+	dw_writel(dws, DW_SPI_DMACR, dma_ctrl);
 
-	spi_enable_chip(dws, 1);
+	/* Set the interrupt mask */
+	spi_umask_intr(dws, SPI_INT_TXOI | SPI_INT_RXUI | SPI_INT_RXOI);
+
+	dws->transfer_handler = dma_transfer;
+
+	return 0;
 }
 
-static int mid_spi_dma_transfer(struct dw_spi *dws, int cs_change)
+static int mid_spi_dma_transfer(struct dw_spi *dws, struct spi_transfer *xfer)
 {
 	struct dma_async_tx_descriptor *txdesc, *rxdesc;
 
-	/* 1. setup DMA related registers */
-	if (cs_change)
-		dw_spi_dma_setup(dws);
+	/* Prepare the TX dma transfer */
+	txdesc = dw_spi_dma_prepare_tx(dws, xfer);
 
-	/* 2. Prepare the TX dma transfer */
-	txdesc = dw_spi_dma_prepare_tx(dws);
-
-	/* 3. Prepare the RX dma transfer */
-	rxdesc = dw_spi_dma_prepare_rx(dws);
+	/* Prepare the RX dma transfer */
+	rxdesc = dw_spi_dma_prepare_rx(dws, xfer);
 
 	/* rx must be started before tx due to spi instinct */
 	if (rxdesc) {
@@ -246,10 +271,25 @@
 	return 0;
 }
 
+static void mid_spi_dma_stop(struct dw_spi *dws)
+{
+	if (test_bit(TX_BUSY, &dws->dma_chan_busy)) {
+		dmaengine_terminate_all(dws->txchan);
+		clear_bit(TX_BUSY, &dws->dma_chan_busy);
+	}
+	if (test_bit(RX_BUSY, &dws->dma_chan_busy)) {
+		dmaengine_terminate_all(dws->rxchan);
+		clear_bit(RX_BUSY, &dws->dma_chan_busy);
+	}
+}
+
 static struct dw_spi_dma_ops mid_dma_ops = {
 	.dma_init	= mid_spi_dma_init,
 	.dma_exit	= mid_spi_dma_exit,
+	.dma_setup	= mid_spi_dma_setup,
+	.can_dma	= mid_spi_can_dma,
 	.dma_transfer	= mid_spi_dma_transfer,
+	.dma_stop	= mid_spi_dma_stop,
 };
 #endif
 
@@ -282,9 +322,8 @@
 	iounmap(clk_reg);
 
 #ifdef CONFIG_SPI_DW_MID_DMA
-	dws->dma_priv = kzalloc(sizeof(struct mid_dma), GFP_KERNEL);
-	if (!dws->dma_priv)
-		return -ENOMEM;
+	dws->dma_tx = &mid_dma_tx;
+	dws->dma_rx = &mid_dma_rx;
 	dws->dma_ops = &mid_dma_ops;
 #endif
 	return 0;
diff --git a/drivers/spi/spi-dw.c b/drivers/spi/spi-dw.c
index 4847afb..8d67d03 100644
--- a/drivers/spi/spi-dw.c
+++ b/drivers/spi/spi-dw.c
@@ -28,11 +28,6 @@
 #include <linux/debugfs.h>
 #endif
 
-#define START_STATE	((void *)0)
-#define RUNNING_STATE	((void *)1)
-#define DONE_STATE	((void *)2)
-#define ERROR_STATE	((void *)-1)
-
 /* Slave spi_dev related */
 struct chip_data {
 	u16 cr0;
@@ -143,13 +138,26 @@
 }
 #endif /* CONFIG_DEBUG_FS */
 
+static void dw_spi_set_cs(struct spi_device *spi, bool enable)
+{
+	struct dw_spi *dws = spi_master_get_devdata(spi->master);
+	struct chip_data *chip = spi_get_ctldata(spi);
+
+	/* Chip select logic is inverted from spi_set_cs() */
+	if (chip && chip->cs_control)
+		chip->cs_control(!enable);
+
+	if (!enable)
+		dw_writel(dws, DW_SPI_SER, BIT(spi->chip_select));
+}
+
 /* Return the max entries we can fill into tx fifo */
 static inline u32 tx_max(struct dw_spi *dws)
 {
 	u32 tx_left, tx_room, rxtx_gap;
 
 	tx_left = (dws->tx_end - dws->tx) / dws->n_bytes;
-	tx_room = dws->fifo_len - dw_readw(dws, DW_SPI_TXFLR);
+	tx_room = dws->fifo_len - dw_readl(dws, DW_SPI_TXFLR);
 
 	/*
 	 * Another concern is about the tx/rx mismatch, we
@@ -170,7 +178,7 @@
 {
 	u32 rx_left = (dws->rx_end - dws->rx) / dws->n_bytes;
 
-	return min_t(u32, rx_left, dw_readw(dws, DW_SPI_RXFLR));
+	return min_t(u32, rx_left, dw_readl(dws, DW_SPI_RXFLR));
 }
 
 static void dw_writer(struct dw_spi *dws)
@@ -186,7 +194,7 @@
 			else
 				txw = *(u16 *)(dws->tx);
 		}
-		dw_writew(dws, DW_SPI_DR, txw);
+		dw_writel(dws, DW_SPI_DR, txw);
 		dws->tx += dws->n_bytes;
 	}
 }
@@ -197,7 +205,7 @@
 	u16 rxw;
 
 	while (max--) {
-		rxw = dw_readw(dws, DW_SPI_DR);
+		rxw = dw_readl(dws, DW_SPI_DR);
 		/* Care rx only if the transfer's original "rx" is not null */
 		if (dws->rx_end - dws->len) {
 			if (dws->n_bytes == 1)
@@ -209,103 +217,22 @@
 	}
 }
 
-static void *next_transfer(struct dw_spi *dws)
-{
-	struct spi_message *msg = dws->cur_msg;
-	struct spi_transfer *trans = dws->cur_transfer;
-
-	/* Move to next transfer */
-	if (trans->transfer_list.next != &msg->transfers) {
-		dws->cur_transfer =
-			list_entry(trans->transfer_list.next,
-					struct spi_transfer,
-					transfer_list);
-		return RUNNING_STATE;
-	}
-
-	return DONE_STATE;
-}
-
-/*
- * Note: first step is the protocol driver prepares
- * a dma-capable memory, and this func just need translate
- * the virt addr to physical
- */
-static int map_dma_buffers(struct dw_spi *dws)
-{
-	if (!dws->cur_msg->is_dma_mapped
-		|| !dws->dma_inited
-		|| !dws->cur_chip->enable_dma
-		|| !dws->dma_ops)
-		return 0;
-
-	if (dws->cur_transfer->tx_dma)
-		dws->tx_dma = dws->cur_transfer->tx_dma;
-
-	if (dws->cur_transfer->rx_dma)
-		dws->rx_dma = dws->cur_transfer->rx_dma;
-
-	return 1;
-}
-
-/* Caller already set message->status; dma and pio irqs are blocked */
-static void giveback(struct dw_spi *dws)
-{
-	struct spi_transfer *last_transfer;
-	struct spi_message *msg;
-
-	msg = dws->cur_msg;
-	dws->cur_msg = NULL;
-	dws->cur_transfer = NULL;
-	dws->prev_chip = dws->cur_chip;
-	dws->cur_chip = NULL;
-	dws->dma_mapped = 0;
-
-	last_transfer = list_last_entry(&msg->transfers, struct spi_transfer,
-					transfer_list);
-
-	if (!last_transfer->cs_change)
-		spi_chip_sel(dws, msg->spi, 0);
-
-	spi_finalize_current_message(dws->master);
-}
-
 static void int_error_stop(struct dw_spi *dws, const char *msg)
 {
-	/* Stop the hw */
-	spi_enable_chip(dws, 0);
+	spi_reset_chip(dws);
 
 	dev_err(&dws->master->dev, "%s\n", msg);
-	dws->cur_msg->state = ERROR_STATE;
-	tasklet_schedule(&dws->pump_transfers);
+	dws->master->cur_msg->status = -EIO;
+	spi_finalize_current_transfer(dws->master);
 }
 
-void dw_spi_xfer_done(struct dw_spi *dws)
-{
-	/* Update total byte transferred return count actual bytes read */
-	dws->cur_msg->actual_length += dws->len;
-
-	/* Move to next transfer */
-	dws->cur_msg->state = next_transfer(dws);
-
-	/* Handle end of message */
-	if (dws->cur_msg->state == DONE_STATE) {
-		dws->cur_msg->status = 0;
-		giveback(dws);
-	} else
-		tasklet_schedule(&dws->pump_transfers);
-}
-EXPORT_SYMBOL_GPL(dw_spi_xfer_done);
-
 static irqreturn_t interrupt_transfer(struct dw_spi *dws)
 {
-	u16 irq_status = dw_readw(dws, DW_SPI_ISR);
+	u16 irq_status = dw_readl(dws, DW_SPI_ISR);
 
 	/* Error handling */
 	if (irq_status & (SPI_INT_TXOI | SPI_INT_RXOI | SPI_INT_RXUI)) {
-		dw_readw(dws, DW_SPI_TXOICR);
-		dw_readw(dws, DW_SPI_RXOICR);
-		dw_readw(dws, DW_SPI_RXUICR);
+		dw_readl(dws, DW_SPI_ICR);
 		int_error_stop(dws, "interrupt_transfer: fifo overrun/underrun");
 		return IRQ_HANDLED;
 	}
@@ -313,7 +240,7 @@
 	dw_reader(dws);
 	if (dws->rx_end == dws->rx) {
 		spi_mask_intr(dws, SPI_INT_TXEI);
-		dw_spi_xfer_done(dws);
+		spi_finalize_current_transfer(dws->master);
 		return IRQ_HANDLED;
 	}
 	if (irq_status & SPI_INT_TXEI) {
@@ -328,13 +255,14 @@
 
 static irqreturn_t dw_spi_irq(int irq, void *dev_id)
 {
-	struct dw_spi *dws = dev_id;
-	u16 irq_status = dw_readw(dws, DW_SPI_ISR) & 0x3f;
+	struct spi_master *master = dev_id;
+	struct dw_spi *dws = spi_master_get_devdata(master);
+	u16 irq_status = dw_readl(dws, DW_SPI_ISR) & 0x3f;
 
 	if (!irq_status)
 		return IRQ_NONE;
 
-	if (!dws->cur_msg) {
+	if (!master->cur_msg) {
 		spi_mask_intr(dws, SPI_INT_TXEI);
 		return IRQ_HANDLED;
 	}
@@ -343,7 +271,7 @@
 }
 
 /* Must be called inside pump_transfers() */
-static void poll_transfer(struct dw_spi *dws)
+static int poll_transfer(struct dw_spi *dws)
 {
 	do {
 		dw_writer(dws);
@@ -351,64 +279,32 @@
 		cpu_relax();
 	} while (dws->rx_end > dws->rx);
 
-	dw_spi_xfer_done(dws);
+	return 0;
 }
 
-static void pump_transfers(unsigned long data)
+static int dw_spi_transfer_one(struct spi_master *master,
+		struct spi_device *spi, struct spi_transfer *transfer)
 {
-	struct dw_spi *dws = (struct dw_spi *)data;
-	struct spi_message *message = NULL;
-	struct spi_transfer *transfer = NULL;
-	struct spi_transfer *previous = NULL;
-	struct spi_device *spi = NULL;
-	struct chip_data *chip = NULL;
-	u8 bits = 0;
+	struct dw_spi *dws = spi_master_get_devdata(master);
+	struct chip_data *chip = spi_get_ctldata(spi);
 	u8 imask = 0;
-	u8 cs_change = 0;
-	u16 txint_level = 0;
+	u16 txlevel = 0;
 	u16 clk_div = 0;
 	u32 speed = 0;
 	u32 cr0 = 0;
+	int ret;
 
-	/* Get current state information */
-	message = dws->cur_msg;
-	transfer = dws->cur_transfer;
-	chip = dws->cur_chip;
-	spi = message->spi;
-
-	if (message->state == ERROR_STATE) {
-		message->status = -EIO;
-		goto early_exit;
-	}
-
-	/* Handle end of message */
-	if (message->state == DONE_STATE) {
-		message->status = 0;
-		goto early_exit;
-	}
-
-	/* Delay if requested at end of transfer */
-	if (message->state == RUNNING_STATE) {
-		previous = list_entry(transfer->transfer_list.prev,
-					struct spi_transfer,
-					transfer_list);
-		if (previous->delay_usecs)
-			udelay(previous->delay_usecs);
-	}
-
+	dws->dma_mapped = 0;
 	dws->n_bytes = chip->n_bytes;
 	dws->dma_width = chip->dma_width;
-	dws->cs_control = chip->cs_control;
 
-	dws->rx_dma = transfer->rx_dma;
-	dws->tx_dma = transfer->tx_dma;
 	dws->tx = (void *)transfer->tx_buf;
 	dws->tx_end = dws->tx + transfer->len;
 	dws->rx = transfer->rx_buf;
 	dws->rx_end = dws->rx + transfer->len;
-	dws->len = dws->cur_transfer->len;
-	if (chip != dws->prev_chip)
-		cs_change = 1;
+	dws->len = transfer->len;
+
+	spi_enable_chip(dws, 0);
 
 	cr0 = chip->cr0;
 
@@ -416,32 +312,37 @@
 	if (transfer->speed_hz) {
 		speed = chip->speed_hz;
 
-		if ((transfer->speed_hz != speed) || (!chip->clk_div)) {
+		if ((transfer->speed_hz != speed) || !chip->clk_div) {
 			speed = transfer->speed_hz;
 
 			/* clk_div doesn't support odd number */
-			clk_div = dws->max_freq / speed;
-			clk_div = (clk_div + 1) & 0xfffe;
+			clk_div = (dws->max_freq / speed + 1) & 0xfffe;
 
 			chip->speed_hz = speed;
 			chip->clk_div = clk_div;
+
+			spi_set_clk(dws, chip->clk_div);
 		}
 	}
 	if (transfer->bits_per_word) {
-		bits = transfer->bits_per_word;
-		dws->n_bytes = dws->dma_width = bits >> 3;
-		cr0 = (bits - 1)
+		if (transfer->bits_per_word == 8) {
+			dws->n_bytes = 1;
+			dws->dma_width = 1;
+		} else if (transfer->bits_per_word == 16) {
+			dws->n_bytes = 2;
+			dws->dma_width = 2;
+		}
+		cr0 = (transfer->bits_per_word - 1)
 			| (chip->type << SPI_FRF_OFFSET)
 			| (spi->mode << SPI_MODE_OFFSET)
 			| (chip->tmode << SPI_TMOD_OFFSET);
 	}
-	message->state = RUNNING_STATE;
 
 	/*
 	 * Adjust transfer mode if necessary. Requires platform dependent
 	 * chipselect mechanism.
 	 */
-	if (dws->cs_control) {
+	if (chip->cs_control) {
 		if (dws->rx && dws->tx)
 			chip->tmode = SPI_TMOD_TR;
 		else if (dws->rx)
@@ -453,80 +354,60 @@
 		cr0 |= (chip->tmode << SPI_TMOD_OFFSET);
 	}
 
+	dw_writel(dws, DW_SPI_CTRL0, cr0);
+
 	/* Check if current transfer is a DMA transaction */
-	dws->dma_mapped = map_dma_buffers(dws);
+	if (master->can_dma && master->can_dma(master, spi, transfer))
+		dws->dma_mapped = master->cur_msg_mapped;
+
+	/* For poll mode just disable all interrupts */
+	spi_mask_intr(dws, 0xff);
 
 	/*
 	 * Interrupt mode
 	 * we only need set the TXEI IRQ, as TX/RX always happen syncronizely
 	 */
-	if (!dws->dma_mapped && !chip->poll_mode) {
-		int templen = dws->len / dws->n_bytes;
+	if (dws->dma_mapped) {
+		ret = dws->dma_ops->dma_setup(dws, transfer);
+		if (ret < 0) {
+			spi_enable_chip(dws, 1);
+			return ret;
+		}
+	} else if (!chip->poll_mode) {
+		txlevel = min_t(u16, dws->fifo_len / 2, dws->len / dws->n_bytes);
+		dw_writel(dws, DW_SPI_TXFLTR, txlevel);
 
-		txint_level = dws->fifo_len / 2;
-		txint_level = (templen > txint_level) ? txint_level : templen;
-
+		/* Set the interrupt mask */
 		imask |= SPI_INT_TXEI | SPI_INT_TXOI |
 			 SPI_INT_RXUI | SPI_INT_RXOI;
+		spi_umask_intr(dws, imask);
+
 		dws->transfer_handler = interrupt_transfer;
 	}
 
-	/*
-	 * Reprogram registers only if
-	 *	1. chip select changes
-	 *	2. clk_div is changed
-	 *	3. control value changes
-	 */
-	if (dw_readw(dws, DW_SPI_CTRL0) != cr0 || cs_change || clk_div || imask) {
-		spi_enable_chip(dws, 0);
+	spi_enable_chip(dws, 1);
 
-		if (dw_readw(dws, DW_SPI_CTRL0) != cr0)
-			dw_writew(dws, DW_SPI_CTRL0, cr0);
-
-		spi_set_clk(dws, clk_div ? clk_div : chip->clk_div);
-		spi_chip_sel(dws, spi, 1);
-
-		/* Set the interrupt mask, for poll mode just disable all int */
-		spi_mask_intr(dws, 0xff);
-		if (imask)
-			spi_umask_intr(dws, imask);
-		if (txint_level)
-			dw_writew(dws, DW_SPI_TXFLTR, txint_level);
-
-		spi_enable_chip(dws, 1);
-		if (cs_change)
-			dws->prev_chip = chip;
+	if (dws->dma_mapped) {
+		ret = dws->dma_ops->dma_transfer(dws, transfer);
+		if (ret < 0)
+			return ret;
 	}
 
-	if (dws->dma_mapped)
-		dws->dma_ops->dma_transfer(dws, cs_change);
-
 	if (chip->poll_mode)
-		poll_transfer(dws);
+		return poll_transfer(dws);
 
-	return;
-
-early_exit:
-	giveback(dws);
+	return 1;
 }
 
-static int dw_spi_transfer_one_message(struct spi_master *master,
+static void dw_spi_handle_err(struct spi_master *master,
 		struct spi_message *msg)
 {
 	struct dw_spi *dws = spi_master_get_devdata(master);
 
-	dws->cur_msg = msg;
-	/* Initial message state */
-	dws->cur_msg->state = START_STATE;
-	dws->cur_transfer = list_entry(dws->cur_msg->transfers.next,
-						struct spi_transfer,
-						transfer_list);
-	dws->cur_chip = spi_get_ctldata(dws->cur_msg->spi);
+	if (dws->dma_mapped)
+		dws->dma_ops->dma_stop(dws);
 
-	/* Launch transfers */
-	tasklet_schedule(&dws->pump_transfers);
-
-	return 0;
+	spi_reset_chip(dws);
 }
 
 /* This may be called twice for each spi dev */
@@ -561,8 +442,6 @@
 
 		chip->rx_threshold = 0;
 		chip->tx_threshold = 0;
-
-		chip->enable_dma = chip_info->enable_dma;
 	}
 
 	if (spi->bits_per_word == 8) {
@@ -610,9 +489,7 @@
 /* Restart the controller, disable all interrupts, clean rx fifo */
 static void spi_hw_init(struct device *dev, struct dw_spi *dws)
 {
-	spi_enable_chip(dws, 0);
-	spi_mask_intr(dws, 0xff);
-	spi_enable_chip(dws, 1);
+	spi_reset_chip(dws);
 
 	/*
 	 * Try to detect the FIFO depth if not set by interface driver,
@@ -622,11 +499,11 @@
 		u32 fifo;
 
 		for (fifo = 1; fifo < 256; fifo++) {
-			dw_writew(dws, DW_SPI_TXFLTR, fifo);
-			if (fifo != dw_readw(dws, DW_SPI_TXFLTR))
+			dw_writel(dws, DW_SPI_TXFLTR, fifo);
+			if (fifo != dw_readl(dws, DW_SPI_TXFLTR))
 				break;
 		}
-		dw_writew(dws, DW_SPI_TXFLTR, 0);
+		dw_writel(dws, DW_SPI_TXFLTR, 0);
 
 		dws->fifo_len = (fifo == 1) ? 0 : fifo;
 		dev_dbg(dev, "Detected FIFO size: %u bytes\n", dws->fifo_len);
@@ -646,13 +523,12 @@
 
 	dws->master = master;
 	dws->type = SSI_MOTO_SPI;
-	dws->prev_chip = NULL;
 	dws->dma_inited = 0;
 	dws->dma_addr = (dma_addr_t)(dws->paddr + 0x60);
 	snprintf(dws->name, sizeof(dws->name), "dw_spi%d", dws->bus_num);
 
 	ret = devm_request_irq(dev, dws->irq, dw_spi_irq, IRQF_SHARED,
-			dws->name, dws);
+			dws->name, master);
 	if (ret < 0) {
 		dev_err(&master->dev, "can not get IRQ\n");
 		goto err_free_master;
@@ -664,7 +540,9 @@
 	master->num_chipselect = dws->num_cs;
 	master->setup = dw_spi_setup;
 	master->cleanup = dw_spi_cleanup;
-	master->transfer_one_message = dw_spi_transfer_one_message;
+	master->set_cs = dw_spi_set_cs;
+	master->transfer_one = dw_spi_transfer_one;
+	master->handle_err = dw_spi_handle_err;
 	master->max_speed_hz = dws->max_freq;
 	master->dev.of_node = dev->of_node;
 
@@ -676,11 +554,11 @@
 		if (ret) {
 			dev_warn(dev, "DMA init failed\n");
 			dws->dma_inited = 0;
+		} else {
+			master->can_dma = dws->dma_ops->can_dma;
 		}
 	}
 
-	tasklet_init(&dws->pump_transfers, pump_transfers, (unsigned long)dws);
-
 	spi_master_set_devdata(master, dws);
 	ret = devm_spi_register_master(dev, master);
 	if (ret) {
diff --git a/drivers/spi/spi-dw.h b/drivers/spi/spi-dw.h
index 3d32be6..6c91391 100644
--- a/drivers/spi/spi-dw.h
+++ b/drivers/spi/spi-dw.h
@@ -91,12 +91,15 @@
 struct dw_spi_dma_ops {
 	int (*dma_init)(struct dw_spi *dws);
 	void (*dma_exit)(struct dw_spi *dws);
-	int (*dma_transfer)(struct dw_spi *dws, int cs_change);
+	int (*dma_setup)(struct dw_spi *dws, struct spi_transfer *xfer);
+	bool (*can_dma)(struct spi_master *master, struct spi_device *spi,
+			struct spi_transfer *xfer);
+	int (*dma_transfer)(struct dw_spi *dws, struct spi_transfer *xfer);
+	void (*dma_stop)(struct dw_spi *dws);
 };
 
 struct dw_spi {
 	struct spi_master	*master;
-	struct spi_device	*cur_dev;
 	enum dw_ssi_type	type;
 	char			name[16];
 
@@ -109,41 +112,26 @@
 	u16			bus_num;
 	u16			num_cs;		/* supported slave numbers */
 
-	/* Message Transfer pump */
-	struct tasklet_struct	pump_transfers;
-
 	/* Current message transfer state info */
-	struct spi_message	*cur_msg;
-	struct spi_transfer	*cur_transfer;
-	struct chip_data	*cur_chip;
-	struct chip_data	*prev_chip;
 	size_t			len;
 	void			*tx;
 	void			*tx_end;
 	void			*rx;
 	void			*rx_end;
 	int			dma_mapped;
-	dma_addr_t		rx_dma;
-	dma_addr_t		tx_dma;
-	size_t			rx_map_len;
-	size_t			tx_map_len;
 	u8			n_bytes;	/* current is a 1/2 bytes op */
-	u8			max_bits_per_word;	/* maxim is 16b */
 	u32			dma_width;
 	irqreturn_t		(*transfer_handler)(struct dw_spi *dws);
-	void			(*cs_control)(u32 command);
 
-	/* Dma info */
+	/* DMA info */
 	int			dma_inited;
 	struct dma_chan		*txchan;
-	struct scatterlist	tx_sgl;
 	struct dma_chan		*rxchan;
-	struct scatterlist	rx_sgl;
 	unsigned long		dma_chan_busy;
-	struct device		*dma_dev;
 	dma_addr_t		dma_addr; /* phy address of the Data register */
 	struct dw_spi_dma_ops	*dma_ops;
-	void			*dma_priv; /* platform relate info */
+	void			*dma_tx;
+	void			*dma_rx;
 
 	/* Bus interface info */
 	void			*priv;
@@ -162,16 +150,6 @@
 	__raw_writel(val, dws->regs + offset);
 }
 
-static inline u16 dw_readw(struct dw_spi *dws, u32 offset)
-{
-	return __raw_readw(dws->regs + offset);
-}
-
-static inline void dw_writew(struct dw_spi *dws, u32 offset, u16 val)
-{
-	__raw_writew(val, dws->regs + offset);
-}
-
 static inline void spi_enable_chip(struct dw_spi *dws, int enable)
 {
 	dw_writel(dws, DW_SPI_SSIENR, (enable ? 1 : 0));
@@ -182,22 +160,6 @@
 	dw_writel(dws, DW_SPI_BAUDR, div);
 }
 
-static inline void spi_chip_sel(struct dw_spi *dws, struct spi_device *spi,
-		int active)
-{
-	u16 cs = spi->chip_select;
-	int gpio_val = active ? (spi->mode & SPI_CS_HIGH) :
-		!(spi->mode & SPI_CS_HIGH);
-
-	if (dws->cs_control)
-		dws->cs_control(active);
-	if (gpio_is_valid(spi->cs_gpio))
-		gpio_set_value(spi->cs_gpio, gpio_val);
-
-	if (active)
-		dw_writel(dws, DW_SPI_SER, 1 << cs);
-}
-
 /* Disable IRQ bits */
 static inline void spi_mask_intr(struct dw_spi *dws, u32 mask)
 {
@@ -217,15 +179,26 @@
 }
 
 /*
+ * Disable the SPI controller, mask all interrupts, and re-enable the
+ * controller. Transmit and receive FIFO buffers are cleared when the
+ * device is disabled.
+ */
+static inline void spi_reset_chip(struct dw_spi *dws)
+{
+	spi_enable_chip(dws, 0);
+	spi_mask_intr(dws, 0xff);
+	spi_enable_chip(dws, 1);
+}
+
+/*
  * Each SPI slave device to work with dw_api controller should
- * has such a structure claiming its working mode (PIO/DMA etc),
+ * have such a structure claiming its working mode (poll or PIO/DMA),
  * which can be save in the "controller_data" member of the
  * struct spi_device.
  */
 struct dw_spi_chip {
 	u8 poll_mode;	/* 1 for controller polling mode */
 	u8 type;	/* SPI/SSP/MicroWire */
-	u8 enable_dma;
 	void (*cs_control)(u32 command);
 };
 
@@ -233,7 +206,6 @@
 extern void dw_spi_remove_host(struct dw_spi *dws);
 extern int dw_spi_suspend_host(struct dw_spi *dws);
 extern int dw_spi_resume_host(struct dw_spi *dws);
-extern void dw_spi_xfer_done(struct dw_spi *dws);
 
 /* platform related setup */
 extern int dw_spi_mid_init(struct dw_spi *dws); /* Intel MID platforms */
diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c
index d1a3924..5fe54cd 100644
--- a/drivers/spi/spi-fsl-dspi.c
+++ b/drivers/spi/spi-fsl-dspi.c
@@ -20,6 +20,7 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/kernel.h>
+#include <linux/math64.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
@@ -29,6 +30,7 @@
 #include <linux/sched.h>
 #include <linux/spi/spi.h>
 #include <linux/spi/spi_bitbang.h>
+#include <linux/time.h>
 
 #define DRIVER_NAME "fsl-dspi"
 
@@ -51,7 +53,7 @@
 #define SPI_CTAR_CPOL(x)	((x) << 26)
 #define SPI_CTAR_CPHA(x)	((x) << 25)
 #define SPI_CTAR_LSBFE(x)	((x) << 24)
-#define SPI_CTAR_PCSSCR(x)	(((x) & 0x00000003) << 22)
+#define SPI_CTAR_PCSSCK(x)	(((x) & 0x00000003) << 22)
 #define SPI_CTAR_PASC(x)	(((x) & 0x00000003) << 20)
 #define SPI_CTAR_PDT(x)	(((x) & 0x00000003) << 18)
 #define SPI_CTAR_PBR(x)	(((x) & 0x00000003) << 16)
@@ -59,6 +61,7 @@
 #define SPI_CTAR_ASC(x)	(((x) & 0x0000000f) << 8)
 #define SPI_CTAR_DT(x)		(((x) & 0x0000000f) << 4)
 #define SPI_CTAR_BR(x)		((x) & 0x0000000f)
+#define SPI_CTAR_SCALE_BITS	0xf
 
 #define SPI_CTAR0_SLAVE	0x0c
 
@@ -148,23 +151,66 @@
 		16,	32,	64,	128,
 		256,	512,	1024,	2048,
 		4096,	8192,	16384,	32768 };
-	int temp, i = 0, j = 0;
+	int scale_needed, scale, minscale = INT_MAX;
+	int i, j;
 
-	temp = clkrate / 2 / speed_hz;
+	scale_needed = clkrate / speed_hz;
+	if (clkrate % speed_hz)
+		scale_needed++;
 
-	for (i = 0; i < ARRAY_SIZE(pbr_tbl); i++)
-		for (j = 0; j < ARRAY_SIZE(brs); j++) {
-			if (pbr_tbl[i] * brs[j] >= temp) {
-				*pbr = i;
-				*br = j;
-				return;
+	for (i = 0; i < ARRAY_SIZE(brs); i++)
+		for (j = 0; j < ARRAY_SIZE(pbr_tbl); j++) {
+			scale = brs[i] * pbr_tbl[j];
+			if (scale >= scale_needed) {
+				if (scale < minscale) {
+					minscale = scale;
+					*br = i;
+					*pbr = j;
+				}
+				break;
 			}
 		}
 
-	pr_warn("Can not find valid baud rate,speed_hz is %d,clkrate is %ld\
-		,we use the max prescaler value.\n", speed_hz, clkrate);
-	*pbr = ARRAY_SIZE(pbr_tbl) - 1;
-	*br =  ARRAY_SIZE(brs) - 1;
+	if (minscale == INT_MAX) {
+		pr_warn("Can not find valid baud rate,speed_hz is %d,clkrate is %ld, we use the max prescaler value.\n",
+			speed_hz, clkrate);
+		*pbr = ARRAY_SIZE(pbr_tbl) - 1;
+		*br =  ARRAY_SIZE(brs) - 1;
+	}
+}
+
+static void ns_delay_scale(char *psc, char *sc, int delay_ns,
+		unsigned long clkrate)
+{
+	int pscale_tbl[4] = {1, 3, 5, 7};
+	int scale_needed, scale, minscale = INT_MAX;
+	int i, j;
+	u32 remainder;
+
+	scale_needed = div_u64_rem((u64)delay_ns * clkrate, NSEC_PER_SEC,
+			&remainder);
+	if (remainder)
+		scale_needed++;
+
+	for (i = 0; i < ARRAY_SIZE(pscale_tbl); i++)
+		for (j = 0; j <= SPI_CTAR_SCALE_BITS; j++) {
+			scale = pscale_tbl[i] * (2 << j);
+			if (scale >= scale_needed) {
+				if (scale < minscale) {
+					minscale = scale;
+					*psc = i;
+					*sc = j;
+				}
+				break;
+			}
+		}
+
+	if (minscale == INT_MAX) {
+		pr_warn("Cannot find correct scale values for %dns delay at clkrate %ld, using max prescaler value",
+			delay_ns, clkrate);
+		*psc = ARRAY_SIZE(pscale_tbl) - 1;
+		*sc = SPI_CTAR_SCALE_BITS;
+	}
 }
 
 static int dspi_transfer_write(struct fsl_dspi *dspi)
@@ -345,7 +391,10 @@
 {
 	struct chip_data *chip;
 	struct fsl_dspi *dspi = spi_master_get_devdata(spi->master);
-	unsigned char br = 0, pbr = 0, fmsz = 0;
+	u32 cs_sck_delay = 0, sck_cs_delay = 0;
+	unsigned char br = 0, pbr = 0, pcssck = 0, cssck = 0;
+	unsigned char pasc = 0, asc = 0, fmsz = 0;
+	unsigned long clkrate;
 
 	if ((spi->bits_per_word >= 4) && (spi->bits_per_word <= 16)) {
 		fmsz = spi->bits_per_word - 1;
@@ -362,18 +411,34 @@
 			return -ENOMEM;
 	}
 
+	of_property_read_u32(spi->dev.of_node, "fsl,spi-cs-sck-delay",
+			&cs_sck_delay);
+
+	of_property_read_u32(spi->dev.of_node, "fsl,spi-sck-cs-delay",
+			&sck_cs_delay);
+
 	chip->mcr_val = SPI_MCR_MASTER | SPI_MCR_PCSIS |
 		SPI_MCR_CLR_TXF | SPI_MCR_CLR_RXF;
 
 	chip->void_write_data = 0;
 
-	hz_to_spi_baud(&pbr, &br,
-			spi->max_speed_hz, clk_get_rate(dspi->clk));
+	clkrate = clk_get_rate(dspi->clk);
+	hz_to_spi_baud(&pbr, &br, spi->max_speed_hz, clkrate);
+
+	/* Set PCS to SCK delay scale values */
+	ns_delay_scale(&pcssck, &cssck, cs_sck_delay, clkrate);
+
+	/* Set After SCK delay scale values */
+	ns_delay_scale(&pasc, &asc, sck_cs_delay, clkrate);
 
 	chip->ctar_val =  SPI_CTAR_FMSZ(fmsz)
 		| SPI_CTAR_CPOL(spi->mode & SPI_CPOL ? 1 : 0)
 		| SPI_CTAR_CPHA(spi->mode & SPI_CPHA ? 1 : 0)
 		| SPI_CTAR_LSBFE(spi->mode & SPI_LSB_FIRST ? 1 : 0)
+		| SPI_CTAR_PCSSCK(pcssck)
+		| SPI_CTAR_CSSCK(cssck)
+		| SPI_CTAR_PASC(pasc)
+		| SPI_CTAR_ASC(asc)
 		| SPI_CTAR_PBR(pbr)
 		| SPI_CTAR_BR(br);
 
diff --git a/drivers/spi/spi-img-spfi.c b/drivers/spi/spi-img-spfi.c
index e649bc7..788e2b1 100644
--- a/drivers/spi/spi-img-spfi.c
+++ b/drivers/spi/spi-img-spfi.c
@@ -12,6 +12,7 @@
 #include <linux/clk.h>
 #include <linux/delay.h>
 #include <linux/dmaengine.h>
+#include <linux/gpio.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/irq.h>
@@ -122,36 +123,31 @@
 	spfi_writel(spfi, val, SPFI_CONTROL);
 }
 
-static inline void spfi_stop(struct img_spfi *spfi)
-{
-	u32 val;
-
-	val = spfi_readl(spfi, SPFI_CONTROL);
-	val &= ~SPFI_CONTROL_SPFI_EN;
-	spfi_writel(spfi, val, SPFI_CONTROL);
-}
-
 static inline void spfi_reset(struct img_spfi *spfi)
 {
 	spfi_writel(spfi, SPFI_CONTROL_SOFT_RESET, SPFI_CONTROL);
-	udelay(1);
 	spfi_writel(spfi, 0, SPFI_CONTROL);
 }
 
-static void spfi_flush_tx_fifo(struct img_spfi *spfi)
+static int spfi_wait_all_done(struct img_spfi *spfi)
 {
-	unsigned long timeout = jiffies + msecs_to_jiffies(10);
+	unsigned long timeout = jiffies + msecs_to_jiffies(50);
 
-	spfi_writel(spfi, SPFI_INTERRUPT_SDE, SPFI_INTERRUPT_CLEAR);
 	while (time_before(jiffies, timeout)) {
-		if (spfi_readl(spfi, SPFI_INTERRUPT_STATUS) &
-		    SPFI_INTERRUPT_SDE)
-			return;
+		u32 status = spfi_readl(spfi, SPFI_INTERRUPT_STATUS);
+
+		if (status & SPFI_INTERRUPT_ALLDONETRIG) {
+			spfi_writel(spfi, SPFI_INTERRUPT_ALLDONETRIG,
+				    SPFI_INTERRUPT_CLEAR);
+			return 0;
+		}
 		cpu_relax();
 	}
 
-	dev_err(spfi->dev, "Timed out waiting for FIFO to drain\n");
+	dev_err(spfi->dev, "Timed out waiting for transaction to complete\n");
 	spfi_reset(spfi);
+
+	return -ETIMEDOUT;
 }
 
 static unsigned int spfi_pio_write32(struct img_spfi *spfi, const u32 *buf,
@@ -237,6 +233,7 @@
 	const void *tx_buf = xfer->tx_buf;
 	void *rx_buf = xfer->rx_buf;
 	unsigned long timeout;
+	int ret;
 
 	if (tx_buf)
 		tx_bytes = xfer->len;
@@ -269,16 +266,15 @@
 		cpu_relax();
 	}
 
+	ret = spfi_wait_all_done(spfi);
+	if (ret < 0)
+		return ret;
+
 	if (rx_bytes > 0 || tx_bytes > 0) {
 		dev_err(spfi->dev, "PIO transfer timed out\n");
-		spfi_reset(spfi);
 		return -ETIMEDOUT;
 	}
 
-	if (tx_buf)
-		spfi_flush_tx_fifo(spfi);
-	spfi_stop(spfi);
-
 	return 0;
 }
 
@@ -287,14 +283,12 @@
 	struct img_spfi *spfi = data;
 	unsigned long flags;
 
+	spfi_wait_all_done(spfi);
+
 	spin_lock_irqsave(&spfi->lock, flags);
-
 	spfi->rx_dma_busy = false;
-	if (!spfi->tx_dma_busy) {
-		spfi_stop(spfi);
+	if (!spfi->tx_dma_busy)
 		spi_finalize_current_transfer(spfi->master);
-	}
-
 	spin_unlock_irqrestore(&spfi->lock, flags);
 }
 
@@ -303,16 +297,12 @@
 	struct img_spfi *spfi = data;
 	unsigned long flags;
 
-	spfi_flush_tx_fifo(spfi);
+	spfi_wait_all_done(spfi);
 
 	spin_lock_irqsave(&spfi->lock, flags);
-
 	spfi->tx_dma_busy = false;
-	if (!spfi->rx_dma_busy) {
-		spfi_stop(spfi);
+	if (!spfi->rx_dma_busy)
 		spi_finalize_current_transfer(spfi->master);
-	}
-
 	spin_unlock_irqrestore(&spfi->lock, flags);
 }
 
@@ -397,6 +387,75 @@
 	return -EIO;
 }
 
+static void img_spfi_handle_err(struct spi_master *master,
+				struct spi_message *msg)
+{
+	struct img_spfi *spfi = spi_master_get_devdata(master);
+	unsigned long flags;
+
+	/*
+	 * Stop all DMA and reset the controller if the previous transaction
+	 * timed out and never completed its DMA.
+	 */
+	spin_lock_irqsave(&spfi->lock, flags);
+	if (spfi->tx_dma_busy || spfi->rx_dma_busy) {
+		spfi->tx_dma_busy = false;
+		spfi->rx_dma_busy = false;
+
+		dmaengine_terminate_all(spfi->tx_ch);
+		dmaengine_terminate_all(spfi->rx_ch);
+	}
+	spin_unlock_irqrestore(&spfi->lock, flags);
+}
+
+static int img_spfi_prepare(struct spi_master *master, struct spi_message *msg)
+{
+	struct img_spfi *spfi = spi_master_get_devdata(master);
+	u32 val;
+
+	val = spfi_readl(spfi, SPFI_PORT_STATE);
+	if (msg->spi->mode & SPI_CPHA)
+		val |= SPFI_PORT_STATE_CK_PHASE(msg->spi->chip_select);
+	else
+		val &= ~SPFI_PORT_STATE_CK_PHASE(msg->spi->chip_select);
+	if (msg->spi->mode & SPI_CPOL)
+		val |= SPFI_PORT_STATE_CK_POL(msg->spi->chip_select);
+	else
+		val &= ~SPFI_PORT_STATE_CK_POL(msg->spi->chip_select);
+	spfi_writel(spfi, val, SPFI_PORT_STATE);
+
+	return 0;
+}
+
+static int img_spfi_unprepare(struct spi_master *master,
+			      struct spi_message *msg)
+{
+	struct img_spfi *spfi = spi_master_get_devdata(master);
+
+	spfi_reset(spfi);
+
+	return 0;
+}
+
+static int img_spfi_setup(struct spi_device *spi)
+{
+	int ret;
+
+	ret = gpio_request_one(spi->cs_gpio, (spi->mode & SPI_CS_HIGH) ?
+			       GPIOF_OUT_INIT_LOW : GPIOF_OUT_INIT_HIGH,
+			       dev_name(&spi->dev));
+	if (ret)
+		dev_err(&spi->dev, "can't request chipselect gpio %d\n",
+				spi->cs_gpio);
+
+	return ret;
+}
+
+static void img_spfi_cleanup(struct spi_device *spi)
+{
+	gpio_free(spi->cs_gpio);
+}
+
 static void img_spfi_config(struct spi_master *master, struct spi_device *spi,
 			    struct spi_transfer *xfer)
 {
@@ -405,10 +464,10 @@
 
 	/*
 	 * output = spfi_clk * (BITCLK / 512), where BITCLK must be a
-	 * power of 2 up to 256 (where 255 == 256 since BITCLK is 8 bits)
+	 * power of 2 up to 128
 	 */
-	div = DIV_ROUND_UP(master->max_speed_hz, xfer->speed_hz);
-	div = clamp(512 / (1 << get_count_order(div)), 1, 255);
+	div = DIV_ROUND_UP(clk_get_rate(spfi->spfi_clk), xfer->speed_hz);
+	div = clamp(512 / (1 << get_count_order(div)), 1, 128);
 
 	val = spfi_readl(spfi, SPFI_DEVICE_PARAMETER(spi->chip_select));
 	val &= ~(SPFI_DEVICE_PARAMETER_BITCLK_MASK <<
@@ -416,6 +475,9 @@
 	val |= div << SPFI_DEVICE_PARAMETER_BITCLK_SHIFT;
 	spfi_writel(spfi, val, SPFI_DEVICE_PARAMETER(spi->chip_select));
 
+	spfi_writel(spfi, xfer->len << SPFI_TRANSACTION_TSIZE_SHIFT,
+		    SPFI_TRANSACTION);
+
 	val = spfi_readl(spfi, SPFI_CONTROL);
 	val &= ~(SPFI_CONTROL_SEND_DMA | SPFI_CONTROL_GET_DMA);
 	if (xfer->tx_buf)
@@ -429,25 +491,7 @@
 	else if (xfer->tx_nbits == SPI_NBITS_QUAD &&
 		 xfer->rx_nbits == SPI_NBITS_QUAD)
 		val |= SPFI_CONTROL_TMODE_QUAD << SPFI_CONTROL_TMODE_SHIFT;
-	val &= ~SPFI_CONTROL_CONTINUE;
-	if (!xfer->cs_change && !list_is_last(&xfer->transfer_list,
-					      &master->cur_msg->transfers))
-		val |= SPFI_CONTROL_CONTINUE;
 	spfi_writel(spfi, val, SPFI_CONTROL);
-
-	val = spfi_readl(spfi, SPFI_PORT_STATE);
-	if (spi->mode & SPI_CPHA)
-		val |= SPFI_PORT_STATE_CK_PHASE(spi->chip_select);
-	else
-		val &= ~SPFI_PORT_STATE_CK_PHASE(spi->chip_select);
-	if (spi->mode & SPI_CPOL)
-		val |= SPFI_PORT_STATE_CK_POL(spi->chip_select);
-	else
-		val &= ~SPFI_PORT_STATE_CK_POL(spi->chip_select);
-	spfi_writel(spfi, val, SPFI_PORT_STATE);
-
-	spfi_writel(spfi, xfer->len << SPFI_TRANSACTION_TSIZE_SHIFT,
-		    SPFI_TRANSACTION);
 }
 
 static int img_spfi_transfer_one(struct spi_master *master,
@@ -455,8 +499,6 @@
 				 struct spi_transfer *xfer)
 {
 	struct img_spfi *spfi = spi_master_get_devdata(spi->master);
-	bool dma_reset = false;
-	unsigned long flags;
 	int ret;
 
 	if (xfer->len > SPFI_TRANSACTION_TSIZE_MASK) {
@@ -466,23 +508,6 @@
 		return -EINVAL;
 	}
 
-	/*
-	 * Stop all DMA and reset the controller if the previous transaction
-	 * timed-out and never completed it's DMA.
-	 */
-	spin_lock_irqsave(&spfi->lock, flags);
-	if (spfi->tx_dma_busy || spfi->rx_dma_busy) {
-		dev_err(spfi->dev, "SPI DMA still busy\n");
-		dma_reset = true;
-	}
-	spin_unlock_irqrestore(&spfi->lock, flags);
-
-	if (dma_reset) {
-		dmaengine_terminate_all(spfi->tx_ch);
-		dmaengine_terminate_all(spfi->rx_ch);
-		spfi_reset(spfi);
-	}
-
 	img_spfi_config(master, spi, xfer);
 	if (master->can_dma && master->can_dma(master, spi, xfer))
 		ret = img_spfi_start_dma(master, spi, xfer);
@@ -492,17 +517,6 @@
 	return ret;
 }
 
-static void img_spfi_set_cs(struct spi_device *spi, bool enable)
-{
-	struct img_spfi *spfi = spi_master_get_devdata(spi->master);
-	u32 val;
-
-	val = spfi_readl(spfi, SPFI_PORT_STATE);
-	val &= ~(SPFI_PORT_STATE_DEV_SEL_MASK << SPFI_PORT_STATE_DEV_SEL_SHIFT);
-	val |= spi->chip_select << SPFI_PORT_STATE_DEV_SEL_SHIFT;
-	spfi_writel(spfi, val, SPFI_PORT_STATE);
-}
-
 static bool img_spfi_can_dma(struct spi_master *master, struct spi_device *spi,
 			     struct spi_transfer *xfer)
 {
@@ -591,14 +605,17 @@
 	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_TX_DUAL | SPI_RX_DUAL;
 	if (of_property_read_bool(spfi->dev->of_node, "img,supports-quad-mode"))
 		master->mode_bits |= SPI_TX_QUAD | SPI_RX_QUAD;
-	master->num_chipselect = 5;
 	master->dev.of_node = pdev->dev.of_node;
 	master->bits_per_word_mask = SPI_BPW_MASK(32) | SPI_BPW_MASK(8);
-	master->max_speed_hz = clk_get_rate(spfi->spfi_clk);
-	master->min_speed_hz = master->max_speed_hz / 512;
+	master->max_speed_hz = clk_get_rate(spfi->spfi_clk) / 4;
+	master->min_speed_hz = clk_get_rate(spfi->spfi_clk) / 512;
 
-	master->set_cs = img_spfi_set_cs;
+	master->setup = img_spfi_setup;
+	master->cleanup = img_spfi_cleanup;
 	master->transfer_one = img_spfi_transfer_one;
+	master->prepare_message = img_spfi_prepare;
+	master->unprepare_message = img_spfi_unprepare;
+	master->handle_err = img_spfi_handle_err;
 
 	spfi->tx_ch = dma_request_slave_channel(spfi->dev, "tx");
 	spfi->rx_ch = dma_request_slave_channel(spfi->dev, "rx");
diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c
index 6fea4af..f08e812 100644
--- a/drivers/spi/spi-imx.c
+++ b/drivers/spi/spi-imx.c
@@ -370,8 +370,6 @@
 	if (spi_imx->dma_is_inited) {
 		dma = readl(spi_imx->base + MX51_ECSPI_DMA);
 
-		spi_imx->tx_wml = spi_imx_get_fifosize(spi_imx) / 2;
-		spi_imx->rx_wml = spi_imx_get_fifosize(spi_imx) / 2;
 		spi_imx->rxt_wml = spi_imx_get_fifosize(spi_imx) / 2;
 		rx_wml_cfg = spi_imx->rx_wml << MX51_ECSPI_DMA_RX_WML_OFFSET;
 		tx_wml_cfg = spi_imx->tx_wml << MX51_ECSPI_DMA_TX_WML_OFFSET;
@@ -868,6 +866,8 @@
 	master->max_dma_len = MAX_SDMA_BD_BYTES;
 	spi_imx->bitbang.master->flags = SPI_MASTER_MUST_RX |
 					 SPI_MASTER_MUST_TX;
+	spi_imx->tx_wml = spi_imx_get_fifosize(spi_imx) / 2;
+	spi_imx->rx_wml = spi_imx_get_fifosize(spi_imx) / 2;
 	spi_imx->dma_is_inited = 1;
 
 	return 0;
@@ -903,7 +903,7 @@
 
 	if (tx) {
 		desc_tx = dmaengine_prep_slave_sg(master->dma_tx,
-					tx->sgl, tx->nents, DMA_TO_DEVICE,
+					tx->sgl, tx->nents, DMA_MEM_TO_DEV,
 					DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
 		if (!desc_tx)
 			goto no_dma;
@@ -915,7 +915,7 @@
 
 	if (rx) {
 		desc_rx = dmaengine_prep_slave_sg(master->dma_rx,
-					rx->sgl, rx->nents, DMA_FROM_DEVICE,
+					rx->sgl, rx->nents, DMA_DEV_TO_MEM,
 					DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
 		if (!desc_rx)
 			goto no_dma;
diff --git a/drivers/spi/spi-mpc512x-psc.c b/drivers/spi/spi-mpc512x-psc.c
index ecae0d4..965d2bd 100644
--- a/drivers/spi/spi-mpc512x-psc.c
+++ b/drivers/spi/spi-mpc512x-psc.c
@@ -588,7 +588,7 @@
 	return mpc512x_psc_spi_do_remove(&op->dev);
 }
 
-static struct of_device_id mpc512x_psc_spi_of_match[] = {
+static const struct of_device_id mpc512x_psc_spi_of_match[] = {
 	{ .compatible = "fsl,mpc5121-psc-spi", },
 	{},
 };
diff --git a/drivers/spi/spi-octeon.c b/drivers/spi/spi-octeon.c
index b283d53..e99d6a9 100644
--- a/drivers/spi/spi-octeon.c
+++ b/drivers/spi/spi-octeon.c
@@ -238,7 +238,7 @@
 	return 0;
 }
 
-static struct of_device_id octeon_spi_match[] = {
+static const struct of_device_id octeon_spi_match[] = {
 	{ .compatible = "cavium,octeon-3010-spi", },
 	{},
 };
diff --git a/drivers/spi/spi-omap-100k.c b/drivers/spi/spi-omap-100k.c
index d890d30..35b332d 100644
--- a/drivers/spi/spi-omap-100k.c
+++ b/drivers/spi/spi-omap-100k.c
@@ -24,6 +24,7 @@
 #include <linux/device.h>
 #include <linux/delay.h>
 #include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
 #include <linux/err.h>
 #include <linux/clk.h>
 #include <linux/io.h>
@@ -294,16 +295,6 @@
 	return ret;
 }
 
-static int omap1_spi100k_prepare_hardware(struct spi_master *master)
-{
-	struct omap1_spi100k *spi100k = spi_master_get_devdata(master);
-
-	clk_prepare_enable(spi100k->ick);
-	clk_prepare_enable(spi100k->fck);
-
-	return 0;
-}
-
 static int omap1_spi100k_transfer_one_message(struct spi_master *master,
 					      struct spi_message *m)
 {
@@ -372,16 +363,6 @@
 	return status;
 }
 
-static int omap1_spi100k_unprepare_hardware(struct spi_master *master)
-{
-	struct omap1_spi100k *spi100k = spi_master_get_devdata(master);
-
-	clk_disable_unprepare(spi100k->ick);
-	clk_disable_unprepare(spi100k->fck);
-
-	return 0;
-}
-
 static int omap1_spi100k_probe(struct platform_device *pdev)
 {
 	struct spi_master       *master;
@@ -402,14 +383,12 @@
 
 	master->setup = omap1_spi100k_setup;
 	master->transfer_one_message = omap1_spi100k_transfer_one_message;
-	master->prepare_transfer_hardware = omap1_spi100k_prepare_hardware;
-	master->unprepare_transfer_hardware = omap1_spi100k_unprepare_hardware;
-	master->cleanup = NULL;
 	master->num_chipselect = 2;
 	master->mode_bits = MODEBITS;
 	master->bits_per_word_mask = SPI_BPW_RANGE_MASK(4, 32);
 	master->min_speed_hz = OMAP1_SPI100K_MAX_FREQ/(1<<16);
 	master->max_speed_hz = OMAP1_SPI100K_MAX_FREQ;
+	master->auto_runtime_pm = true;
 
 	spi100k = spi_master_get_devdata(master);
 
@@ -434,22 +413,96 @@
 		goto err;
 	}
 
+	status = clk_prepare_enable(spi100k->ick);
+	if (status != 0) {
+		dev_err(&pdev->dev, "failed to enable ick: %d\n", status);
+		goto err;
+	}
+
+	status = clk_prepare_enable(spi100k->fck);
+	if (status != 0) {
+		dev_err(&pdev->dev, "failed to enable fck: %d\n", status);
+		goto err_ick;
+	}
+
+	pm_runtime_enable(&pdev->dev);
+	pm_runtime_set_active(&pdev->dev);
+
 	status = devm_spi_register_master(&pdev->dev, master);
 	if (status < 0)
-		goto err;
+		goto err_fck;
 
 	return status;
 
+err_fck:
+	clk_disable_unprepare(spi100k->fck);
+err_ick:
+	clk_disable_unprepare(spi100k->ick);
 err:
 	spi_master_put(master);
 	return status;
 }
 
+static int omap1_spi100k_remove(struct platform_device *pdev)
+{
+	struct spi_master *master = spi_master_get(platform_get_drvdata(pdev));
+	struct omap1_spi100k *spi100k = spi_master_get_devdata(master);
+
+	pm_runtime_disable(&pdev->dev);
+
+	clk_disable_unprepare(spi100k->fck);
+	clk_disable_unprepare(spi100k->ick);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM
+static int omap1_spi100k_runtime_suspend(struct device *dev)
+{
+	struct spi_master *master = spi_master_get(dev_get_drvdata(dev));
+	struct omap1_spi100k *spi100k = spi_master_get_devdata(master);
+
+	clk_disable_unprepare(spi100k->ick);
+	clk_disable_unprepare(spi100k->fck);
+
+	return 0;
+}
+
+static int omap1_spi100k_runtime_resume(struct device *dev)
+{
+	struct spi_master *master = spi_master_get(dev_get_drvdata(dev));
+	struct omap1_spi100k *spi100k = spi_master_get_devdata(master);
+	int ret;
+
+	ret = clk_prepare_enable(spi100k->ick);
+	if (ret != 0) {
+		dev_err(dev, "Failed to enable ick: %d\n", ret);
+		return ret;
+	}
+
+	ret = clk_prepare_enable(spi100k->fck);
+	if (ret != 0) {
+		dev_err(dev, "Failed to enable fck: %d\n", ret);
+		clk_disable_unprepare(spi100k->ick);
+		return ret;
+	}
+
+	return 0;
+}
+#endif
+
+static const struct dev_pm_ops omap1_spi100k_pm = {
+	SET_RUNTIME_PM_OPS(omap1_spi100k_runtime_suspend,
+			   omap1_spi100k_runtime_resume, NULL)
+};
+
 static struct platform_driver omap1_spi100k_driver = {
 	.driver = {
 		.name		= "omap1_spi100k",
+		.pm		= &omap1_spi100k_pm,
 	},
 	.probe		= omap1_spi100k_probe,
+	.remove		= omap1_spi100k_remove,
 };
 
 module_platform_driver(omap1_spi100k_driver);
diff --git a/drivers/spi/spi-omap-uwire.c b/drivers/spi/spi-omap-uwire.c
index 3c08444..55576db 100644
--- a/drivers/spi/spi-omap-uwire.c
+++ b/drivers/spi/spi-omap-uwire.c
@@ -44,7 +44,6 @@
 #include <linux/module.h>
 #include <linux/io.h>
 
-#include <asm/irq.h>
 #include <mach/hardware.h>
 #include <asm/mach-types.h>
 
diff --git a/drivers/spi/spi-pl022.c b/drivers/spi/spi-pl022.c
index ee513a8..94af806 100644
--- a/drivers/spi/spi-pl022.c
+++ b/drivers/spi/spi-pl022.c
@@ -285,7 +285,12 @@
  */
 #define DEFAULT_SSP_REG_IMSC  0x0UL
 #define DISABLE_ALL_INTERRUPTS DEFAULT_SSP_REG_IMSC
-#define ENABLE_ALL_INTERRUPTS (~DEFAULT_SSP_REG_IMSC)
+#define ENABLE_ALL_INTERRUPTS ( \
+	SSP_IMSC_MASK_RORIM | \
+	SSP_IMSC_MASK_RTIM | \
+	SSP_IMSC_MASK_RXIM | \
+	SSP_IMSC_MASK_TXIM \
+)
 
 #define CLEAR_ALL_INTERRUPTS  0x3
 
@@ -1251,7 +1256,6 @@
 	struct pl022 *pl022 = dev_id;
 	struct spi_message *msg = pl022->cur_msg;
 	u16 irq_status = 0;
-	u16 flag = 0;
 
 	if (unlikely(!msg)) {
 		dev_err(&pl022->adev->dev,
@@ -1280,9 +1284,6 @@
 		if (readw(SSP_SR(pl022->virtbase)) & SSP_SR_MASK_RFF)
 			dev_err(&pl022->adev->dev,
 				"RXFIFO is full\n");
-		if (readw(SSP_SR(pl022->virtbase)) & SSP_SR_MASK_TNF)
-			dev_err(&pl022->adev->dev,
-				"TXFIFO is full\n");
 
 		/*
 		 * Disable and clear interrupts, disable SSP,
@@ -1303,8 +1304,7 @@
 
 	readwriter(pl022);
 
-	if ((pl022->tx == pl022->tx_end) && (flag == 0)) {
-		flag = 1;
+	if (pl022->tx == pl022->tx_end) {
 		/* Disable Transmit interrupt, enable receive interrupt */
 		writew((readw(SSP_IMSC(pl022->virtbase)) &
 		       ~SSP_IMSC_MASK_TXIM) | SSP_IMSC_MASK_RXIM,
diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
index 6f72ad0..e3223ac 100644
--- a/drivers/spi/spi-pxa2xx.c
+++ b/drivers/spi/spi-pxa2xx.c
@@ -20,6 +20,7 @@
 #include <linux/errno.h>
 #include <linux/err.h>
 #include <linux/interrupt.h>
+#include <linux/kernel.h>
 #include <linux/platform_device.h>
 #include <linux/spi/pxa2xx_spi.h>
 #include <linux/spi/spi.h>
@@ -30,10 +31,6 @@
 #include <linux/pm_runtime.h>
 #include <linux/acpi.h>
 
-#include <asm/io.h>
-#include <asm/irq.h>
-#include <asm/delay.h>
-
 #include "spi-pxa2xx.h"
 
 MODULE_AUTHOR("Stephen Street");
@@ -67,54 +64,6 @@
 #define LPSS_TX_LOTHRESH_DFLT	160
 #define LPSS_TX_HITHRESH_DFLT	224
 
-struct quark_spi_rate {
-	u32 bitrate;
-	u32 dds_clk_rate;
-	u32 clk_div;
-};
-
-/*
- * 'rate', 'dds', 'clk_div' lookup table, which is defined in
- * the Quark SPI datasheet.
- */
-static const struct quark_spi_rate quark_spi_rate_table[] = {
-/*	bitrate,	dds_clk_rate,	clk_div */
-	{50000000,	0x800000,	0},
-	{40000000,	0x666666,	0},
-	{25000000,	0x400000,	0},
-	{20000000,	0x666666,	1},
-	{16667000,	0x800000,	2},
-	{13333000,	0x666666,	2},
-	{12500000,	0x200000,	0},
-	{10000000,	0x800000,	4},
-	{8000000,	0x666666,	4},
-	{6250000,	0x400000,	3},
-	{5000000,	0x400000,	4},
-	{4000000,	0x666666,	9},
-	{3125000,	0x80000,	0},
-	{2500000,	0x400000,	9},
-	{2000000,	0x666666,	19},
-	{1563000,	0x40000,	0},
-	{1250000,	0x200000,	9},
-	{1000000,	0x400000,	24},
-	{800000,	0x666666,	49},
-	{781250,	0x20000,	0},
-	{625000,	0x200000,	19},
-	{500000,	0x400000,	49},
-	{400000,	0x666666,	99},
-	{390625,	0x10000,	0},
-	{250000,	0x400000,	99},
-	{200000,	0x666666,	199},
-	{195313,	0x8000,		0},
-	{125000,	0x100000,	49},
-	{100000,	0x200000,	124},
-	{50000,		0x100000,	124},
-	{25000,		0x80000,	124},
-	{10016,		0x20000,	77},
-	{5040,		0x20000,	154},
-	{1002,		0x8000,		194},
-};
-
 /* Offset from drv_data->lpss_base */
 #define GENERAL_REG		0x08
 #define GENERAL_REG_RXTO_HOLDOFF_DISABLE BIT(24)
@@ -701,25 +650,124 @@
 }
 
 /*
- * The Quark SPI data sheet gives a table, and for the given 'rate',
- * the 'dds' and 'clk_div' can be found in the table.
+ * The Quark SPI has an additional 24 bit register (DDS_CLK_RATE) to multiply
+ * input frequency by fractions of 2^24. It also has a divider by 5.
+ *
+ * There are formulas to get baud rate value for given input frequency and
+ * divider parameters, such as DDS_CLK_RATE and SCR:
+ *
+ * Fsys = 200MHz
+ *
+ * Fssp = Fsys * DDS_CLK_RATE / 2^24			(1)
+ * Baud rate = Fsclk = Fssp / (2 * (SCR + 1))		(2)
+ *
+ * DDS_CLK_RATE either 2^n or 2^n / 5.
+ * SCR is in range 0 .. 255
+ *
+ * Divisor = 5^i * 2^j * 2 * k
+ *       i = [0, 1]      i = 1 iff j = 0 or j > 3
+ *       j = [0, 23]     j = 0 iff i = 1
+ *       k = [1, 256]
+ * Special case: j = 0, i = 1: Divisor = 2 / 5
+ *
+ * Accordingly to the specification the recommended values for DDS_CLK_RATE
+ * are:
+ *	Case 1:		2^n, n = [0, 23]
+ *	Case 2:		2^24 * 2 / 5 (0x666666)
+ *	Case 3:		less than or equal to 2^24 / 5 / 16 (0x33333)
+ *
+ * In all cases the lowest possible value is better.
+ *
+ * The function calculates parameters for all cases and chooses the one closest
+ * to the asked baud rate.
  */
-static u32 quark_x1000_set_clk_regvals(u32 rate, u32 *dds, u32 *clk_div)
+static unsigned int quark_x1000_get_clk_div(int rate, u32 *dds)
 {
-	unsigned int i;
+	unsigned long xtal = 200000000;
+	unsigned long fref = xtal / 2;		/* mandatory division by 2,
+						   see (2) */
+						/* case 3 */
+	unsigned long fref1 = fref / 2;		/* case 1 */
+	unsigned long fref2 = fref * 2 / 5;	/* case 2 */
+	unsigned long scale;
+	unsigned long q, q1, q2;
+	long r, r1, r2;
+	u32 mul;
 
-	for (i = 0; i < ARRAY_SIZE(quark_spi_rate_table); i++) {
-		if (rate >= quark_spi_rate_table[i].bitrate) {
-			*dds = quark_spi_rate_table[i].dds_clk_rate;
-			*clk_div = quark_spi_rate_table[i].clk_div;
-			return quark_spi_rate_table[i].bitrate;
+	/* Case 1 */
+
+	/* Set initial value for DDS_CLK_RATE */
+	mul = (1 << 24) >> 1;
+
+	/* Calculate initial quot */
+	q1 = DIV_ROUND_CLOSEST(fref1, rate);
+
+	/* Scale q1 if it's too big */
+	if (q1 > 256) {
+		/* Scale q1 to range [1, 512] */
+		scale = fls_long(q1 - 1);
+		if (scale > 9) {
+			q1 >>= scale - 9;
+			mul >>= scale - 9;
+		}
+
+		/* Round the result if we have a remainder */
+		q1 += q1 & 1;
+	}
+
+	/* Decrease DDS_CLK_RATE as much as we can without loss in precision */
+	scale = __ffs(q1);
+	q1 >>= scale;
+	mul >>= scale;
+
+	/* Get the remainder */
+	r1 = abs(fref1 / (1 << (24 - fls_long(mul))) / q1 - rate);
+
+	/* Case 2 */
+
+	q2 = DIV_ROUND_CLOSEST(fref2, rate);
+	r2 = abs(fref2 / q2 - rate);
+
+	/*
+	 * Choose the best between two: less remainder we have the better. We
+	 * can't go case 2 if q2 is greater than 256 since SCR register can
+	 * hold only values 0 .. 255.
+	 */
+	if (r2 >= r1 || q2 > 256) {
+		/* case 1 is better */
+		r = r1;
+		q = q1;
+	} else {
+		/* case 2 is better */
+		r = r2;
+		q = q2;
+		mul = (1 << 24) * 2 / 5;
+	}
+
+	/* Check case 3 only If the divisor is big enough */
+	if (fref / rate >= 80) {
+		u64 fssp;
+		u32 m;
+
+		/* Calculate initial quot */
+		q1 = DIV_ROUND_CLOSEST(fref, rate);
+		m = (1 << 24) / q1;
+
+		/* Get the remainder */
+		fssp = (u64)fref * m;
+		do_div(fssp, 1 << 24);
+		r1 = abs(fssp - rate);
+
+		/* Choose this one if it suits better */
+		if (r1 < r) {
+			/* case 3 is better */
+			q = 1;
+			mul = m;
 		}
 	}
 
-	*dds = quark_spi_rate_table[i-1].dds_clk_rate;
-	*clk_div = quark_spi_rate_table[i-1].clk_div;
-
-	return quark_spi_rate_table[i-1].bitrate;
+	*dds = mul;
+	return q - 1;
 }
 
 static unsigned int ssp_get_clk_div(struct driver_data *drv_data, int rate)
@@ -730,23 +778,25 @@
 	rate = min_t(int, ssp_clk, rate);
 
 	if (ssp->type == PXA25x_SSP || ssp->type == CE4100_SSP)
-		return ((ssp_clk / (2 * rate) - 1) & 0xff) << 8;
+		return (ssp_clk / (2 * rate) - 1) & 0xff;
 	else
-		return ((ssp_clk / rate - 1) & 0xfff) << 8;
+		return (ssp_clk / rate - 1) & 0xfff;
 }
 
 static unsigned int pxa2xx_ssp_get_clk_div(struct driver_data *drv_data,
 					   struct chip_data *chip, int rate)
 {
-	u32 clk_div;
+	unsigned int clk_div;
 
 	switch (drv_data->ssp_type) {
 	case QUARK_X1000_SSP:
-		quark_x1000_set_clk_regvals(rate, &chip->dds_rate, &clk_div);
-		return clk_div << 8;
+		clk_div = quark_x1000_get_clk_div(rate, &chip->dds_rate);
+		break;
 	default:
-		return ssp_get_clk_div(drv_data, rate);
+		clk_div = ssp_get_clk_div(drv_data, rate);
+		break;
 	}
+	return clk_div << 8;
 }
 
 static void pump_transfers(unsigned long data)
diff --git a/drivers/spi/spi-qup.c b/drivers/spi/spi-qup.c
index 2b2c359..810a7fa 100644
--- a/drivers/spi/spi-qup.c
+++ b/drivers/spi/spi-qup.c
@@ -22,6 +22,8 @@
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/spi/spi.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
 
 #define QUP_CONFIG			0x0000
 #define QUP_STATE			0x0004
@@ -116,6 +118,8 @@
 
 #define SPI_NUM_CHIPSELECTS		4
 
+#define SPI_MAX_DMA_XFER		(SZ_64K - 64)
+
 /* high speed mode is when bus rate is greater then 26MHz */
 #define SPI_HS_MIN_RATE			26000000
 #define SPI_MAX_RATE			50000000
@@ -140,9 +144,14 @@
 	struct completion	done;
 	int			error;
 	int			w_size;	/* bytes per SPI word */
+	int			n_words;
 	int			tx_bytes;
 	int			rx_bytes;
 	int			qup_v1;
+
+	int			use_dma;
+	struct dma_slave_config	rx_conf;
+	struct dma_slave_config	tx_conf;
 };
 
 
@@ -198,7 +207,6 @@
 	return 0;
 }
 
-
 static void spi_qup_fifo_read(struct spi_qup *controller,
 			    struct spi_transfer *xfer)
 {
@@ -266,6 +274,107 @@
 	}
 }
 
+static void spi_qup_dma_done(void *data)
+{
+	struct spi_qup *qup = data;
+	/* DMA descriptor callback: signal whoever waits on qup->done */
+	complete(&qup->done);
+}
+
+static int spi_qup_prep_sg(struct spi_master *master, struct spi_transfer *xfer,
+			   enum dma_transfer_direction dir,
+			   dma_async_tx_callback callback)
+{
+	struct spi_qup *qup = spi_master_get_devdata(master);
+	unsigned long flags = DMA_PREP_INTERRUPT | DMA_PREP_FENCE;
+	struct dma_async_tx_descriptor *desc;
+	struct scatterlist *sgl;
+	struct dma_chan *chan;
+	dma_cookie_t cookie;
+	unsigned int nents;
+
+	if (dir == DMA_MEM_TO_DEV) {
+		chan = master->dma_tx;
+		nents = xfer->tx_sg.nents;
+		sgl = xfer->tx_sg.sgl;
+	} else {
+		chan = master->dma_rx;
+		nents = xfer->rx_sg.nents;
+		sgl = xfer->rx_sg.sgl;
+	}
+	/* build the slave-sg descriptor; a NULL result is reported as -EINVAL */
+	desc = dmaengine_prep_slave_sg(chan, sgl, nents, dir, flags);
+	if (!desc)
+		return -EINVAL;
+
+	desc->callback = callback;
+	desc->callback_param = qup;
+	/* submit only; spi_qup_do_dma() issues the channel afterwards */
+	cookie = dmaengine_submit(desc);
+
+	return dma_submit_error(cookie);
+}
+
+static void spi_qup_dma_terminate(struct spi_master *master,
+				  struct spi_transfer *xfer)
+{
+	if (xfer->tx_buf)
+		dmaengine_terminate_all(master->dma_tx);
+	if (xfer->rx_buf)
+		dmaengine_terminate_all(master->dma_rx);
+}
+
+static int spi_qup_do_dma(struct spi_master *master, struct spi_transfer *xfer)
+{
+	dma_async_tx_callback rx_done = NULL, tx_done = NULL;
+	int ret;
+
+	if (xfer->rx_buf)
+		rx_done = spi_qup_dma_done;
+	else if (xfer->tx_buf)
+		tx_done = spi_qup_dma_done;
+	/* callbacks chosen: RX completes qup->done if present, else TX */
+	if (xfer->rx_buf) {
+		ret = spi_qup_prep_sg(master, xfer, DMA_DEV_TO_MEM, rx_done);
+		if (ret)
+			return ret;
+
+		dma_async_issue_pending(master->dma_rx);
+	}
+
+	if (xfer->tx_buf) {
+		ret = spi_qup_prep_sg(master, xfer, DMA_MEM_TO_DEV, tx_done);
+		if (ret)
+			return ret;
+
+		dma_async_issue_pending(master->dma_tx);
+	}
+
+	return 0;
+}
+
+static int spi_qup_do_pio(struct spi_master *master, struct spi_transfer *xfer)
+{
+	struct spi_qup *qup = spi_master_get_devdata(master);
+	int ret;
+
+	ret = spi_qup_set_state(qup, QUP_STATE_RUN);
+	if (ret) {
+		dev_warn(qup->dev, "cannot set RUN state\n");
+		return ret;
+	}
+	/* step RUN -> PAUSE before the first FIFO write below */
+	ret = spi_qup_set_state(qup, QUP_STATE_PAUSE);
+	if (ret) {
+		dev_warn(qup->dev, "cannot set PAUSE state\n");
+		return ret;
+	}
+
+	spi_qup_fifo_write(qup, xfer);
+
+	return 0;
+}
+
 static irqreturn_t spi_qup_qup_irq(int irq, void *dev_id)
 {
 	struct spi_qup *controller = dev_id;
@@ -315,11 +424,13 @@
 		error = -EIO;
 	}
 
-	if (opflags & QUP_OP_IN_SERVICE_FLAG)
-		spi_qup_fifo_read(controller, xfer);
+	if (!controller->use_dma) {
+		if (opflags & QUP_OP_IN_SERVICE_FLAG)
+			spi_qup_fifo_read(controller, xfer);
 
-	if (opflags & QUP_OP_OUT_SERVICE_FLAG)
-		spi_qup_fifo_write(controller, xfer);
+		if (opflags & QUP_OP_OUT_SERVICE_FLAG)
+			spi_qup_fifo_write(controller, xfer);
+	}
 
 	spin_lock_irqsave(&controller->lock, flags);
 	controller->error = error;
@@ -332,13 +443,35 @@
 	return IRQ_HANDLED;
 }
 
+static u32
+spi_qup_get_mode(struct spi_master *master, struct spi_transfer *xfer)
+{
+	struct spi_qup *qup = spi_master_get_devdata(master);
+	u32 mode;
+
+	qup->w_size = 4;
+	/* 4 bytes per word unless bits_per_word fits in 1 or 2 bytes */
+	if (xfer->bits_per_word <= 8)
+		qup->w_size = 1;
+	else if (xfer->bits_per_word <= 16)
+		qup->w_size = 2;
+	/* side effect: w_size and n_words are cached in the qup state */
+	qup->n_words = xfer->len / qup->w_size;
+	/* FIFO mode if the words fit in in_fifo_sz / sizeof(u32), else BLOCK */
+	if (qup->n_words <= (qup->in_fifo_sz / sizeof(u32)))
+		mode = QUP_IO_M_MODE_FIFO;
+	else
+		mode = QUP_IO_M_MODE_BLOCK;
+
+	return mode;
+}
 
 /* set clock freq ... bits per word */
 static int spi_qup_io_config(struct spi_device *spi, struct spi_transfer *xfer)
 {
 	struct spi_qup *controller = spi_master_get_devdata(spi->master);
 	u32 config, iomode, mode, control;
-	int ret, n_words, w_size;
+	int ret, n_words;
 
 	if (spi->mode & SPI_LOOP && xfer->len > controller->in_fifo_sz) {
 		dev_err(controller->dev, "too big size for loopback %d > %d\n",
@@ -358,35 +491,54 @@
 		return -EIO;
 	}
 
-	w_size = 4;
-	if (xfer->bits_per_word <= 8)
-		w_size = 1;
-	else if (xfer->bits_per_word <= 16)
-		w_size = 2;
+	mode = spi_qup_get_mode(spi->master, xfer);
+	n_words = controller->n_words;
 
-	n_words = xfer->len / w_size;
-	controller->w_size = w_size;
-
-	if (n_words <= (controller->in_fifo_sz / sizeof(u32))) {
-		mode = QUP_IO_M_MODE_FIFO;
+	if (mode == QUP_IO_M_MODE_FIFO) {
 		writel_relaxed(n_words, controller->base + QUP_MX_READ_CNT);
 		writel_relaxed(n_words, controller->base + QUP_MX_WRITE_CNT);
 		/* must be zero for FIFO */
 		writel_relaxed(0, controller->base + QUP_MX_INPUT_CNT);
 		writel_relaxed(0, controller->base + QUP_MX_OUTPUT_CNT);
-	} else {
-		mode = QUP_IO_M_MODE_BLOCK;
+	} else if (!controller->use_dma) {
 		writel_relaxed(n_words, controller->base + QUP_MX_INPUT_CNT);
 		writel_relaxed(n_words, controller->base + QUP_MX_OUTPUT_CNT);
 		/* must be zero for BLOCK and BAM */
 		writel_relaxed(0, controller->base + QUP_MX_READ_CNT);
 		writel_relaxed(0, controller->base + QUP_MX_WRITE_CNT);
+	} else {
+		mode = QUP_IO_M_MODE_BAM;
+		writel_relaxed(0, controller->base + QUP_MX_READ_CNT);
+		writel_relaxed(0, controller->base + QUP_MX_WRITE_CNT);
+
+		if (!controller->qup_v1) {
+			void __iomem *input_cnt;
+
+			input_cnt = controller->base + QUP_MX_INPUT_CNT;
+			/*
+			 * for DMA transfers, both QUP_MX_INPUT_CNT and
+			 * QUP_MX_OUTPUT_CNT must be zero to all cases but one.
+			 * That case is a non-balanced transfer when there is
+			 * only a rx_buf.
+			 */
+			if (xfer->tx_buf)
+				writel_relaxed(0, input_cnt);
+			else
+				writel_relaxed(n_words, input_cnt);
+
+			writel_relaxed(0, controller->base + QUP_MX_OUTPUT_CNT);
+		}
 	}
 
 	iomode = readl_relaxed(controller->base + QUP_IO_M_MODES);
 	/* Set input and output transfer mode */
 	iomode &= ~(QUP_IO_M_INPUT_MODE_MASK | QUP_IO_M_OUTPUT_MODE_MASK);
-	iomode &= ~(QUP_IO_M_PACK_EN | QUP_IO_M_UNPACK_EN);
+
+	if (!controller->use_dma)
+		iomode &= ~(QUP_IO_M_PACK_EN | QUP_IO_M_UNPACK_EN);
+	else
+		iomode |= QUP_IO_M_PACK_EN | QUP_IO_M_UNPACK_EN;
+
 	iomode |= (mode << QUP_IO_M_OUTPUT_MODE_MASK_SHIFT);
 	iomode |= (mode << QUP_IO_M_INPUT_MODE_MASK_SHIFT);
 
@@ -428,11 +580,31 @@
 	config &= ~(QUP_CONFIG_NO_INPUT | QUP_CONFIG_NO_OUTPUT | QUP_CONFIG_N);
 	config |= xfer->bits_per_word - 1;
 	config |= QUP_CONFIG_SPI_MODE;
+
+	if (controller->use_dma) {
+		if (!xfer->tx_buf)
+			config |= QUP_CONFIG_NO_OUTPUT;
+		if (!xfer->rx_buf)
+			config |= QUP_CONFIG_NO_INPUT;
+	}
+
 	writel_relaxed(config, controller->base + QUP_CONFIG);
 
 	/* only write to OPERATIONAL_MASK when register is present */
-	if (!controller->qup_v1)
-		writel_relaxed(0, controller->base + QUP_OPERATIONAL_MASK);
+	if (!controller->qup_v1) {
+		u32 mask = 0;
+
+		/*
+		 * mask INPUT and OUTPUT service flags to prevent IRQs on FIFO
+		 * status change in BAM mode
+		 */
+
+		if (mode == QUP_IO_M_MODE_BAM)
+			mask = QUP_OP_IN_SERVICE_FLAG | QUP_OP_OUT_SERVICE_FLAG;
+
+		writel_relaxed(mask, controller->base + QUP_OPERATIONAL_MASK);
+	}
+
 	return 0;
 }
 
@@ -461,17 +633,13 @@
 	controller->tx_bytes = 0;
 	spin_unlock_irqrestore(&controller->lock, flags);
 
-	if (spi_qup_set_state(controller, QUP_STATE_RUN)) {
-		dev_warn(controller->dev, "cannot set RUN state\n");
-		goto exit;
-	}
+	if (controller->use_dma)
+		ret = spi_qup_do_dma(master, xfer);
+	else
+		ret = spi_qup_do_pio(master, xfer);
 
-	if (spi_qup_set_state(controller, QUP_STATE_PAUSE)) {
-		dev_warn(controller->dev, "cannot set PAUSE state\n");
+	if (ret)
 		goto exit;
-	}
-
-	spi_qup_fifo_write(controller, xfer);
 
 	if (spi_qup_set_state(controller, QUP_STATE_RUN)) {
 		dev_warn(controller->dev, "cannot set EXECUTE state\n");
@@ -480,6 +648,7 @@
 
 	if (!wait_for_completion_timeout(&controller->done, timeout))
 		ret = -ETIMEDOUT;
+
 exit:
 	spi_qup_set_state(controller, QUP_STATE_RESET);
 	spin_lock_irqsave(&controller->lock, flags);
@@ -487,6 +656,97 @@
 	if (!ret)
 		ret = controller->error;
 	spin_unlock_irqrestore(&controller->lock, flags);
+
+	if (ret && controller->use_dma)
+		spi_qup_dma_terminate(master, xfer);
+
+	return ret;
+}
+
+static bool spi_qup_can_dma(struct spi_master *master, struct spi_device *spi,
+			    struct spi_transfer *xfer)
+{
+	struct spi_qup *qup = spi_master_get_devdata(master);
+	size_t dma_align = dma_get_cache_alignment();
+	u32 mode;
+
+	qup->use_dma = 0;
+	/* each buffer in use needs: block-multiple len, a channel, alignment */
+	if (xfer->rx_buf && (xfer->len % qup->in_blk_sz ||
+	    IS_ERR_OR_NULL(master->dma_rx) ||
+	    !IS_ALIGNED((size_t)xfer->rx_buf, dma_align)))
+		return false;
+
+	if (xfer->tx_buf && (xfer->len % qup->out_blk_sz ||
+	    IS_ERR_OR_NULL(master->dma_tx) ||
+	    !IS_ALIGNED((size_t)xfer->tx_buf, dma_align)))
+		return false;
+	/* transfers that qualify for FIFO mode are left to PIO */
+	mode = spi_qup_get_mode(master, xfer);
+	if (mode == QUP_IO_M_MODE_FIFO)
+		return false;
+	/* record the decision in qup->use_dma as well as returning it */
+	qup->use_dma = 1;
+
+	return true;
+}
+
+static void spi_qup_release_dma(struct spi_master *master)
+{
+	if (!IS_ERR_OR_NULL(master->dma_rx))
+		dma_release_channel(master->dma_rx);
+	if (!IS_ERR_OR_NULL(master->dma_tx))
+		dma_release_channel(master->dma_tx);
+}
+
+static int spi_qup_init_dma(struct spi_master *master, resource_size_t base)
+{
+	struct spi_qup *spi = spi_master_get_devdata(master);
+	struct dma_slave_config *rx_conf = &spi->rx_conf,
+				*tx_conf = &spi->tx_conf;
+	struct device *dev = spi->dev;
+	int ret;
+
+	/* allocate dma resources, if available */
+	master->dma_rx = dma_request_slave_channel_reason(dev, "rx");
+	if (IS_ERR(master->dma_rx))
+		return PTR_ERR(master->dma_rx);
+
+	master->dma_tx = dma_request_slave_channel_reason(dev, "tx");
+	if (IS_ERR(master->dma_tx)) {
+		ret = PTR_ERR(master->dma_tx);
+		goto err_tx;
+	}
+
+	/* set DMA parameters */
+	rx_conf->direction = DMA_DEV_TO_MEM;
+	rx_conf->device_fc = 1;
+	rx_conf->src_addr = base + QUP_INPUT_FIFO;
+	rx_conf->src_maxburst = spi->in_blk_sz;
+	tx_conf->direction = DMA_MEM_TO_DEV;
+	tx_conf->device_fc = 1;
+	tx_conf->dst_addr = base + QUP_OUTPUT_FIFO;
+	tx_conf->dst_maxburst = spi->out_blk_sz;
+
+	ret = dmaengine_slave_config(master->dma_rx, rx_conf);
+	if (ret) {
+		dev_err(dev, "failed to configure RX channel\n");
+		goto err;
+	}
+	ret = dmaengine_slave_config(master->dma_tx, tx_conf);
+	if (ret) {
+		dev_err(dev, "failed to configure TX channel\n");
+		goto err;
+	}
+
+	return 0;
+	/* NULL the released channels so spi_qup_release_dma() skips them */
+err:
+	dma_release_channel(master->dma_tx);
+	master->dma_tx = NULL;
+err_tx:
+	dma_release_channel(master->dma_rx);
+	master->dma_rx = NULL;
 	return ret;
 }
 
@@ -563,6 +823,8 @@
 	master->transfer_one = spi_qup_transfer_one;
 	master->dev.of_node = pdev->dev.of_node;
 	master->auto_runtime_pm = true;
+	master->dma_alignment = dma_get_cache_alignment();
+	master->max_dma_len = SPI_MAX_DMA_XFER;
 
 	platform_set_drvdata(pdev, master);
 
@@ -574,6 +836,12 @@
 	controller->cclk = cclk;
 	controller->irq = irq;
 
+	ret = spi_qup_init_dma(master, res->start);
+	if (ret == -EPROBE_DEFER)
+		goto error;
+	else if (!ret)
+		master->can_dma = spi_qup_can_dma;
+
 	/* set v1 flag if device is version 1 */
 	if (of_device_is_compatible(dev->of_node, "qcom,spi-qup-v1.1.1"))
 		controller->qup_v1 = 1;
@@ -610,7 +878,7 @@
 	ret = spi_qup_set_state(controller, QUP_STATE_RESET);
 	if (ret) {
 		dev_err(dev, "cannot set RESET state\n");
-		goto error;
+		goto error_dma;
 	}
 
 	writel_relaxed(0, base + QUP_OPERATIONAL);
@@ -634,7 +902,7 @@
 	ret = devm_request_irq(dev, irq, spi_qup_qup_irq,
 			       IRQF_TRIGGER_HIGH, pdev->name, controller);
 	if (ret)
-		goto error;
+		goto error_dma;
 
 	pm_runtime_set_autosuspend_delay(dev, MSEC_PER_SEC);
 	pm_runtime_use_autosuspend(dev);
@@ -649,6 +917,8 @@
 
 disable_pm:
 	pm_runtime_disable(&pdev->dev);
+error_dma:
+	spi_qup_release_dma(master);
 error:
 	clk_disable_unprepare(cclk);
 	clk_disable_unprepare(iclk);
@@ -740,6 +1010,8 @@
 	if (ret)
 		return ret;
 
+	spi_qup_release_dma(master);
+
 	clk_disable_unprepare(controller->cclk);
 	clk_disable_unprepare(controller->iclk);
 
diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c
index 1a777dc..68e7efe 100644
--- a/drivers/spi/spi-rockchip.c
+++ b/drivers/spi/spi-rockchip.c
@@ -179,6 +179,7 @@
 	u8 tmode;
 	u8 bpw;
 	u8 n_bytes;
+	u8 rsd_nsecs;
 	unsigned len;
 	u32 speed;
 
@@ -302,8 +303,8 @@
 	return 0;
 }
 
-static int rockchip_spi_unprepare_message(struct spi_master *master,
-					  struct spi_message *msg)
+static void rockchip_spi_handle_err(struct spi_master *master,
+				    struct spi_message *msg)
 {
 	unsigned long flags;
 	struct rockchip_spi *rs = spi_master_get_devdata(master);
@@ -313,8 +314,8 @@
 	/*
 	 * For DMA mode, we need terminate DMA channel and flush
 	 * fifo for the next transfer if DMA thansfer timeout.
-	 * unprepare_message() was called by core if transfer complete
-	 * or timeout. Maybe it is reasonable for error handling here.
+	 * handle_err() was called by core if transfer failed.
+	 * Maybe it is reasonable for error handling here.
 	 */
 	if (rs->use_dma) {
 		if (rs->state & RXBUSY) {
@@ -327,6 +328,12 @@
 	}
 
 	spin_unlock_irqrestore(&rs->lock, flags);
+}
+
+static int rockchip_spi_unprepare_message(struct spi_master *master,
+					  struct spi_message *msg)
+{
+	struct rockchip_spi *rs = spi_master_get_devdata(master);
 
 	spi_enable_chip(rs, 0);
 
@@ -493,6 +500,7 @@
 {
 	u32 div = 0;
 	u32 dmacr = 0;
+	int rsd = 0;
 
 	u32 cr0 = (CR0_BHT_8BIT << CR0_BHT_OFFSET)
 		| (CR0_SSD_ONE << CR0_SSD_OFFSET);
@@ -519,9 +527,23 @@
 	}
 
 	/* div doesn't support odd number */
-	div = max_t(u32, rs->max_freq / rs->speed, 1);
+	div = DIV_ROUND_UP(rs->max_freq, rs->speed);
 	div = (div + 1) & 0xfffe;
 
+	/* Rx sample delay is expressed in parent clock cycles (max 3) */
+	rsd = DIV_ROUND_CLOSEST(rs->rsd_nsecs * (rs->max_freq >> 8),
+				1000000000 >> 8);
+	if (!rsd && rs->rsd_nsecs) {
+		pr_warn_once("rockchip-spi: %u Hz are too slow to express %u ns delay\n",
+			     rs->max_freq, rs->rsd_nsecs);
+	} else if (rsd > 3) {
+		rsd = 3;
+		pr_warn_once("rockchip-spi: %u Hz are too fast to express %u ns delay, clamping at %u ns\n",
+			     rs->max_freq, rs->rsd_nsecs,
+			     rsd * 1000000000U / rs->max_freq);
+	}
+	cr0 |= rsd << CR0_RSD_OFFSET;
+
 	writel_relaxed(cr0, rs->regs + ROCKCHIP_SPI_CTRLR0);
 
 	writel_relaxed(rs->len - 1, rs->regs + ROCKCHIP_SPI_CTRLR1);
@@ -614,6 +636,7 @@
 	struct rockchip_spi *rs;
 	struct spi_master *master;
 	struct resource *mem;
+	u32 rsd_nsecs;
 
 	master = spi_alloc_master(&pdev->dev, sizeof(struct rockchip_spi));
 	if (!master)
@@ -665,6 +688,10 @@
 	rs->dev = &pdev->dev;
 	rs->max_freq = clk_get_rate(rs->spiclk);
 
+	if (!of_property_read_u32(pdev->dev.of_node, "rx-sample-delay-ns",
+				  &rsd_nsecs))
+		rs->rsd_nsecs = rsd_nsecs;
+
 	rs->fifo_len = get_fifo_len(rs);
 	if (!rs->fifo_len) {
 		dev_err(&pdev->dev, "Failed to get fifo length\n");
@@ -688,6 +715,7 @@
 	master->prepare_message = rockchip_spi_prepare_message;
 	master->unprepare_message = rockchip_spi_unprepare_message;
 	master->transfer_one = rockchip_spi_transfer_one;
+	master->handle_err = rockchip_spi_handle_err;
 
 	rs->dma_tx.ch = dma_request_slave_channel(rs->dev, "tx");
 	if (!rs->dma_tx.ch)
diff --git a/drivers/spi/spi-rspi.c b/drivers/spi/spi-rspi.c
index 46ce470..186924a 100644
--- a/drivers/spi/spi-rspi.c
+++ b/drivers/spi/spi-rspi.c
@@ -177,6 +177,13 @@
 #define SPBFCR_RXRST		0x40	/* Receive Buffer Data Reset */
 #define SPBFCR_TXTRG_MASK	0x30	/* Transmit Buffer Data Triggering Number */
 #define SPBFCR_RXTRG_MASK	0x07	/* Receive Buffer Data Triggering Number */
+/* QSPI on R-Car Gen2 */
+#define SPBFCR_TXTRG_1B		0x00	/* 31 bytes (1 byte available) */
+#define SPBFCR_TXTRG_32B	0x30	/* 0 byte (32 bytes available) */
+#define SPBFCR_RXTRG_1B		0x00	/* 1 byte (31 bytes available) */
+#define SPBFCR_RXTRG_32B	0x07	/* 32 bytes (0 byte available) */
+
+#define QSPI_BUFFER_SIZE        32u
 
 struct rspi_data {
 	void __iomem *addr;
@@ -366,6 +373,52 @@
 	return 0;
 }
 
+static void qspi_update(const struct rspi_data *rspi, u8 mask, u8 val, u8 reg)
+{
+	u8 data;
+	/* read-modify-write: only bits set in mask are taken from val */
+	data = rspi_read8(rspi, reg);
+	data &= ~mask;
+	data |= (val & mask);
+	rspi_write8(rspi, data, reg);
+}
+
+static int qspi_set_send_trigger(struct rspi_data *rspi, unsigned int len)
+{
+	unsigned int n;
+
+	n = min(len, QSPI_BUFFER_SIZE);
+	/* TX trigger: 32B setting for a full buffer, 1B setting otherwise */
+	if (len >= QSPI_BUFFER_SIZE) {
+		/* sets triggering number to 32 bytes */
+		qspi_update(rspi, SPBFCR_TXTRG_MASK,
+			     SPBFCR_TXTRG_32B, QSPI_SPBFCR);
+	} else {
+		/* sets triggering number to 1 byte */
+		qspi_update(rspi, SPBFCR_TXTRG_MASK,
+			     SPBFCR_TXTRG_1B, QSPI_SPBFCR);
+	}
+	/* returns len capped at QSPI_BUFFER_SIZE */
+	return n;
+}
+
+static void qspi_set_receive_trigger(struct rspi_data *rspi, unsigned int len)
+{
+	/*
+	 * Only the threshold choice depends on len here; unlike the send
+	 * side there is no chunk size to hand back.
+	 */
+	if (len >= QSPI_BUFFER_SIZE) {
+		/* sets triggering number to 32 bytes */
+		qspi_update(rspi, SPBFCR_RXTRG_MASK,
+			     SPBFCR_RXTRG_32B, QSPI_SPBFCR);
+	} else {
+		/* sets triggering number to 1 byte */
+		qspi_update(rspi, SPBFCR_RXTRG_MASK,
+			     SPBFCR_RXTRG_1B, QSPI_SPBFCR);
+	}
+}
+
 #define set_config_register(spi, n) spi->ops->set_config_register(spi, n)
 
 static void rspi_enable_irq(const struct rspi_data *rspi, u8 enable)
@@ -609,18 +662,28 @@
 	return __rspi_can_dma(rspi, xfer);
 }
 
+static int rspi_dma_check_then_transfer(struct rspi_data *rspi,
+					 struct spi_transfer *xfer)
+{
+	/* -EAGAIN tells the caller to fall back to its PIO path. */
+	if (!rspi->master->can_dma || !__rspi_can_dma(rspi, xfer))
+		return -EAGAIN;
+	/*
+	 * rx_buf can be NULL on RSPI on SH in TX-only Mode.  Hand back the
+	 * DMA result untouched so a failed transfer is not reported as 0.
+	 */
+	return rspi_dma_transfer(rspi, &xfer->tx_sg,
+				 xfer->rx_buf ? &xfer->rx_sg : NULL);
+}
+
 static int rspi_common_transfer(struct rspi_data *rspi,
 				struct spi_transfer *xfer)
 {
 	int ret;
 
-	if (rspi->master->can_dma && __rspi_can_dma(rspi, xfer)) {
-		/* rx_buf can be NULL on RSPI on SH in TX-only Mode */
-		ret = rspi_dma_transfer(rspi, &xfer->tx_sg,
-					xfer->rx_buf ? &xfer->rx_sg : NULL);
-		if (ret != -EAGAIN)
-			return ret;
-	}
+	ret = rspi_dma_check_then_transfer(rspi, xfer);
+	if (ret != -EAGAIN)
+		return ret;
 
 	ret = rspi_pio_transfer(rspi, xfer->tx_buf, xfer->rx_buf, xfer->len);
 	if (ret < 0)
@@ -661,12 +724,59 @@
 	return rspi_common_transfer(rspi, xfer);
 }
 
+static int qspi_trigger_transfer_out_int(struct rspi_data *rspi, const u8 *tx,
+					u8 *rx, unsigned int len)
+{
+	int i, n, ret;
+	int error;
+	/* each pass moves n = min(len, QSPI_BUFFER_SIZE) bytes */
+	while (len > 0) {
+		n = qspi_set_send_trigger(rspi, len);
+		qspi_set_receive_trigger(rspi, len);
+		if (n == QSPI_BUFFER_SIZE) {
+			error = rspi_wait_for_tx_empty(rspi);
+			if (error < 0) {
+				dev_err(&rspi->master->dev, "transmit timeout\n");
+				return error;
+			}
+			for (i = 0; i < n; i++)
+				rspi_write_data(rspi, *tx++);
+			/* whole chunk written; wait for RX full, then drain n */
+			error = rspi_wait_for_rx_full(rspi);
+			if (error < 0) {
+				dev_err(&rspi->master->dev, "receive timeout\n");
+				return error;
+			}
+			for (i = 0; i < n; i++)
+				*rx++ = rspi_read_data(rspi);
+		} else {
+			ret = rspi_pio_transfer(rspi, tx, rx, n);
+			if (ret < 0)
+				return ret;
+		}
+		len -= n;
+	}
+
+	return 0;
+}
+
 static int qspi_transfer_out_in(struct rspi_data *rspi,
 				struct spi_transfer *xfer)
 {
+	int ret;
+
 	qspi_receive_init(rspi);
 
-	return rspi_common_transfer(rspi, xfer);
+	ret = rspi_dma_check_then_transfer(rspi, xfer);
+	if (ret != -EAGAIN)
+		return ret;
+
+	ret = qspi_trigger_transfer_out_int(rspi, xfer->tx_buf,
+					    xfer->rx_buf, xfer->len);
+	if (ret < 0)
+		return ret;
+
+	return 0;
 }
 
 static int qspi_transfer_out(struct rspi_data *rspi, struct spi_transfer *xfer)
diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c
index 9231c34..b1c6731 100644
--- a/drivers/spi/spi-s3c64xx.c
+++ b/drivers/spi/spi-s3c64xx.c
@@ -324,7 +324,7 @@
 
 		/* Acquire DMA channels */
 		sdd->rx_dma.ch = dma_request_slave_channel_compat(mask, filter,
-				   (void *)sdd->rx_dma.dmach, dev, "rx");
+				   (void *)(long)sdd->rx_dma.dmach, dev, "rx");
 		if (!sdd->rx_dma.ch) {
 			dev_err(dev, "Failed to get RX DMA channel\n");
 			ret = -EBUSY;
@@ -333,7 +333,7 @@
 		spi->dma_rx = sdd->rx_dma.ch;
 
 		sdd->tx_dma.ch = dma_request_slave_channel_compat(mask, filter,
-				   (void *)sdd->tx_dma.dmach, dev, "tx");
+				   (void *)(long)sdd->tx_dma.dmach, dev, "tx");
 		if (!sdd->tx_dma.ch) {
 			dev_err(dev, "Failed to get TX DMA channel\n");
 			ret = -EBUSY;
diff --git a/drivers/spi/spi-sc18is602.c b/drivers/spi/spi-sc18is602.c
index 5a56acf..36af4d4 100644
--- a/drivers/spi/spi-sc18is602.c
+++ b/drivers/spi/spi-sc18is602.c
@@ -286,7 +286,7 @@
 			hw->freq = SC18IS602_CLOCK;
 		break;
 	}
-	master->bus_num = client->adapter->nr;
+	master->bus_num = np ? -1 : client->adapter->nr;
 	master->mode_bits = SPI_CPHA | SPI_CPOL | SPI_LSB_FIRST;
 	master->bits_per_word_mask = SPI_BPW_MASK(8);
 	master->setup = sc18is602_setup;
diff --git a/drivers/spi/spi-st-ssc4.c b/drivers/spi/spi-st-ssc4.c
index 2faeaa7..f17c0ab 100644
--- a/drivers/spi/spi-st-ssc4.c
+++ b/drivers/spi/spi-st-ssc4.c
@@ -482,7 +482,7 @@
 	SET_RUNTIME_PM_OPS(spi_st_runtime_suspend, spi_st_runtime_resume, NULL)
 };
 
-static struct of_device_id stm_spi_match[] = {
+static const struct of_device_id stm_spi_match[] = {
 	{ .compatible = "st,comms-ssc4-spi", },
 	{},
 };
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 57a1950..d5d7d22 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -16,7 +16,6 @@
  */
 
 #include <linux/kernel.h>
-#include <linux/kmod.h>
 #include <linux/device.h>
 #include <linux/init.h>
 #include <linux/cache.h>
@@ -129,125 +128,11 @@
 	return 0;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int spi_legacy_suspend(struct device *dev, pm_message_t message)
-{
-	int			value = 0;
-	struct spi_driver	*drv = to_spi_driver(dev->driver);
-
-	/* suspend will stop irqs and dma; no more i/o */
-	if (drv) {
-		if (drv->suspend)
-			value = drv->suspend(to_spi_device(dev), message);
-		else
-			dev_dbg(dev, "... can't suspend\n");
-	}
-	return value;
-}
-
-static int spi_legacy_resume(struct device *dev)
-{
-	int			value = 0;
-	struct spi_driver	*drv = to_spi_driver(dev->driver);
-
-	/* resume may restart the i/o queue */
-	if (drv) {
-		if (drv->resume)
-			value = drv->resume(to_spi_device(dev));
-		else
-			dev_dbg(dev, "... can't resume\n");
-	}
-	return value;
-}
-
-static int spi_pm_suspend(struct device *dev)
-{
-	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
-
-	if (pm)
-		return pm_generic_suspend(dev);
-	else
-		return spi_legacy_suspend(dev, PMSG_SUSPEND);
-}
-
-static int spi_pm_resume(struct device *dev)
-{
-	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
-
-	if (pm)
-		return pm_generic_resume(dev);
-	else
-		return spi_legacy_resume(dev);
-}
-
-static int spi_pm_freeze(struct device *dev)
-{
-	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
-
-	if (pm)
-		return pm_generic_freeze(dev);
-	else
-		return spi_legacy_suspend(dev, PMSG_FREEZE);
-}
-
-static int spi_pm_thaw(struct device *dev)
-{
-	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
-
-	if (pm)
-		return pm_generic_thaw(dev);
-	else
-		return spi_legacy_resume(dev);
-}
-
-static int spi_pm_poweroff(struct device *dev)
-{
-	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
-
-	if (pm)
-		return pm_generic_poweroff(dev);
-	else
-		return spi_legacy_suspend(dev, PMSG_HIBERNATE);
-}
-
-static int spi_pm_restore(struct device *dev)
-{
-	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
-
-	if (pm)
-		return pm_generic_restore(dev);
-	else
-		return spi_legacy_resume(dev);
-}
-#else
-#define spi_pm_suspend	NULL
-#define spi_pm_resume	NULL
-#define spi_pm_freeze	NULL
-#define spi_pm_thaw	NULL
-#define spi_pm_poweroff	NULL
-#define spi_pm_restore	NULL
-#endif
-
-static const struct dev_pm_ops spi_pm = {
-	.suspend = spi_pm_suspend,
-	.resume = spi_pm_resume,
-	.freeze = spi_pm_freeze,
-	.thaw = spi_pm_thaw,
-	.poweroff = spi_pm_poweroff,
-	.restore = spi_pm_restore,
-	SET_RUNTIME_PM_OPS(
-		pm_generic_runtime_suspend,
-		pm_generic_runtime_resume,
-		NULL
-	)
-};
-
 struct bus_type spi_bus_type = {
 	.name		= "spi",
 	.dev_groups	= spi_dev_groups,
 	.match		= spi_match_device,
 	.uevent		= spi_uevent,
-	.pm		= &spi_pm,
 };
 EXPORT_SYMBOL_GPL(spi_bus_type);
 
@@ -851,6 +736,9 @@
 	if (msg->status == -EINPROGRESS)
 		msg->status = ret;
 
+	if (msg->status && master->handle_err)
+		master->handle_err(master, msg);
+
 	spi_finalize_current_message(master);
 
 	return ret;
@@ -1360,7 +1248,6 @@
 	spi->dev.of_node = nc;
 
 	/* Register the new device */
-	request_module("%s%s", SPI_MODULE_PREFIX, spi->modalias);
 	rc = spi_add_device(spi);
 	if (rc) {
 		dev_err(&master->dev, "spi_device register error %s\n",
@@ -1894,6 +1781,8 @@
 	if (!spi->max_speed_hz)
 		spi->max_speed_hz = spi->master->max_speed_hz;
 
+	spi_set_cs(spi, false);
+
 	if (spi->master->setup)
 		status = spi->master->setup(spi);
 
diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c
index 4eb7a98..92c909e 100644
--- a/drivers/spi/spidev.c
+++ b/drivers/spi/spidev.c
@@ -223,7 +223,7 @@
 	struct spi_transfer	*k_xfers;
 	struct spi_transfer	*k_tmp;
 	struct spi_ioc_transfer *u_tmp;
-	unsigned		n, total;
+	unsigned		n, total, tx_total, rx_total;
 	u8			*tx_buf, *rx_buf;
 	int			status = -EFAULT;
 
@@ -239,33 +239,52 @@
 	tx_buf = spidev->tx_buffer;
 	rx_buf = spidev->rx_buffer;
 	total = 0;
+	tx_total = 0;
+	rx_total = 0;
 	for (n = n_xfers, k_tmp = k_xfers, u_tmp = u_xfers;
 			n;
 			n--, k_tmp++, u_tmp++) {
 		k_tmp->len = u_tmp->len;
 
 		total += k_tmp->len;
-		if (total > bufsiz) {
+		/* Since the function returns the total length of transfers
+		 * on success, restrict the total to positive int values to
+		 * avoid the return value looking like an error.  Also check
+		 * each transfer length to avoid arithmetic overflow.
+		 */
+		if (total > INT_MAX || k_tmp->len > INT_MAX) {
 			status = -EMSGSIZE;
 			goto done;
 		}
 
 		if (u_tmp->rx_buf) {
+			/* this transfer needs space in RX bounce buffer */
+			rx_total += k_tmp->len;
+			if (rx_total > bufsiz) {
+				status = -EMSGSIZE;
+				goto done;
+			}
 			k_tmp->rx_buf = rx_buf;
 			if (!access_ok(VERIFY_WRITE, (u8 __user *)
 						(uintptr_t) u_tmp->rx_buf,
 						u_tmp->len))
 				goto done;
+			rx_buf += k_tmp->len;
 		}
 		if (u_tmp->tx_buf) {
+			/* this transfer needs space in TX bounce buffer */
+			tx_total += k_tmp->len;
+			if (tx_total > bufsiz) {
+				status = -EMSGSIZE;
+				goto done;
+			}
 			k_tmp->tx_buf = tx_buf;
 			if (copy_from_user(tx_buf, (const u8 __user *)
 						(uintptr_t) u_tmp->tx_buf,
 					u_tmp->len))
 				goto done;
+			tx_buf += k_tmp->len;
 		}
-		tx_buf += k_tmp->len;
-		rx_buf += k_tmp->len;
 
 		k_tmp->cs_change = !!u_tmp->cs_change;
 		k_tmp->tx_nbits = u_tmp->tx_nbits;
@@ -303,8 +322,8 @@
 				status = -EFAULT;
 				goto done;
 			}
+			rx_buf += u_tmp->len;
 		}
-		rx_buf += u_tmp->len;
 	}
 	status = total;
 
@@ -684,6 +703,14 @@
 
 static struct class *spidev_class;
 
+#ifdef CONFIG_OF
+static const struct of_device_id spidev_dt_ids[] = {
+	{ .compatible = "rohm,dh2228fv" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, spidev_dt_ids);
+#endif
+
 /*-------------------------------------------------------------------------*/
 
 static int spidev_probe(struct spi_device *spi)
@@ -692,6 +719,17 @@
 	int			status;
 	unsigned long		minor;
 
+	/*
+	 * spidev should never be referenced in DT without a specific
+	 * compatible string, it is a Linux implementation thing
+	 * rather than a description of the hardware.
+	 */
+	if (spi->dev.of_node && !of_match_device(spidev_dt_ids, &spi->dev)) {
+		dev_err(&spi->dev, "buggy DT: spidev listed directly in DT\n");
+		WARN_ON(spi->dev.of_node &&
+			!of_match_device(spidev_dt_ids, &spi->dev));
+	}
+
 	/* Allocate driver data */
 	spidev = kzalloc(sizeof(*spidev), GFP_KERNEL);
 	if (!spidev)
@@ -758,13 +796,6 @@
 	return 0;
 }
 
-static const struct of_device_id spidev_dt_ids[] = {
-	{ .compatible = "rohm,dh2228fv" },
-	{},
-};
-
-MODULE_DEVICE_TABLE(of, spidev_dt_ids);
-
 static struct spi_driver spidev_spi_driver = {
 	.driver = {
 		.name =		"spidev",
diff --git a/include/linux/intel_mid_dma.h b/include/linux/intel_mid_dma.h
deleted file mode 100644
index 10496bd..0000000
--- a/include/linux/intel_mid_dma.h
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- *  intel_mid_dma.h - Intel MID DMA Drivers
- *
- *  Copyright (C) 2008-10 Intel Corp
- *  Author: Vinod Koul <vinod.koul@intel.com>
- *  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; version 2 of the License.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- *
- */
-#ifndef __INTEL_MID_DMA_H__
-#define __INTEL_MID_DMA_H__
-
-#include <linux/dmaengine.h>
-
-#define DMA_PREP_CIRCULAR_LIST		(1 << 10)
-
-/*DMA mode configurations*/
-enum intel_mid_dma_mode {
-	LNW_DMA_PER_TO_MEM = 0, /*periphral to memory configuration*/
-	LNW_DMA_MEM_TO_PER,	/*memory to periphral configuration*/
-	LNW_DMA_MEM_TO_MEM,	/*mem to mem confg (testing only)*/
-};
-
-/*DMA handshaking*/
-enum intel_mid_dma_hs_mode {
-	LNW_DMA_HW_HS = 0,	/*HW Handshaking only*/
-	LNW_DMA_SW_HS = 1,	/*SW Handshaking not recommended*/
-};
-
-/*Burst size configuration*/
-enum intel_mid_dma_msize {
-	LNW_DMA_MSIZE_1 = 0x0,
-	LNW_DMA_MSIZE_4 = 0x1,
-	LNW_DMA_MSIZE_8 = 0x2,
-	LNW_DMA_MSIZE_16 = 0x3,
-	LNW_DMA_MSIZE_32 = 0x4,
-	LNW_DMA_MSIZE_64 = 0x5,
-};
-
-/**
- * struct intel_mid_dma_slave - DMA slave structure
- *
- * @dirn: DMA trf direction
- * @src_width: tx register width
- * @dst_width: rx register width
- * @hs_mode: HW/SW handshaking mode
- * @cfg_mode: DMA data transfer mode (per-per/mem-per/mem-mem)
- * @src_msize: Source DMA burst size
- * @dst_msize: Dst DMA burst size
- * @per_addr: Periphral address
- * @device_instance: DMA peripheral device instance, we can have multiple
- *		peripheral device connected to single DMAC
- */
-struct intel_mid_dma_slave {
-	enum intel_mid_dma_hs_mode	hs_mode;  /*handshaking*/
-	enum intel_mid_dma_mode		cfg_mode; /*mode configuration*/
-	unsigned int		device_instance; /*0, 1 for periphral instance*/
-	struct dma_slave_config		dma_slave;
-};
-
-#endif /*__INTEL_MID_DMA_H__*/
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 856d34d..d673072 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -162,8 +162,6 @@
  * @remove: Unbinds this driver from the spi device
  * @shutdown: Standard shutdown callback used during system state
  *	transitions such as powerdown/halt and kexec
- * @suspend: Standard suspend callback used during system state transitions
- * @resume: Standard resume callback used during system state transitions
  * @driver: SPI device drivers should initialize the name and owner
  *	field of this structure.
  *
@@ -184,8 +182,6 @@
 	int			(*probe)(struct spi_device *spi);
 	int			(*remove)(struct spi_device *spi);
 	void			(*shutdown)(struct spi_device *spi);
-	int			(*suspend)(struct spi_device *spi, pm_message_t mesg);
-	int			(*resume)(struct spi_device *spi);
 	struct device_driver	driver;
 };
 
@@ -294,6 +290,8 @@
  *                    transfer_one_message are mutually exclusive; when both
  *                    are set, the generic subsystem does not call your
  *                    transfer_one callback.
+ * @handle_err: the subsystem calls the driver to handle an error that occurs
+ *		in the generic implementation of transfer_one_message().
  * @unprepare_message: undo any work done by prepare_message().
  * @cs_gpios: Array of GPIOs to use as chip select lines; one per CS
  *	number. Any individual value may be -ENOENT for CS lines that
@@ -448,6 +446,8 @@
 	void (*set_cs)(struct spi_device *spi, bool enable);
 	int (*transfer_one)(struct spi_master *master, struct spi_device *spi,
 			    struct spi_transfer *transfer);
+	void (*handle_err)(struct spi_master *master,
+			   struct spi_message *message);
 
 	/* gpio chip select */
 	int			*cs_gpios;