Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/hskinnemoen/atmel-mci-2.6.28

Conflicts:
	drivers/mmc/host/atmel-mci.c
diff --git a/arch/avr32/boards/atngw100/setup.c b/arch/avr32/boards/atngw100/setup.c
index b8286f1..f308520 100644
--- a/arch/avr32/boards/atngw100/setup.c
+++ b/arch/avr32/boards/atngw100/setup.c
@@ -53,8 +53,11 @@
 };
 
 static struct mci_platform_data __initdata mci0_data = {
-	.detect_pin	= GPIO_PIN_PC(25),
-	.wp_pin		= GPIO_PIN_PE(0),
+	.slot[0] = {
+		.bus_width	= 4,
+		.detect_pin	= GPIO_PIN_PC(25),
+		.wp_pin		= GPIO_PIN_PE(0),
+	},
 };
 
 /*
diff --git a/arch/avr32/boards/atstk1000/atstk1002.c b/arch/avr32/boards/atstk1000/atstk1002.c
index dfc3443..4fedbc4 100644
--- a/arch/avr32/boards/atstk1000/atstk1002.c
+++ b/arch/avr32/boards/atstk1000/atstk1002.c
@@ -264,16 +264,20 @@
 
 #ifndef CONFIG_BOARD_ATSTK100X_SW2_CUSTOM
 
+static struct mci_platform_data __initdata mci0_data = {
+	.slot[0] = {
+		.bus_width	= 4,
+
 /* MMC card detect requires MACB0 *NOT* be used */
 #ifdef CONFIG_BOARD_ATSTK1002_SW6_CUSTOM
-static struct mci_platform_data __initdata mci0_data = {
-	.detect_pin	= GPIO_PIN_PC(14),	/* gpio30/sdcd */
-	.wp_pin		= GPIO_PIN_PC(15),	/* gpio31/sdwp */
-};
-#define MCI_PDATA	&mci0_data
+		.detect_pin	= GPIO_PIN_PC(14), /* gpio30/sdcd */
+		.wp_pin		= GPIO_PIN_PC(15), /* gpio31/sdwp */
 #else
-#define MCI_PDATA	NULL
+		.detect_pin	= -ENODEV,
+		.wp_pin		= -ENODEV,
 #endif	/* SW6 for sd{cd,wp} routing */
+	},
+};
 
 #endif	/* SW2 for MMC signal routing */
 
@@ -326,7 +330,7 @@
 	at32_add_device_spi(1, spi1_board_info, ARRAY_SIZE(spi1_board_info));
 #endif
 #ifndef CONFIG_BOARD_ATSTK100X_SW2_CUSTOM
-	at32_add_device_mci(0, MCI_PDATA);
+	at32_add_device_mci(0, &mci0_pdata);
 #endif
 #ifdef CONFIG_BOARD_ATSTK1002_SW5_CUSTOM
 	set_hw_addr(at32_add_device_eth(1, &eth_data[1]));
diff --git a/arch/avr32/boards/atstk1000/atstk1003.c b/arch/avr32/boards/atstk1000/atstk1003.c
index 0cf6641..acc6123 100644
--- a/arch/avr32/boards/atstk1000/atstk1003.c
+++ b/arch/avr32/boards/atstk1000/atstk1003.c
@@ -66,6 +66,16 @@
 } };
 #endif
 
+#ifndef CONFIG_BOARD_ATSTK100X_SW2_CUSTOM
+static struct mci_platform_data __initdata mci0_data = {
+	.slot[0] = {
+		.bus_width	= 4,
+		.detect_pin	= -ENODEV,
+		.wp_pin		= -ENODEV,
+	},
+};
+#endif
+
 #ifdef CONFIG_BOARD_ATSTK1000_EXTDAC
 static void __init atstk1003_setup_extdac(void)
 {
@@ -154,7 +164,7 @@
 	at32_add_device_spi(1, spi1_board_info, ARRAY_SIZE(spi1_board_info));
 #endif
 #ifndef CONFIG_BOARD_ATSTK100X_SW2_CUSTOM
-	at32_add_device_mci(0, NULL);
+	at32_add_device_mci(0, &mci0_data);
 #endif
 	at32_add_device_usba(0, NULL);
 #ifndef CONFIG_BOARD_ATSTK100X_SW3_CUSTOM
diff --git a/arch/avr32/boards/atstk1000/atstk1004.c b/arch/avr32/boards/atstk1000/atstk1004.c
index 50a5273..d6a2d02 100644
--- a/arch/avr32/boards/atstk1000/atstk1004.c
+++ b/arch/avr32/boards/atstk1000/atstk1004.c
@@ -71,6 +71,16 @@
 } };
 #endif
 
+#ifndef CONFIG_BOARD_ATSTK100X_SW2_CUSTOM
+static struct mci_platform_data __initdata mci0_data = {
+	.slot[0] = {
+		.bus_width	= 4,
+		.detect_pin	= -ENODEV,
+		.wp_pin		= -ENODEV,
+	},
+};
+#endif
+
 #ifdef CONFIG_BOARD_ATSTK1000_EXTDAC
 static void __init atstk1004_setup_extdac(void)
 {
@@ -137,7 +147,7 @@
 	at32_add_device_spi(1, spi1_board_info, ARRAY_SIZE(spi1_board_info));
 #endif
 #ifndef CONFIG_BOARD_ATSTK100X_SW2_CUSTOM
-	at32_add_device_mci(0, NULL);
+	at32_add_device_mci(0, &mci0_data);
 #endif
 	at32_add_device_lcdc(0, &atstk1000_lcdc_data,
 			     fbmem_start, fbmem_size, 0);
diff --git a/arch/avr32/include/asm/atmel-mci.h b/arch/avr32/include/asm/atmel-mci.h
index c2ea6e1..59f3fad 100644
--- a/arch/avr32/include/asm/atmel-mci.h
+++ b/arch/avr32/include/asm/atmel-mci.h
@@ -1,9 +1,39 @@
 #ifndef __ASM_AVR32_ATMEL_MCI_H
 #define __ASM_AVR32_ATMEL_MCI_H
 
-struct mci_platform_data {
+#define ATMEL_MCI_MAX_NR_SLOTS	2
+
+struct dma_slave;
+
+/**
+ * struct mci_slot_pdata - board-specific per-slot configuration
+ * @bus_width: Number of data lines wired up the slot
+ * @detect_pin: GPIO pin wired to the card detect switch
+ * @wp_pin: GPIO pin wired to the write protect sensor
+ *
+ * If a given slot is not present on the board, @bus_width should be
+ * set to 0. The other fields are ignored in this case.
+ *
+ * Any pins that aren't available should be set to a negative value.
+ *
+ * Note that support for multiple slots is experimental -- some cards
+ * might get upset if we don't get the clock management exactly right.
+ * But in most cases, it should work just fine.
+ */
+struct mci_slot_pdata {
+	unsigned int		bus_width;
 	int			detect_pin;
 	int			wp_pin;
 };
 
+/**
+ * struct mci_platform_data - board-specific MMC/SDcard configuration
+ * @dma_slave: DMA slave interface to use in data transfers, or NULL.
+ * @slot: Per-slot configuration data.
+ */
+struct mci_platform_data {
+	struct dma_slave	*dma_slave;
+	struct mci_slot_pdata	slot[ATMEL_MCI_MAX_NR_SLOTS];
+};
+
 #endif /* __ASM_AVR32_ATMEL_MCI_H */
diff --git a/arch/avr32/mach-at32ap/at32ap700x.c b/arch/avr32/mach-at32ap/at32ap700x.c
index e01dbe4..f1b9a3a 100644
--- a/arch/avr32/mach-at32ap/at32ap700x.c
+++ b/arch/avr32/mach-at32ap/at32ap700x.c
@@ -1272,10 +1272,14 @@
 struct platform_device *__init
 at32_add_device_mci(unsigned int id, struct mci_platform_data *data)
 {
-	struct mci_platform_data	_data;
 	struct platform_device		*pdev;
+	struct dw_dma_slave		*dws;
 
-	if (id != 0)
+	if (id != 0 || !data)
+		return NULL;
+
+	/* Must have at least one usable slot */
+	if (!data->slot[0].bus_width && !data->slot[1].bus_width)
 		return NULL;
 
 	pdev = platform_device_alloc("atmel_mci", id);
@@ -1286,28 +1290,76 @@
 				ARRAY_SIZE(atmel_mci0_resource)))
 		goto fail;
 
-	if (!data) {
-		data = &_data;
-		memset(data, -1, sizeof(struct mci_platform_data));
-		data->detect_pin = GPIO_PIN_NONE;
-		data->wp_pin = GPIO_PIN_NONE;
-	}
+	if (data->dma_slave)
+		dws = kmemdup(to_dw_dma_slave(data->dma_slave),
+				sizeof(struct dw_dma_slave), GFP_KERNEL);
+	else
+		dws = kzalloc(sizeof(struct dw_dma_slave), GFP_KERNEL);
+
+	dws->slave.dev = &pdev->dev;
+	dws->slave.dma_dev = &dw_dmac0_device.dev;
+	dws->slave.reg_width = DMA_SLAVE_WIDTH_32BIT;
+	dws->cfg_hi = (DWC_CFGH_SRC_PER(0)
+				| DWC_CFGH_DST_PER(1));
+	dws->cfg_lo &= ~(DWC_CFGL_HS_DST_POL
+				| DWC_CFGL_HS_SRC_POL);
+
+	data->dma_slave = &dws->slave;
 
 	if (platform_device_add_data(pdev, data,
 				sizeof(struct mci_platform_data)))
 		goto fail;
 
-	select_peripheral(PA(10), PERIPH_A, 0);	/* CLK	 */
-	select_peripheral(PA(11), PERIPH_A, 0);	/* CMD	 */
-	select_peripheral(PA(12), PERIPH_A, 0);	/* DATA0 */
-	select_peripheral(PA(13), PERIPH_A, 0);	/* DATA1 */
-	select_peripheral(PA(14), PERIPH_A, 0);	/* DATA2 */
-	select_peripheral(PA(15), PERIPH_A, 0);	/* DATA3 */
+	/* CLK line is common to both slots */
+	select_peripheral(PA(10), PERIPH_A, 0);
 
-	if (gpio_is_valid(data->detect_pin))
-		at32_select_gpio(data->detect_pin, 0);
-	if (gpio_is_valid(data->wp_pin))
-		at32_select_gpio(data->wp_pin, 0);
+	switch (data->slot[0].bus_width) {
+	case 4:
+		select_peripheral(PA(13), PERIPH_A, 0);	/* DATA1 */
+		select_peripheral(PA(14), PERIPH_A, 0);	/* DATA2 */
+		select_peripheral(PA(15), PERIPH_A, 0);	/* DATA3 */
+		/* fall through */
+	case 1:
+		select_peripheral(PA(11), PERIPH_A, 0);	/* CMD	 */
+		select_peripheral(PA(12), PERIPH_A, 0);	/* DATA0 */
+
+		if (gpio_is_valid(data->slot[0].detect_pin))
+			at32_select_gpio(data->slot[0].detect_pin, 0);
+		if (gpio_is_valid(data->slot[0].wp_pin))
+			at32_select_gpio(data->slot[0].wp_pin, 0);
+		break;
+	case 0:
+		/* Slot is unused */
+		break;
+	default:
+		goto fail;
+	}
+
+	switch (data->slot[1].bus_width) {
+	case 4:
+		select_peripheral(PB(8),  PERIPH_B, 0);	/* DATA1 */
+		select_peripheral(PB(9),  PERIPH_B, 0);	/* DATA2 */
+		select_peripheral(PB(10), PERIPH_B, 0);	/* DATA3 */
+		/* fall through */
+	case 1:
+		select_peripheral(PB(6),  PERIPH_B, 0); /* CMD	 */
+		select_peripheral(PB(7),  PERIPH_B, 0); /* DATA0 */
+
+		if (gpio_is_valid(data->slot[1].detect_pin))
+			at32_select_gpio(data->slot[1].detect_pin, 0);
+		if (gpio_is_valid(data->slot[1].wp_pin))
+			at32_select_gpio(data->slot[1].wp_pin, 0);
+		break;
+	case 0:
+		/* Slot is unused */
+		break;
+	default:
+		if (!data->slot[0].bus_width)
+			goto fail;
+
+		data->slot[1].bus_width = 0;
+		break;
+	}
 
 	atmel_mci0_pclk.dev = &pdev->dev;
 
diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig
index 38468b2..dfa585f 100644
--- a/drivers/mmc/host/Kconfig
+++ b/drivers/mmc/host/Kconfig
@@ -114,6 +114,17 @@
 
 	  If unsure, say N.
 
+config MMC_ATMELMCI_DMA
+	bool "Atmel MCI DMA support (EXPERIMENTAL)"
+	depends on MMC_ATMELMCI && DMA_ENGINE && EXPERIMENTAL
+	help
+	  Say Y here to have the Atmel MCI driver use a DMA engine to
+	  do data transfers and thus increase the throughput and
+	  reduce the CPU utilization. Note that this is highly
+	  experimental and may cause the driver to lock up.
+
+	  If unsure, say N.
+
 config MMC_IMX
 	tristate "Motorola i.MX Multimedia Card Interface support"
 	depends on ARCH_IMX
diff --git a/drivers/mmc/host/atmel-mci-regs.h b/drivers/mmc/host/atmel-mci-regs.h
index 26bd80e..b58364e 100644
--- a/drivers/mmc/host/atmel-mci-regs.h
+++ b/drivers/mmc/host/atmel-mci-regs.h
@@ -25,8 +25,10 @@
 #define MCI_SDCR		0x000c	/* SD Card / SDIO */
 # define MCI_SDCSEL_SLOT_A	(  0 <<  0)	/* Select SD slot A */
 # define MCI_SDCSEL_SLOT_B	(  1 <<  0)	/* Select SD slot A */
-# define MCI_SDCBUS_1BIT	(  0 <<  7)	/* 1-bit data bus */
-# define MCI_SDCBUS_4BIT	(  1 <<  7)	/* 4-bit data bus */
+# define MCI_SDCSEL_MASK	(  3 <<  0)
+# define MCI_SDCBUS_1BIT	(  0 <<  6)	/* 1-bit data bus */
+# define MCI_SDCBUS_4BIT	(  2 <<  6)	/* 4-bit data bus */
+# define MCI_SDCBUS_MASK	(  3 <<  6)
 #define MCI_ARGR		0x0010	/* Command Argument */
 #define MCI_CMDR		0x0014	/* Command */
 # define MCI_CMDR_CMDNB(x)	((x) <<  0)	/* Command Opcode */
diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c
index 0000896..7a3f243 100644
--- a/drivers/mmc/host/atmel-mci.c
+++ b/drivers/mmc/host/atmel-mci.c
@@ -11,6 +11,8 @@
 #include <linux/clk.h>
 #include <linux/debugfs.h>
 #include <linux/device.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
 #include <linux/err.h>
 #include <linux/gpio.h>
 #include <linux/init.h>
@@ -33,64 +35,178 @@
 #include "atmel-mci-regs.h"
 
 #define ATMCI_DATA_ERROR_FLAGS	(MCI_DCRCE | MCI_DTOE | MCI_OVRE | MCI_UNRE)
+#define ATMCI_DMA_THRESHOLD	16
 
 enum {
 	EVENT_CMD_COMPLETE = 0,
-	EVENT_DATA_ERROR,
-	EVENT_DATA_COMPLETE,
-	EVENT_STOP_SENT,
-	EVENT_STOP_COMPLETE,
 	EVENT_XFER_COMPLETE,
+	EVENT_DATA_COMPLETE,
+	EVENT_DATA_ERROR,
 };
 
+enum atmel_mci_state {
+	STATE_IDLE = 0,
+	STATE_SENDING_CMD,
+	STATE_SENDING_DATA,
+	STATE_DATA_BUSY,
+	STATE_SENDING_STOP,
+	STATE_DATA_ERROR,
+};
+
+struct atmel_mci_dma {
+#ifdef CONFIG_MMC_ATMELMCI_DMA
+	struct dma_client		client;
+	struct dma_chan			*chan;
+	struct dma_async_tx_descriptor	*data_desc;
+#endif
+};
+
+/**
+ * struct atmel_mci - MMC controller state shared between all slots
+ * @lock: Spinlock protecting the queue and associated data.
+ * @regs: Pointer to MMIO registers.
+ * @sg: Scatterlist entry currently being processed by PIO code, if any.
+ * @pio_offset: Offset into the current scatterlist entry.
+ * @cur_slot: The slot which is currently using the controller.
+ * @mrq: The request currently being processed on @cur_slot,
+ *	or NULL if the controller is idle.
+ * @cmd: The command currently being sent to the card, or NULL.
+ * @data: The data currently being transferred, or NULL if no data
+ *	transfer is in progress.
+ * @dma: DMA client state.
+ * @data_chan: DMA channel being used for the current data transfer.
+ * @cmd_status: Snapshot of SR taken upon completion of the current
+ *	command. Only valid when EVENT_CMD_COMPLETE is pending.
+ * @data_status: Snapshot of SR taken upon completion of the current
+ *	data transfer. Only valid when EVENT_DATA_COMPLETE or
+ *	EVENT_DATA_ERROR is pending.
+ * @stop_cmdr: Value to be loaded into CMDR when the stop command is
+ *	to be sent.
+ * @tasklet: Tasklet running the request state machine.
+ * @pending_events: Bitmask of events flagged by the interrupt handler
+ *	to be processed by the tasklet.
+ * @completed_events: Bitmask of events which the state machine has
+ *	processed.
+ * @state: Tasklet state.
+ * @queue: List of slots waiting for access to the controller.
+ * @need_clock_update: Update the clock rate before the next request.
+ * @need_reset: Reset controller before next request.
+ * @mode_reg: Value of the MR register.
+ * @bus_hz: The rate of @mck in Hz. This forms the basis for MMC bus
+ *	rate and timeout calculations.
+ * @mapbase: Physical address of the MMIO registers.
+ * @mck: The peripheral bus clock hooked up to the MMC controller.
+ * @pdev: Platform device associated with the MMC controller.
+ * @slot: Slots sharing this MMC controller.
+ *
+ * Locking
+ * =======
+ *
+ * @lock is a softirq-safe spinlock protecting @queue as well as
+ * @cur_slot, @mrq and @state. These must always be updated
+ * at the same time while holding @lock.
+ *
+ * @lock also protects mode_reg and need_clock_update since these are
+ * used to synchronize mode register updates with the queue
+ * processing.
+ *
+ * The @mrq field of struct atmel_mci_slot is also protected by @lock,
+ * and must always be written at the same time as the slot is added to
+ * @queue.
+ *
+ * @pending_events and @completed_events are accessed using atomic bit
+ * operations, so they don't need any locking.
+ *
+ * None of the fields touched by the interrupt handler need any
+ * locking. However, ordering is important: Before EVENT_DATA_ERROR or
+ * EVENT_DATA_COMPLETE is set in @pending_events, all data-related
+ * interrupts must be disabled and @data_status updated with a
+ * snapshot of SR. Similarly, before EVENT_CMD_COMPLETE is set, the
+ * CMDRDY interupt must be disabled and @cmd_status updated with a
+ * snapshot of SR, and before EVENT_XFER_COMPLETE can be set, the
+ * bytes_xfered field of @data must be written. This is ensured by
+ * using barriers.
+ */
 struct atmel_mci {
-	struct mmc_host		*mmc;
+	spinlock_t		lock;
 	void __iomem		*regs;
 
 	struct scatterlist	*sg;
 	unsigned int		pio_offset;
 
+	struct atmel_mci_slot	*cur_slot;
 	struct mmc_request	*mrq;
 	struct mmc_command	*cmd;
 	struct mmc_data		*data;
 
+	struct atmel_mci_dma	dma;
+	struct dma_chan		*data_chan;
+
 	u32			cmd_status;
 	u32			data_status;
-	u32			stop_status;
 	u32			stop_cmdr;
 
-	u32			mode_reg;
-	u32			sdc_reg;
-
 	struct tasklet_struct	tasklet;
 	unsigned long		pending_events;
 	unsigned long		completed_events;
+	enum atmel_mci_state	state;
+	struct list_head	queue;
 
-	int			present;
-	int			detect_pin;
-	int			wp_pin;
-
-	/* For detect pin debouncing */
-	struct timer_list	detect_timer;
-
+	bool			need_clock_update;
+	bool			need_reset;
+	u32			mode_reg;
 	unsigned long		bus_hz;
 	unsigned long		mapbase;
 	struct clk		*mck;
 	struct platform_device	*pdev;
+
+	struct atmel_mci_slot	*slot[ATMEL_MCI_MAX_NR_SLOTS];
 };
 
-#define atmci_is_completed(host, event)				\
-	test_bit(event, &host->completed_events)
+/**
+ * struct atmel_mci_slot - MMC slot state
+ * @mmc: The mmc_host representing this slot.
+ * @host: The MMC controller this slot is using.
+ * @sdc_reg: Value of SDCR to be written before using this slot.
+ * @mrq: mmc_request currently being processed or waiting to be
+ *	processed, or NULL when the slot is idle.
+ * @queue_node: List node for placing this node in the @queue list of
+ *	&struct atmel_mci.
+ * @clock: Clock rate configured by set_ios(). Protected by host->lock.
+ * @flags: Random state bits associated with the slot.
+ * @detect_pin: GPIO pin used for card detection, or negative if not
+ *	available.
+ * @wp_pin: GPIO pin used for card write protect sending, or negative
+ *	if not available.
+ * @detect_timer: Timer used for debouncing @detect_pin interrupts.
+ */
+struct atmel_mci_slot {
+	struct mmc_host		*mmc;
+	struct atmel_mci	*host;
+
+	u32			sdc_reg;
+
+	struct mmc_request	*mrq;
+	struct list_head	queue_node;
+
+	unsigned int		clock;
+	unsigned long		flags;
+#define ATMCI_CARD_PRESENT	0
+#define ATMCI_CARD_NEED_INIT	1
+#define ATMCI_SHUTDOWN		2
+
+	int			detect_pin;
+	int			wp_pin;
+
+	struct timer_list	detect_timer;
+};
+
 #define atmci_test_and_clear_pending(host, event)		\
 	test_and_clear_bit(event, &host->pending_events)
-#define atmci_test_and_set_completed(host, event)		\
-	test_and_set_bit(event, &host->completed_events)
 #define atmci_set_completed(host, event)			\
 	set_bit(event, &host->completed_events)
 #define atmci_set_pending(host, event)				\
 	set_bit(event, &host->pending_events)
-#define atmci_clear_pending(host, event)			\
-	clear_bit(event, &host->pending_events)
 
 /*
  * The debugfs stuff below is mostly optimized away when
@@ -98,14 +214,15 @@
  */
 static int atmci_req_show(struct seq_file *s, void *v)
 {
-	struct atmel_mci	*host = s->private;
-	struct mmc_request	*mrq = host->mrq;
+	struct atmel_mci_slot	*slot = s->private;
+	struct mmc_request	*mrq;
 	struct mmc_command	*cmd;
 	struct mmc_command	*stop;
 	struct mmc_data		*data;
 
 	/* Make sure we get a consistent snapshot */
-	spin_lock_irq(&host->mmc->lock);
+	spin_lock_bh(&slot->host->lock);
+	mrq = slot->mrq;
 
 	if (mrq) {
 		cmd = mrq->cmd;
@@ -130,7 +247,7 @@
 				stop->resp[2], stop->error);
 	}
 
-	spin_unlock_irq(&host->mmc->lock);
+	spin_unlock_bh(&slot->host->lock);
 
 	return 0;
 }
@@ -193,12 +310,16 @@
 	if (!buf)
 		return -ENOMEM;
 
-	/* Grab a more or less consistent snapshot */
-	spin_lock_irq(&host->mmc->lock);
+	/*
+	 * Grab a more or less consistent snapshot. Note that we're
+	 * not disabling interrupts, so IMR and SR may not be
+	 * consistent.
+	 */
+	spin_lock_bh(&host->lock);
 	clk_enable(host->mck);
 	memcpy_fromio(buf, host->regs, MCI_REGS_SIZE);
 	clk_disable(host->mck);
-	spin_unlock_irq(&host->mmc->lock);
+	spin_unlock_bh(&host->lock);
 
 	seq_printf(s, "MR:\t0x%08x%s%s CLKDIV=%u\n",
 			buf[MCI_MR / 4],
@@ -236,13 +357,13 @@
 	.release	= single_release,
 };
 
-static void atmci_init_debugfs(struct atmel_mci *host)
+static void atmci_init_debugfs(struct atmel_mci_slot *slot)
 {
-	struct mmc_host	*mmc;
-	struct dentry	*root;
-	struct dentry	*node;
+	struct mmc_host		*mmc = slot->mmc;
+	struct atmel_mci	*host = slot->host;
+	struct dentry		*root;
+	struct dentry		*node;
 
-	mmc = host->mmc;
 	root = mmc->debugfs_root;
 	if (!root)
 		return;
@@ -254,7 +375,11 @@
 	if (!node)
 		goto err;
 
-	node = debugfs_create_file("req", S_IRUSR, root, host, &atmci_req_fops);
+	node = debugfs_create_file("req", S_IRUSR, root, slot, &atmci_req_fops);
+	if (!node)
+		goto err;
+
+	node = debugfs_create_u32("state", S_IRUSR, root, (u32 *)&host->state);
 	if (!node)
 		goto err;
 
@@ -271,25 +396,7 @@
 	return;
 
 err:
-	dev_err(&host->pdev->dev,
-		"failed to initialize debugfs for controller\n");
-}
-
-static void atmci_enable(struct atmel_mci *host)
-{
-	clk_enable(host->mck);
-	mci_writel(host, CR, MCI_CR_MCIEN);
-	mci_writel(host, MR, host->mode_reg);
-	mci_writel(host, SDCR, host->sdc_reg);
-}
-
-static void atmci_disable(struct atmel_mci *host)
-{
-	mci_writel(host, CR, MCI_CR_SWRST);
-
-	/* Stall until write is complete, then disable the bus clock */
-	mci_readl(host, SR);
-	clk_disable(host->mck);
+	dev_err(&mmc->class_dev, "failed to initialize debugfs for slot\n");
 }
 
 static inline unsigned int ns_to_clocks(struct atmel_mci *host,
@@ -299,7 +406,7 @@
 }
 
 static void atmci_set_timeout(struct atmel_mci *host,
-			      struct mmc_data *data)
+		struct atmel_mci_slot *slot, struct mmc_data *data)
 {
 	static unsigned	dtomul_to_shift[] = {
 		0, 4, 7, 8, 10, 12, 16, 20
@@ -322,7 +429,7 @@
 		dtocyc = 15;
 	}
 
-	dev_vdbg(&host->mmc->class_dev, "setting timeout to %u cycles\n",
+	dev_vdbg(&slot->mmc->class_dev, "setting timeout to %u cycles\n",
 			dtocyc << dtomul_to_shift[dtomul]);
 	mci_writel(host, DTOR, (MCI_DTOMUL(dtomul) | MCI_DTOCYC(dtocyc)));
 }
@@ -375,15 +482,12 @@
 }
 
 static void atmci_start_command(struct atmel_mci *host,
-				struct mmc_command *cmd,
-				u32 cmd_flags)
+		struct mmc_command *cmd, u32 cmd_flags)
 {
-	/* Must read host->cmd after testing event flags */
-	smp_rmb();
 	WARN_ON(host->cmd);
 	host->cmd = cmd;
 
-	dev_vdbg(&host->mmc->class_dev,
+	dev_vdbg(&host->pdev->dev,
 			"start command: ARGR=0x%08x CMDR=0x%08x\n",
 			cmd->arg, cmd_flags);
 
@@ -391,34 +495,157 @@
 	mci_writel(host, CMDR, cmd_flags);
 }
 
-static void send_stop_cmd(struct mmc_host *mmc, struct mmc_data *data)
+static void send_stop_cmd(struct atmel_mci *host, struct mmc_data *data)
 {
-	struct atmel_mci *host = mmc_priv(mmc);
-
 	atmci_start_command(host, data->stop, host->stop_cmdr);
 	mci_writel(host, IER, MCI_CMDRDY);
 }
 
-static void atmci_request_end(struct mmc_host *mmc, struct mmc_request *mrq)
+#ifdef CONFIG_MMC_ATMELMCI_DMA
+static void atmci_dma_cleanup(struct atmel_mci *host)
 {
-	struct atmel_mci *host = mmc_priv(mmc);
+	struct mmc_data			*data = host->data;
 
-	WARN_ON(host->cmd || host->data);
-	host->mrq = NULL;
-
-	atmci_disable(host);
-
-	mmc_request_done(mmc, mrq);
+	dma_unmap_sg(&host->pdev->dev, data->sg, data->sg_len,
+		     ((data->flags & MMC_DATA_WRITE)
+		      ? DMA_TO_DEVICE : DMA_FROM_DEVICE));
 }
 
+static void atmci_stop_dma(struct atmel_mci *host)
+{
+	struct dma_chan *chan = host->data_chan;
+
+	if (chan) {
+		chan->device->device_terminate_all(chan);
+		atmci_dma_cleanup(host);
+	} else {
+		/* Data transfer was stopped by the interrupt handler */
+		atmci_set_pending(host, EVENT_XFER_COMPLETE);
+		mci_writel(host, IER, MCI_NOTBUSY);
+	}
+}
+
+/* This function is called by the DMA driver from tasklet context. */
+static void atmci_dma_complete(void *arg)
+{
+	struct atmel_mci	*host = arg;
+	struct mmc_data		*data = host->data;
+
+	dev_vdbg(&host->pdev->dev, "DMA complete\n");
+
+	atmci_dma_cleanup(host);
+
+	/*
+	 * If the card was removed, data will be NULL. No point trying
+	 * to send the stop command or waiting for NBUSY in this case.
+	 */
+	if (data) {
+		atmci_set_pending(host, EVENT_XFER_COMPLETE);
+		tasklet_schedule(&host->tasklet);
+
+		/*
+		 * Regardless of what the documentation says, we have
+		 * to wait for NOTBUSY even after block read
+		 * operations.
+		 *
+		 * When the DMA transfer is complete, the controller
+		 * may still be reading the CRC from the card, i.e.
+		 * the data transfer is still in progress and we
+		 * haven't seen all the potential error bits yet.
+		 *
+		 * The interrupt handler will schedule a different
+		 * tasklet to finish things up when the data transfer
+		 * is completely done.
+		 *
+		 * We may not complete the mmc request here anyway
+		 * because the mmc layer may call back and cause us to
+		 * violate the "don't submit new operations from the
+		 * completion callback" rule of the dma engine
+		 * framework.
+		 */
+		mci_writel(host, IER, MCI_NOTBUSY);
+	}
+}
+
+static int
+atmci_submit_data_dma(struct atmel_mci *host, struct mmc_data *data)
+{
+	struct dma_chan			*chan;
+	struct dma_async_tx_descriptor	*desc;
+	struct scatterlist		*sg;
+	unsigned int			i;
+	enum dma_data_direction		direction;
+
+	/*
+	 * We don't do DMA on "complex" transfers, i.e. with
+	 * non-word-aligned buffers or lengths. Also, we don't bother
+	 * with all the DMA setup overhead for short transfers.
+	 */
+	if (data->blocks * data->blksz < ATMCI_DMA_THRESHOLD)
+		return -EINVAL;
+	if (data->blksz & 3)
+		return -EINVAL;
+
+	for_each_sg(data->sg, sg, data->sg_len, i) {
+		if (sg->offset & 3 || sg->length & 3)
+			return -EINVAL;
+	}
+
+	/* If we don't have a channel, we can't do DMA */
+	chan = host->dma.chan;
+	if (chan) {
+		dma_chan_get(chan);
+		host->data_chan = chan;
+	}
+
+	if (!chan)
+		return -ENODEV;
+
+	if (data->flags & MMC_DATA_READ)
+		direction = DMA_FROM_DEVICE;
+	else
+		direction = DMA_TO_DEVICE;
+
+	desc = chan->device->device_prep_slave_sg(chan,
+			data->sg, data->sg_len, direction,
+			DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+	if (!desc)
+		return -ENOMEM;
+
+	host->dma.data_desc = desc;
+	desc->callback = atmci_dma_complete;
+	desc->callback_param = host;
+	desc->tx_submit(desc);
+
+	/* Go! */
+	chan->device->device_issue_pending(chan);
+
+	return 0;
+}
+
+#else /* CONFIG_MMC_ATMELMCI_DMA */
+
+static int atmci_submit_data_dma(struct atmel_mci *host, struct mmc_data *data)
+{
+	return -ENOSYS;
+}
+
+static void atmci_stop_dma(struct atmel_mci *host)
+{
+	/* Data transfer was stopped by the interrupt handler */
+	atmci_set_pending(host, EVENT_XFER_COMPLETE);
+	mci_writel(host, IER, MCI_NOTBUSY);
+}
+
+#endif /* CONFIG_MMC_ATMELMCI_DMA */
+
 /*
  * Returns a mask of interrupt flags to be enabled after the whole
  * request has been prepared.
  */
-static u32 atmci_submit_data(struct mmc_host *mmc, struct mmc_data *data)
+static u32 atmci_submit_data(struct atmel_mci *host, struct mmc_data *data)
 {
-	struct atmel_mci	*host = mmc_priv(mmc);
-	u32			iflags;
+	u32 iflags;
 
 	data->error = -EINPROGRESS;
 
@@ -426,77 +653,89 @@
 	host->sg = NULL;
 	host->data = data;
 
-	dev_vdbg(&mmc->class_dev, "BLKR=0x%08x\n",
-			MCI_BCNT(data->blocks) | MCI_BLKLEN(data->blksz));
-
 	iflags = ATMCI_DATA_ERROR_FLAGS;
-	host->sg = data->sg;
-	host->pio_offset = 0;
-	if (data->flags & MMC_DATA_READ)
-		iflags |= MCI_RXRDY;
-	else
-		iflags |= MCI_TXRDY;
+	if (atmci_submit_data_dma(host, data)) {
+		host->data_chan = NULL;
+
+		/*
+		 * Errata: MMC data write operation with less than 12
+		 * bytes is impossible.
+		 *
+		 * Errata: MCI Transmit Data Register (TDR) FIFO
+		 * corruption when length is not multiple of 4.
+		 */
+		if (data->blocks * data->blksz < 12
+				|| (data->blocks * data->blksz) & 3)
+			host->need_reset = true;
+
+		host->sg = data->sg;
+		host->pio_offset = 0;
+		if (data->flags & MMC_DATA_READ)
+			iflags |= MCI_RXRDY;
+		else
+			iflags |= MCI_TXRDY;
+	}
 
 	return iflags;
 }
 
-static void atmci_request(struct mmc_host *mmc, struct mmc_request *mrq)
+static void atmci_start_request(struct atmel_mci *host,
+		struct atmel_mci_slot *slot)
 {
-	struct atmel_mci	*host = mmc_priv(mmc);
-	struct mmc_data		*data;
+	struct mmc_request	*mrq;
 	struct mmc_command	*cmd;
+	struct mmc_data		*data;
 	u32			iflags;
-	u32			cmdflags = 0;
+	u32			cmdflags;
+
+	mrq = slot->mrq;
+	host->cur_slot = slot;
+	host->mrq = mrq;
+
+	host->pending_events = 0;
+	host->completed_events = 0;
+	host->data_status = 0;
+
+	if (host->need_reset) {
+		mci_writel(host, CR, MCI_CR_SWRST);
+		mci_writel(host, CR, MCI_CR_MCIEN);
+		mci_writel(host, MR, host->mode_reg);
+		host->need_reset = false;
+	}
+	mci_writel(host, SDCR, slot->sdc_reg);
 
 	iflags = mci_readl(host, IMR);
 	if (iflags)
-		dev_warn(&mmc->class_dev, "WARNING: IMR=0x%08x\n",
-				mci_readl(host, IMR));
+		dev_warn(&slot->mmc->class_dev, "WARNING: IMR=0x%08x\n",
+				iflags);
 
-	WARN_ON(host->mrq != NULL);
-
-	/*
-	 * We may "know" the card is gone even though there's still an
-	 * electrical connection. If so, we really need to communicate
-	 * this to the MMC core since there won't be any more
-	 * interrupts as the card is completely removed. Otherwise,
-	 * the MMC core might believe the card is still there even
-	 * though the card was just removed very slowly.
-	 */
-	if (!host->present) {
-		mrq->cmd->error = -ENOMEDIUM;
-		mmc_request_done(mmc, mrq);
-		return;
+	if (unlikely(test_and_clear_bit(ATMCI_CARD_NEED_INIT, &slot->flags))) {
+		/* Send init sequence (74 clock cycles) */
+		mci_writel(host, CMDR, MCI_CMDR_SPCMD_INIT);
+		while (!(mci_readl(host, SR) & MCI_CMDRDY))
+			cpu_relax();
 	}
-
-	host->mrq = mrq;
-	host->pending_events = 0;
-	host->completed_events = 0;
-
-	atmci_enable(host);
-
-	/* We don't support multiple blocks of weird lengths. */
 	data = mrq->data;
 	if (data) {
-		if (data->blocks > 1 && data->blksz & 3)
-			goto fail;
-		atmci_set_timeout(host, data);
+		atmci_set_timeout(host, slot, data);
 
 		/* Must set block count/size before sending command */
 		mci_writel(host, BLKR, MCI_BCNT(data->blocks)
 				| MCI_BLKLEN(data->blksz));
+		dev_vdbg(&slot->mmc->class_dev, "BLKR=0x%08x\n",
+			MCI_BCNT(data->blocks) | MCI_BLKLEN(data->blksz));
 	}
 
 	iflags = MCI_CMDRDY;
 	cmd = mrq->cmd;
-	cmdflags = atmci_prepare_command(mmc, cmd);
+	cmdflags = atmci_prepare_command(slot->mmc, cmd);
 	atmci_start_command(host, cmd, cmdflags);
 
 	if (data)
-		iflags |= atmci_submit_data(mmc, data);
+		iflags |= atmci_submit_data(host, data);
 
 	if (mrq->stop) {
-		host->stop_cmdr = atmci_prepare_command(mmc, mrq->stop);
+		host->stop_cmdr = atmci_prepare_command(slot->mmc, mrq->stop);
 		host->stop_cmdr |= MCI_CMDR_STOP_XFER;
 		if (!(data->flags & MMC_DATA_WRITE))
 			host->stop_cmdr |= MCI_CMDR_TRDIR_READ;
@@ -513,59 +752,156 @@
 	 * prepared yet.)
 	 */
 	mci_writel(host, IER, iflags);
+}
 
-	return;
+static void atmci_queue_request(struct atmel_mci *host,
+		struct atmel_mci_slot *slot, struct mmc_request *mrq)
+{
+	dev_vdbg(&slot->mmc->class_dev, "queue request: state=%d\n",
+			host->state);
 
-fail:
-	atmci_disable(host);
-	host->mrq = NULL;
-	mrq->cmd->error = -EINVAL;
-	mmc_request_done(mmc, mrq);
+	spin_lock_bh(&host->lock);
+	slot->mrq = mrq;
+	if (host->state == STATE_IDLE) {
+		host->state = STATE_SENDING_CMD;
+		atmci_start_request(host, slot);
+	} else {
+		list_add_tail(&slot->queue_node, &host->queue);
+	}
+	spin_unlock_bh(&host->lock);
+}
+
+static void atmci_request(struct mmc_host *mmc, struct mmc_request *mrq)
+{
+	struct atmel_mci_slot	*slot = mmc_priv(mmc);
+	struct atmel_mci	*host = slot->host;
+	struct mmc_data		*data;
+
+	WARN_ON(slot->mrq);
+
+	/*
+	 * We may "know" the card is gone even though there's still an
+	 * electrical connection. If so, we really need to communicate
+	 * this to the MMC core since there won't be any more
+	 * interrupts as the card is completely removed. Otherwise,
+	 * the MMC core might believe the card is still there even
+	 * though the card was just removed very slowly.
+	 */
+	if (!test_bit(ATMCI_CARD_PRESENT, &slot->flags)) {
+		mrq->cmd->error = -ENOMEDIUM;
+		mmc_request_done(mmc, mrq);
+		return;
+	}
+
+	/* We don't support multiple blocks of weird lengths. */
+	data = mrq->data;
+	if (data && data->blocks > 1 && data->blksz & 3) {
+		mrq->cmd->error = -EINVAL;
+		mmc_request_done(mmc, mrq);
+	}
+
+	atmci_queue_request(host, slot, mrq);
 }
 
 static void atmci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 {
-	struct atmel_mci	*host = mmc_priv(mmc);
+	struct atmel_mci_slot	*slot = mmc_priv(mmc);
+	struct atmel_mci	*host = slot->host;
+	unsigned int		i;
+
+	slot->sdc_reg &= ~MCI_SDCBUS_MASK;
+	switch (ios->bus_width) {
+	case MMC_BUS_WIDTH_1:
+		slot->sdc_reg |= MCI_SDCBUS_1BIT;
+		break;
+	case MMC_BUS_WIDTH_4:
+		slot->sdc_reg = MCI_SDCBUS_4BIT;
+		break;
+	}
 
 	if (ios->clock) {
+		unsigned int clock_min = ~0U;
 		u32 clkdiv;
 
-		/* Set clock rate */
-		clkdiv = DIV_ROUND_UP(host->bus_hz, 2 * ios->clock) - 1;
+		spin_lock_bh(&host->lock);
+		if (!host->mode_reg) {
+			clk_enable(host->mck);
+			mci_writel(host, CR, MCI_CR_SWRST);
+			mci_writel(host, CR, MCI_CR_MCIEN);
+		}
+
+		/*
+		 * Use mirror of ios->clock to prevent race with mmc
+		 * core ios update when finding the minimum.
+		 */
+		slot->clock = ios->clock;
+		for (i = 0; i < ATMEL_MCI_MAX_NR_SLOTS; i++) {
+			if (host->slot[i] && host->slot[i]->clock
+					&& host->slot[i]->clock < clock_min)
+				clock_min = host->slot[i]->clock;
+		}
+
+		/* Calculate clock divider */
+		clkdiv = DIV_ROUND_UP(host->bus_hz, 2 * clock_min) - 1;
 		if (clkdiv > 255) {
 			dev_warn(&mmc->class_dev,
 				"clock %u too slow; using %lu\n",
-				ios->clock, host->bus_hz / (2 * 256));
+				clock_min, host->bus_hz / (2 * 256));
 			clkdiv = 255;
 		}
 
+		/*
+		 * WRPROOF and RDPROOF prevent overruns/underruns by
+		 * stopping the clock when the FIFO is full/empty.
+		 * This state is not expected to last for long.
+		 */
 		host->mode_reg = MCI_MR_CLKDIV(clkdiv) | MCI_MR_WRPROOF
 					| MCI_MR_RDPROOF;
-	}
 
-	switch (ios->bus_width) {
-	case MMC_BUS_WIDTH_1:
-		host->sdc_reg = 0;
-		break;
-	case MMC_BUS_WIDTH_4:
-		host->sdc_reg = MCI_SDCBUS_4BIT;
-		break;
+		if (list_empty(&host->queue))
+			mci_writel(host, MR, host->mode_reg);
+		else
+			host->need_clock_update = true;
+
+		spin_unlock_bh(&host->lock);
+	} else {
+		bool any_slot_active = false;
+
+		spin_lock_bh(&host->lock);
+		slot->clock = 0;
+		for (i = 0; i < ATMEL_MCI_MAX_NR_SLOTS; i++) {
+			if (host->slot[i] && host->slot[i]->clock) {
+				any_slot_active = true;
+				break;
+			}
+		}
+		if (!any_slot_active) {
+			mci_writel(host, CR, MCI_CR_MCIDIS);
+			if (host->mode_reg) {
+				mci_readl(host, MR);
+				clk_disable(host->mck);
+			}
+			host->mode_reg = 0;
+		}
+		spin_unlock_bh(&host->lock);
 	}
 
 	switch (ios->power_mode) {
-	case MMC_POWER_ON:
-		/* Send init sequence (74 clock cycles) */
-		atmci_enable(host);
-		mci_writel(host, CMDR, MCI_CMDR_SPCMD_INIT);
-		while (!(mci_readl(host, SR) & MCI_CMDRDY))
-			cpu_relax();
-		atmci_disable(host);
+	case MMC_POWER_UP:
+		set_bit(ATMCI_CARD_NEED_INIT, &slot->flags);
 		break;
 	default:
 		/*
 		 * TODO: None of the currently available AVR32-based
 		 * boards allow MMC power to be turned off. Implement
 		 * power control when this can be tested properly.
+		 *
+		 * We also need to hook this into the clock management
+		 * somehow so that newly inserted cards aren't
+		 * subjected to a fast clock before we have a chance
+		 * to figure out what the maximum rate is. Currently,
+		 * there's no way to avoid this, and there never will
+		 * be for boards that don't support power control.
 		 */
 		break;
 	}
@@ -573,31 +909,82 @@
 
 static int atmci_get_ro(struct mmc_host *mmc)
 {
-	int			read_only = 0;
-	struct atmel_mci	*host = mmc_priv(mmc);
+	int			read_only = -ENOSYS;
+	struct atmel_mci_slot	*slot = mmc_priv(mmc);
 
-	if (gpio_is_valid(host->wp_pin)) {
-		read_only = gpio_get_value(host->wp_pin);
+	if (gpio_is_valid(slot->wp_pin)) {
+		read_only = gpio_get_value(slot->wp_pin);
 		dev_dbg(&mmc->class_dev, "card is %s\n",
 				read_only ? "read-only" : "read-write");
-	} else {
-		dev_dbg(&mmc->class_dev,
-			"no pin for checking read-only switch."
-			" Assuming write-enable.\n");
 	}
 
 	return read_only;
 }
 
-static struct mmc_host_ops atmci_ops = {
+static int atmci_get_cd(struct mmc_host *mmc)
+{
+	int			present = -ENOSYS;
+	struct atmel_mci_slot	*slot = mmc_priv(mmc);
+
+	if (gpio_is_valid(slot->detect_pin)) {
+		present = !gpio_get_value(slot->detect_pin);
+		dev_dbg(&mmc->class_dev, "card is %spresent\n",
+				present ? "" : "not ");
+	}
+
+	return present;
+}
+
+static const struct mmc_host_ops atmci_ops = {
 	.request	= atmci_request,
 	.set_ios	= atmci_set_ios,
 	.get_ro		= atmci_get_ro,
+	.get_cd		= atmci_get_cd,
 };
 
-static void atmci_command_complete(struct atmel_mci *host,
-			struct mmc_command *cmd, u32 status)
+/* Called with host->lock held */
+static void atmci_request_end(struct atmel_mci *host, struct mmc_request *mrq)
+	__releases(&host->lock)
+	__acquires(&host->lock)
 {
+	struct atmel_mci_slot	*slot = NULL;
+	struct mmc_host		*prev_mmc = host->cur_slot->mmc;
+
+	WARN_ON(host->cmd || host->data);
+
+	/*
+	 * Update the MMC clock rate if necessary. This may be
+	 * necessary if set_ios() is called when a different slot is
+	 * busy transfering data.
+	 */
+	if (host->need_clock_update)
+		mci_writel(host, MR, host->mode_reg);
+
+	host->cur_slot->mrq = NULL;
+	host->mrq = NULL;
+	if (!list_empty(&host->queue)) {
+		slot = list_entry(host->queue.next,
+				struct atmel_mci_slot, queue_node);
+		list_del(&slot->queue_node);
+		dev_vdbg(&host->pdev->dev, "list not empty: %s is next\n",
+				mmc_hostname(slot->mmc));
+		host->state = STATE_SENDING_CMD;
+		atmci_start_request(host, slot);
+	} else {
+		dev_vdbg(&host->pdev->dev, "list empty\n");
+		host->state = STATE_IDLE;
+	}
+
+	spin_unlock(&host->lock);
+	mmc_request_done(prev_mmc, mrq);
+	spin_lock(&host->lock);
+}
+
+static void atmci_command_complete(struct atmel_mci *host,
+			struct mmc_command *cmd)
+{
+	u32		status = host->cmd_status;
+
 	/* Read the response from the card (up to 16 bytes) */
 	cmd->resp[0] = mci_readl(host, RSPR);
 	cmd->resp[1] = mci_readl(host, RSPR);
@@ -614,11 +1001,12 @@
 		cmd->error = 0;
 
 	if (cmd->error) {
-		dev_dbg(&host->mmc->class_dev,
+		dev_dbg(&host->pdev->dev,
 			"command error: status=0x%08x\n", status);
 
 		if (cmd->data) {
 			host->data = NULL;
+			atmci_stop_dma(host);
 			mci_writel(host, IDR, MCI_NOTBUSY
 					| MCI_TXRDY | MCI_RXRDY
 					| ATMCI_DATA_ERROR_FLAGS);
@@ -628,146 +1016,222 @@
 
 static void atmci_detect_change(unsigned long data)
 {
-	struct atmel_mci *host = (struct atmel_mci *)data;
-	struct mmc_request *mrq = host->mrq;
-	int present;
+	struct atmel_mci_slot	*slot = (struct atmel_mci_slot *)data;
+	bool			present;
+	bool			present_old;
 
 	/*
-	 * atmci_remove() sets detect_pin to -1 before freeing the
-	 * interrupt. We must not re-enable the interrupt if it has
-	 * been freed.
+	 * atmci_cleanup_slot() sets the ATMCI_SHUTDOWN flag before
+	 * freeing the interrupt. We must not re-enable the interrupt
+	 * if it has been freed, and if we're shutting down, it
+	 * doesn't really matter whether the card is present or not.
 	 */
 	smp_rmb();
-	if (!gpio_is_valid(host->detect_pin))
+	if (test_bit(ATMCI_SHUTDOWN, &slot->flags))
 		return;
 
-	enable_irq(gpio_to_irq(host->detect_pin));
-	present = !gpio_get_value(host->detect_pin);
+	enable_irq(gpio_to_irq(slot->detect_pin));
+	present = !gpio_get_value(slot->detect_pin);
+	present_old = test_bit(ATMCI_CARD_PRESENT, &slot->flags);
 
-	dev_vdbg(&host->pdev->dev, "detect change: %d (was %d)\n",
-			present, host->present);
+	dev_vdbg(&slot->mmc->class_dev, "detect change: %d (was %d)\n",
+			present, present_old);
 
-	if (present != host->present) {
-		dev_dbg(&host->mmc->class_dev, "card %s\n",
+	if (present != present_old) {
+		struct atmel_mci	*host = slot->host;
+		struct mmc_request	*mrq;
+
+		dev_dbg(&slot->mmc->class_dev, "card %s\n",
 			present ? "inserted" : "removed");
-		host->present = present;
 
-		/* Reset controller if card is gone */
-		if (!present) {
-			mci_writel(host, CR, MCI_CR_SWRST);
-			mci_writel(host, IDR, ~0UL);
-			mci_writel(host, CR, MCI_CR_MCIEN);
-		}
+		spin_lock(&host->lock);
+
+		if (!present)
+			clear_bit(ATMCI_CARD_PRESENT, &slot->flags);
+		else
+			set_bit(ATMCI_CARD_PRESENT, &slot->flags);
 
 		/* Clean up queue if present */
+		mrq = slot->mrq;
 		if (mrq) {
-			/*
-			 * Reset controller to terminate any ongoing
-			 * commands or data transfers.
-			 */
-			mci_writel(host, CR, MCI_CR_SWRST);
+			if (mrq == host->mrq) {
+				/*
+				 * Reset controller to terminate any ongoing
+				 * commands or data transfers.
+				 */
+				mci_writel(host, CR, MCI_CR_SWRST);
+				mci_writel(host, CR, MCI_CR_MCIEN);
+				mci_writel(host, MR, host->mode_reg);
 
-			if (!atmci_is_completed(host, EVENT_CMD_COMPLETE))
-				mrq->cmd->error = -ENOMEDIUM;
-
-			if (mrq->data && !atmci_is_completed(host,
-						EVENT_DATA_COMPLETE)) {
 				host->data = NULL;
-				mrq->data->error = -ENOMEDIUM;
+				host->cmd = NULL;
+
+				switch (host->state) {
+				case STATE_IDLE:
+					break;
+				case STATE_SENDING_CMD:
+					mrq->cmd->error = -ENOMEDIUM;
+					if (!mrq->data)
+						break;
+					/* fall through */
+				case STATE_SENDING_DATA:
+					mrq->data->error = -ENOMEDIUM;
+					atmci_stop_dma(host);
+					break;
+				case STATE_DATA_BUSY:
+				case STATE_DATA_ERROR:
+					if (mrq->data->error == -EINPROGRESS)
+						mrq->data->error = -ENOMEDIUM;
+					if (!mrq->stop)
+						break;
+					/* fall through */
+				case STATE_SENDING_STOP:
+					mrq->stop->error = -ENOMEDIUM;
+					break;
+				}
+
+				atmci_request_end(host, mrq);
+			} else {
+				list_del(&slot->queue_node);
+				mrq->cmd->error = -ENOMEDIUM;
+				if (mrq->data)
+					mrq->data->error = -ENOMEDIUM;
+				if (mrq->stop)
+					mrq->stop->error = -ENOMEDIUM;
+
+				spin_unlock(&host->lock);
+				mmc_request_done(slot->mmc, mrq);
+				spin_lock(&host->lock);
 			}
-			if (mrq->stop && !atmci_is_completed(host,
-						EVENT_STOP_COMPLETE))
-				mrq->stop->error = -ENOMEDIUM;
-
-			host->cmd = NULL;
-			atmci_request_end(host->mmc, mrq);
 		}
+		spin_unlock(&host->lock);
 
-		mmc_detect_change(host->mmc, 0);
+		mmc_detect_change(slot->mmc, 0);
 	}
 }
 
 static void atmci_tasklet_func(unsigned long priv)
 {
-	struct mmc_host		*mmc = (struct mmc_host *)priv;
-	struct atmel_mci	*host = mmc_priv(mmc);
+	struct atmel_mci	*host = (struct atmel_mci *)priv;
 	struct mmc_request	*mrq = host->mrq;
 	struct mmc_data		*data = host->data;
+	struct mmc_command	*cmd = host->cmd;
+	enum atmel_mci_state	state = host->state;
+	enum atmel_mci_state	prev_state;
+	u32			status;
 
-	dev_vdbg(&mmc->class_dev,
-		"tasklet: pending/completed/mask %lx/%lx/%x\n",
-		host->pending_events, host->completed_events,
+	spin_lock(&host->lock);
+
+	state = host->state;
+
+	dev_vdbg(&host->pdev->dev,
+		"tasklet: state %u pending/completed/mask %lx/%lx/%x\n",
+		state, host->pending_events, host->completed_events,
 		mci_readl(host, IMR));
 
-	if (atmci_test_and_clear_pending(host, EVENT_CMD_COMPLETE)) {
-		/*
-		 * host->cmd must be set to NULL before the interrupt
-		 * handler sees EVENT_CMD_COMPLETE
-		 */
-		host->cmd = NULL;
-		smp_wmb();
-		atmci_set_completed(host, EVENT_CMD_COMPLETE);
-		atmci_command_complete(host, mrq->cmd, host->cmd_status);
+	do {
+		prev_state = state;
 
-		if (!mrq->cmd->error && mrq->stop
-				&& atmci_is_completed(host, EVENT_XFER_COMPLETE)
-				&& !atmci_test_and_set_completed(host,
-					EVENT_STOP_SENT))
-			send_stop_cmd(host->mmc, mrq->data);
-	}
-	if (atmci_test_and_clear_pending(host, EVENT_STOP_COMPLETE)) {
-		/*
-		 * host->cmd must be set to NULL before the interrupt
-		 * handler sees EVENT_STOP_COMPLETE
-		 */
-		host->cmd = NULL;
-		smp_wmb();
-		atmci_set_completed(host, EVENT_STOP_COMPLETE);
-		atmci_command_complete(host, mrq->stop, host->stop_status);
-	}
-	if (atmci_test_and_clear_pending(host, EVENT_DATA_ERROR)) {
-		u32 status = host->data_status;
+		switch (state) {
+		case STATE_IDLE:
+			break;
 
-		dev_vdbg(&mmc->class_dev, "data error: status=%08x\n", status);
+		case STATE_SENDING_CMD:
+			if (!atmci_test_and_clear_pending(host,
+						EVENT_CMD_COMPLETE))
+				break;
 
-		atmci_set_completed(host, EVENT_DATA_ERROR);
-		atmci_set_completed(host, EVENT_DATA_COMPLETE);
+			host->cmd = NULL;
+			atmci_set_completed(host, EVENT_CMD_COMPLETE);
+			atmci_command_complete(host, mrq->cmd);
+			if (!mrq->data || cmd->error) {
+				atmci_request_end(host, host->mrq);
+				goto unlock;
+			}
 
-		if (status & MCI_DTOE) {
-			dev_dbg(&mmc->class_dev,
-					"data timeout error\n");
-			data->error = -ETIMEDOUT;
-		} else if (status & MCI_DCRCE) {
-			dev_dbg(&mmc->class_dev, "data CRC error\n");
-			data->error = -EILSEQ;
-		} else {
-			dev_dbg(&mmc->class_dev,
-					"data FIFO error (status=%08x)\n",
-					status);
-			data->error = -EIO;
+			prev_state = state = STATE_SENDING_DATA;
+			/* fall through */
+
+		case STATE_SENDING_DATA:
+			if (atmci_test_and_clear_pending(host,
+						EVENT_DATA_ERROR)) {
+				atmci_stop_dma(host);
+				if (data->stop)
+					send_stop_cmd(host, data);
+				state = STATE_DATA_ERROR;
+				break;
+			}
+
+			if (!atmci_test_and_clear_pending(host,
+						EVENT_XFER_COMPLETE))
+				break;
+
+			atmci_set_completed(host, EVENT_XFER_COMPLETE);
+			prev_state = state = STATE_DATA_BUSY;
+			/* fall through */
+
+		case STATE_DATA_BUSY:
+			if (!atmci_test_and_clear_pending(host,
+						EVENT_DATA_COMPLETE))
+				break;
+
+			host->data = NULL;
+			atmci_set_completed(host, EVENT_DATA_COMPLETE);
+			status = host->data_status;
+			if (unlikely(status & ATMCI_DATA_ERROR_FLAGS)) {
+				if (status & MCI_DTOE) {
+					dev_dbg(&host->pdev->dev,
+							"data timeout error\n");
+					data->error = -ETIMEDOUT;
+				} else if (status & MCI_DCRCE) {
+					dev_dbg(&host->pdev->dev,
+							"data CRC error\n");
+					data->error = -EILSEQ;
+				} else {
+					dev_dbg(&host->pdev->dev,
+						"data FIFO error (status=%08x)\n",
+						status);
+					data->error = -EIO;
+				}
+			} else {
+				data->bytes_xfered = data->blocks * data->blksz;
+				data->error = 0;
+			}
+
+			if (!data->stop) {
+				atmci_request_end(host, host->mrq);
+				goto unlock;
+			}
+
+			prev_state = state = STATE_SENDING_STOP;
+			if (!data->error)
+				send_stop_cmd(host, data);
+			/* fall through */
+
+		case STATE_SENDING_STOP:
+			if (!atmci_test_and_clear_pending(host,
+						EVENT_CMD_COMPLETE))
+				break;
+
+			host->cmd = NULL;
+			atmci_command_complete(host, mrq->stop);
+			atmci_request_end(host, host->mrq);
+			goto unlock;
+
+		case STATE_DATA_ERROR:
+			if (!atmci_test_and_clear_pending(host,
+						EVENT_XFER_COMPLETE))
+				break;
+
+			state = STATE_DATA_BUSY;
+			break;
 		}
+	} while (state != prev_state);
 
-		if (host->present && data->stop
-				&& atmci_is_completed(host, EVENT_CMD_COMPLETE)
-				&& !atmci_test_and_set_completed(
-					host, EVENT_STOP_SENT))
-			send_stop_cmd(host->mmc, data);
+	host->state = state;
 
-		host->data = NULL;
-	}
-	if (atmci_test_and_clear_pending(host, EVENT_DATA_COMPLETE)) {
-		atmci_set_completed(host, EVENT_DATA_COMPLETE);
-
-		if (!atmci_is_completed(host, EVENT_DATA_ERROR)) {
-			data->bytes_xfered = data->blocks * data->blksz;
-			data->error = 0;
-		}
-
-		host->data = NULL;
-	}
-
-	if (host->mrq && !host->cmd && !host->data)
-		atmci_request_end(mmc, host->mrq);
+unlock:
+	spin_unlock(&host->lock);
 }
 
 static void atmci_read_data_pio(struct atmel_mci *host)
@@ -789,6 +1253,7 @@
 			nbytes += 4;
 
 			if (offset == sg->length) {
+				flush_dcache_page(sg_page(sg));
 				host->sg = sg = sg_next(sg);
 				if (!sg)
 					goto done;
@@ -817,9 +1282,11 @@
 			mci_writel(host, IDR, (MCI_NOTBUSY | MCI_RXRDY
 						| ATMCI_DATA_ERROR_FLAGS));
 			host->data_status = status;
+			data->bytes_xfered += nbytes;
+			smp_wmb();
 			atmci_set_pending(host, EVENT_DATA_ERROR);
 			tasklet_schedule(&host->tasklet);
-			break;
+			return;
 		}
 	} while (status & MCI_RXRDY);
 
@@ -832,10 +1299,8 @@
 	mci_writel(host, IDR, MCI_RXRDY);
 	mci_writel(host, IER, MCI_NOTBUSY);
 	data->bytes_xfered += nbytes;
-	atmci_set_completed(host, EVENT_XFER_COMPLETE);
-	if (data->stop && atmci_is_completed(host, EVENT_CMD_COMPLETE)
-			&& !atmci_test_and_set_completed(host, EVENT_STOP_SENT))
-		send_stop_cmd(host->mmc, data);
+	smp_wmb();
+	atmci_set_pending(host, EVENT_XFER_COMPLETE);
 }
 
 static void atmci_write_data_pio(struct atmel_mci *host)
@@ -888,9 +1353,11 @@
 			mci_writel(host, IDR, (MCI_NOTBUSY | MCI_TXRDY
 						| ATMCI_DATA_ERROR_FLAGS));
 			host->data_status = status;
+			data->bytes_xfered += nbytes;
+			smp_wmb();
 			atmci_set_pending(host, EVENT_DATA_ERROR);
 			tasklet_schedule(&host->tasklet);
-			break;
+			return;
 		}
 	} while (status & MCI_TXRDY);
 
@@ -903,38 +1370,26 @@
 	mci_writel(host, IDR, MCI_TXRDY);
 	mci_writel(host, IER, MCI_NOTBUSY);
 	data->bytes_xfered += nbytes;
-	atmci_set_completed(host, EVENT_XFER_COMPLETE);
-	if (data->stop && atmci_is_completed(host, EVENT_CMD_COMPLETE)
-			&& !atmci_test_and_set_completed(host, EVENT_STOP_SENT))
-		send_stop_cmd(host->mmc, data);
+	smp_wmb();
+	atmci_set_pending(host, EVENT_XFER_COMPLETE);
 }
 
-static void atmci_cmd_interrupt(struct mmc_host *mmc, u32 status)
+static void atmci_cmd_interrupt(struct atmel_mci *host, u32 status)
 {
-	struct atmel_mci	*host = mmc_priv(mmc);
-
 	mci_writel(host, IDR, MCI_CMDRDY);
 
-	if (atmci_is_completed(host, EVENT_STOP_SENT)) {
-		host->stop_status = status;
-		atmci_set_pending(host, EVENT_STOP_COMPLETE);
-	} else {
-		host->cmd_status = status;
-		atmci_set_pending(host, EVENT_CMD_COMPLETE);
-	}
-
+	host->cmd_status = status;
+	smp_wmb();
+	atmci_set_pending(host, EVENT_CMD_COMPLETE);
 	tasklet_schedule(&host->tasklet);
 }
 
 static irqreturn_t atmci_interrupt(int irq, void *dev_id)
 {
-	struct mmc_host		*mmc = dev_id;
-	struct atmel_mci	*host = mmc_priv(mmc);
+	struct atmel_mci	*host = dev_id;
 	u32			status, mask, pending;
 	unsigned int		pass_count = 0;
 
-	spin_lock(&mmc->lock);
-
 	do {
 		status = mci_readl(host, SR);
 		mask = mci_readl(host, IMR);
@@ -946,13 +1401,18 @@
 			mci_writel(host, IDR, ATMCI_DATA_ERROR_FLAGS
 					| MCI_RXRDY | MCI_TXRDY);
 			pending &= mci_readl(host, IMR);
+
 			host->data_status = status;
+			smp_wmb();
 			atmci_set_pending(host, EVENT_DATA_ERROR);
 			tasklet_schedule(&host->tasklet);
 		}
 		if (pending & MCI_NOTBUSY) {
-			mci_writel(host, IDR, (MCI_NOTBUSY
-					       | ATMCI_DATA_ERROR_FLAGS));
+			mci_writel(host, IDR,
+					ATMCI_DATA_ERROR_FLAGS | MCI_NOTBUSY);
+			if (!host->data_status)
+				host->data_status = status;
+			smp_wmb();
 			atmci_set_pending(host, EVENT_DATA_COMPLETE);
 			tasklet_schedule(&host->tasklet);
 		}
@@ -962,18 +1422,15 @@
 			atmci_write_data_pio(host);
 
 		if (pending & MCI_CMDRDY)
-			atmci_cmd_interrupt(mmc, status);
+			atmci_cmd_interrupt(host, status);
 	} while (pass_count++ < 5);
 
-	spin_unlock(&mmc->lock);
-
 	return pass_count ? IRQ_HANDLED : IRQ_NONE;
 }
 
 static irqreturn_t atmci_detect_interrupt(int irq, void *dev_id)
 {
-	struct mmc_host		*mmc = dev_id;
-	struct atmel_mci	*host = mmc_priv(mmc);
+	struct atmel_mci_slot	*slot = dev_id;
 
 	/*
 	 * Disable interrupts until the pin has stabilized and check
@@ -981,19 +1438,176 @@
 	 * middle of the timer routine when this interrupt triggers.
 	 */
 	disable_irq_nosync(irq);
-	mod_timer(&host->detect_timer, jiffies + msecs_to_jiffies(20));
+	mod_timer(&slot->detect_timer, jiffies + msecs_to_jiffies(20));
 
 	return IRQ_HANDLED;
 }
 
+#ifdef CONFIG_MMC_ATMELMCI_DMA
+
+static inline struct atmel_mci *
+dma_client_to_atmel_mci(struct dma_client *client)
+{
+	return container_of(client, struct atmel_mci, dma.client);
+}
+
+static enum dma_state_client atmci_dma_event(struct dma_client *client,
+		struct dma_chan *chan, enum dma_state state)
+{
+	struct atmel_mci	*host;
+	enum dma_state_client	ret = DMA_NAK;
+
+	host = dma_client_to_atmel_mci(client);
+
+	switch (state) {
+	case DMA_RESOURCE_AVAILABLE:
+		spin_lock_bh(&host->lock);
+		if (!host->dma.chan) {
+			host->dma.chan = chan;
+			ret = DMA_ACK;
+		}
+		spin_unlock_bh(&host->lock);
+
+		if (ret == DMA_ACK)
+			dev_info(&host->pdev->dev,
+					"Using %s for DMA transfers\n",
+					chan->dev.bus_id);
+		break;
+
+	case DMA_RESOURCE_REMOVED:
+		spin_lock_bh(&host->lock);
+		if (host->dma.chan == chan) {
+			host->dma.chan = NULL;
+			ret = DMA_ACK;
+		}
+		spin_unlock_bh(&host->lock);
+
+		if (ret == DMA_ACK)
+			dev_info(&host->pdev->dev,
+					"Lost %s, falling back to PIO\n",
+					chan->dev.bus_id);
+		break;
+
+	default:
+		break;
+	}
+
+
+	return ret;
+}
+#endif /* CONFIG_MMC_ATMELMCI_DMA */
+
+static int __init atmci_init_slot(struct atmel_mci *host,
+		struct mci_slot_pdata *slot_data, unsigned int id,
+		u32 sdc_reg)
+{
+	struct mmc_host			*mmc;
+	struct atmel_mci_slot		*slot;
+
+	mmc = mmc_alloc_host(sizeof(struct atmel_mci_slot), &host->pdev->dev);
+	if (!mmc)
+		return -ENOMEM;
+
+	slot = mmc_priv(mmc);
+	slot->mmc = mmc;
+	slot->host = host;
+	slot->detect_pin = slot_data->detect_pin;
+	slot->wp_pin = slot_data->wp_pin;
+	slot->sdc_reg = sdc_reg;
+
+	mmc->ops = &atmci_ops;
+	mmc->f_min = DIV_ROUND_UP(host->bus_hz, 512);
+	mmc->f_max = host->bus_hz / 2;
+	mmc->ocr_avail	= MMC_VDD_32_33 | MMC_VDD_33_34;
+	if (slot_data->bus_width >= 4)
+		mmc->caps |= MMC_CAP_4_BIT_DATA;
+
+	mmc->max_hw_segs = 64;
+	mmc->max_phys_segs = 64;
+	mmc->max_req_size = 32768 * 512;
+	mmc->max_blk_size = 32768;
+	mmc->max_blk_count = 512;
+
+	/* Assume card is present initially */
+	set_bit(ATMCI_CARD_PRESENT, &slot->flags);
+	if (gpio_is_valid(slot->detect_pin)) {
+		if (gpio_request(slot->detect_pin, "mmc_detect")) {
+			dev_dbg(&mmc->class_dev, "no detect pin available\n");
+			slot->detect_pin = -EBUSY;
+		} else if (gpio_get_value(slot->detect_pin)) {
+			clear_bit(ATMCI_CARD_PRESENT, &slot->flags);
+		}
+	}
+
+	if (!gpio_is_valid(slot->detect_pin))
+		mmc->caps |= MMC_CAP_NEEDS_POLL;
+
+	if (gpio_is_valid(slot->wp_pin)) {
+		if (gpio_request(slot->wp_pin, "mmc_wp")) {
+			dev_dbg(&mmc->class_dev, "no WP pin available\n");
+			slot->wp_pin = -EBUSY;
+		}
+	}
+
+	host->slot[id] = slot;
+	mmc_add_host(mmc);
+
+	if (gpio_is_valid(slot->detect_pin)) {
+		int ret;
+
+		setup_timer(&slot->detect_timer, atmci_detect_change,
+				(unsigned long)slot);
+
+		ret = request_irq(gpio_to_irq(slot->detect_pin),
+				atmci_detect_interrupt,
+				IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING,
+				"mmc-detect", slot);
+		if (ret) {
+			dev_dbg(&mmc->class_dev,
+				"could not request IRQ %d for detect pin\n",
+				gpio_to_irq(slot->detect_pin));
+			gpio_free(slot->detect_pin);
+			slot->detect_pin = -EBUSY;
+		}
+	}
+
+	atmci_init_debugfs(slot);
+
+	return 0;
+}
+
+static void __exit atmci_cleanup_slot(struct atmel_mci_slot *slot,
+		unsigned int id)
+{
+	/* Debugfs stuff is cleaned up by mmc core */
+
+	set_bit(ATMCI_SHUTDOWN, &slot->flags);
+	smp_wmb();
+
+	mmc_remove_host(slot->mmc);
+
+	if (gpio_is_valid(slot->detect_pin)) {
+		int pin = slot->detect_pin;
+
+		free_irq(gpio_to_irq(pin), slot);
+		del_timer_sync(&slot->detect_timer);
+		gpio_free(pin);
+	}
+	if (gpio_is_valid(slot->wp_pin))
+		gpio_free(slot->wp_pin);
+
+	slot->host->slot[id] = NULL;
+	mmc_free_host(slot->mmc);
+}
+
 static int __init atmci_probe(struct platform_device *pdev)
 {
 	struct mci_platform_data	*pdata;
-	struct atmel_mci *host;
-	struct mmc_host *mmc;
-	struct resource *regs;
-	int irq;
-	int ret;
+	struct atmel_mci		*host;
+	struct resource			*regs;
+	unsigned int			nr_slots;
+	int				irq;
+	int				ret;
 
 	regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (!regs)
@@ -1005,15 +1619,13 @@
 	if (irq < 0)
 		return irq;
 
-	mmc = mmc_alloc_host(sizeof(struct atmel_mci), &pdev->dev);
-	if (!mmc)
+	host = kzalloc(sizeof(struct atmel_mci), GFP_KERNEL);
+	if (!host)
 		return -ENOMEM;
 
-	host = mmc_priv(mmc);
 	host->pdev = pdev;
-	host->mmc = mmc;
-	host->detect_pin = pdata->detect_pin;
-	host->wp_pin = pdata->wp_pin;
+	spin_lock_init(&host->lock);
+	INIT_LIST_HEAD(&host->queue);
 
 	host->mck = clk_get(&pdev->dev, "mci_clk");
 	if (IS_ERR(host->mck)) {
@@ -1033,122 +1645,102 @@
 
 	host->mapbase = regs->start;
 
-	mmc->ops = &atmci_ops;
-	mmc->f_min = (host->bus_hz + 511) / 512;
-	mmc->f_max = host->bus_hz / 2;
-	mmc->ocr_avail	= MMC_VDD_32_33 | MMC_VDD_33_34;
-	mmc->caps |= MMC_CAP_4_BIT_DATA;
+	tasklet_init(&host->tasklet, atmci_tasklet_func, (unsigned long)host);
 
-	mmc->max_hw_segs = 64;
-	mmc->max_phys_segs = 64;
-	mmc->max_req_size = 32768 * 512;
-	mmc->max_blk_size = 32768;
-	mmc->max_blk_count = 512;
-
-	tasklet_init(&host->tasklet, atmci_tasklet_func, (unsigned long)mmc);
-
-	ret = request_irq(irq, atmci_interrupt, 0, pdev->dev.bus_id, mmc);
+	ret = request_irq(irq, atmci_interrupt, 0, pdev->dev.bus_id, host);
 	if (ret)
 		goto err_request_irq;
 
-	/* Assume card is present if we don't have a detect pin */
-	host->present = 1;
-	if (gpio_is_valid(host->detect_pin)) {
-		if (gpio_request(host->detect_pin, "mmc_detect")) {
-			dev_dbg(&mmc->class_dev, "no detect pin available\n");
-			host->detect_pin = -1;
-		} else {
-			host->present = !gpio_get_value(host->detect_pin);
-		}
-	}
+#ifdef CONFIG_MMC_ATMELMCI_DMA
+	if (pdata->dma_slave) {
+		struct dma_slave *slave = pdata->dma_slave;
 
-	if (!gpio_is_valid(host->detect_pin))
-		mmc->caps |= MMC_CAP_NEEDS_POLL;
+		slave->tx_reg = regs->start + MCI_TDR;
+		slave->rx_reg = regs->start + MCI_RDR;
 
-	if (gpio_is_valid(host->wp_pin)) {
-		if (gpio_request(host->wp_pin, "mmc_wp")) {
-			dev_dbg(&mmc->class_dev, "no WP pin available\n");
-			host->wp_pin = -1;
-		}
+		/* Try to grab a DMA channel */
+		host->dma.client.event_callback = atmci_dma_event;
+		dma_cap_set(DMA_SLAVE, host->dma.client.cap_mask);
+		host->dma.client.slave = slave;
+
+		dma_async_client_register(&host->dma.client);
+		dma_async_client_chan_request(&host->dma.client);
+	} else {
+		dev_notice(&pdev->dev, "DMA not available, using PIO\n");
 	}
+#endif /* CONFIG_MMC_ATMELMCI_DMA */
 
 	platform_set_drvdata(pdev, host);
 
-	mmc_add_host(mmc);
-
-	if (gpio_is_valid(host->detect_pin)) {
-		setup_timer(&host->detect_timer, atmci_detect_change,
-				(unsigned long)host);
-
-		ret = request_irq(gpio_to_irq(host->detect_pin),
-				atmci_detect_interrupt,
-				IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING,
-				"mmc-detect", mmc);
-		if (ret) {
-			dev_dbg(&mmc->class_dev,
-				"could not request IRQ %d for detect pin\n",
-				gpio_to_irq(host->detect_pin));
-			gpio_free(host->detect_pin);
-			host->detect_pin = -1;
-		}
+	/* We need at least one slot to succeed */
+	nr_slots = 0;
+	ret = -ENODEV;
+	if (pdata->slot[0].bus_width) {
+		ret = atmci_init_slot(host, &pdata->slot[0],
+				MCI_SDCSEL_SLOT_A, 0);
+		if (!ret)
+			nr_slots++;
+	}
+	if (pdata->slot[1].bus_width) {
+		ret = atmci_init_slot(host, &pdata->slot[1],
+				MCI_SDCSEL_SLOT_B, 1);
+		if (!ret)
+			nr_slots++;
 	}
 
-	dev_info(&mmc->class_dev,
-			"Atmel MCI controller at 0x%08lx irq %d\n",
-			host->mapbase, irq);
+	if (!nr_slots)
+		goto err_init_slot;
 
-	atmci_init_debugfs(host);
+	dev_info(&pdev->dev,
+			"Atmel MCI controller at 0x%08lx irq %d, %u slots\n",
+			host->mapbase, irq, nr_slots);
 
 	return 0;
 
+err_init_slot:
+#ifdef CONFIG_MMC_ATMELMCI_DMA
+	if (pdata->dma_slave)
+		dma_async_client_unregister(&host->dma.client);
+#endif
+	free_irq(irq, host);
 err_request_irq:
 	iounmap(host->regs);
 err_ioremap:
 	clk_put(host->mck);
 err_clk_get:
-	mmc_free_host(mmc);
+	kfree(host);
 	return ret;
 }
 
 static int __exit atmci_remove(struct platform_device *pdev)
 {
-	struct atmel_mci *host = platform_get_drvdata(pdev);
+	struct atmel_mci	*host = platform_get_drvdata(pdev);
+	unsigned int		i;
 
 	platform_set_drvdata(pdev, NULL);
 
-	if (host) {
-		/* Debugfs stuff is cleaned up by mmc core */
-
-		if (gpio_is_valid(host->detect_pin)) {
-			int pin = host->detect_pin;
-
-			/* Make sure the timer doesn't enable the interrupt */
-			host->detect_pin = -1;
-			smp_wmb();
-
-			free_irq(gpio_to_irq(pin), host->mmc);
-			del_timer_sync(&host->detect_timer);
-			gpio_free(pin);
-		}
-
-		mmc_remove_host(host->mmc);
-
-		clk_enable(host->mck);
-		mci_writel(host, IDR, ~0UL);
-		mci_writel(host, CR, MCI_CR_MCIDIS);
-		mci_readl(host, SR);
-		clk_disable(host->mck);
-
-		if (gpio_is_valid(host->wp_pin))
-			gpio_free(host->wp_pin);
-
-		free_irq(platform_get_irq(pdev, 0), host->mmc);
-		iounmap(host->regs);
-
-		clk_put(host->mck);
-
-		mmc_free_host(host->mmc);
+	for (i = 0; i < ATMEL_MCI_MAX_NR_SLOTS; i++) {
+		if (host->slot[i])
+			atmci_cleanup_slot(host->slot[i], i);
 	}
+
+	clk_enable(host->mck);
+	mci_writel(host, IDR, ~0UL);
+	mci_writel(host, CR, MCI_CR_MCIDIS);
+	mci_readl(host, SR);
+	clk_disable(host->mck);
+
+#ifdef CONFIG_MMC_ATMELMCI_DMA
+	if (host->dma.client.slave)
+		dma_async_client_unregister(&host->dma.client);
+#endif
+
+	free_irq(platform_get_irq(pdev, 0), host);
+	iounmap(host->regs);
+
+	clk_put(host->mck);
+	kfree(host);
+
 	return 0;
 }