Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/egtvedt/linux-avr32

Pull AVR32 updates from Hans-Christian Egtvedt.

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/egtvedt/linux-avr32:
  avr32: uapi: be sure of "_UAPI" prefix for all guard macros
  avr32: add kprobe_ctlblk memory struct
  avr32: fix out-of-range jump in large kernels
  avr32: setup crt for early panic()
diff --git a/Documentation/devicetree/bindings/dma/atmel-dma.txt b/Documentation/devicetree/bindings/dma/atmel-dma.txt
index e1f343c..f69bcf5 100644
--- a/Documentation/devicetree/bindings/dma/atmel-dma.txt
+++ b/Documentation/devicetree/bindings/dma/atmel-dma.txt
@@ -28,7 +28,7 @@
 dependent:
   - bit 7-0: peripheral identifier for the hardware handshaking interface. The
   identifier can be different for tx and rx.
-  - bit 11-8: FIFO configuration. 0 for half FIFO, 1 for ALAP, 1 for ASAP.
+  - bit 11-8: FIFO configuration. 0 for half FIFO, 1 for ALAP, 2 for ASAP.
 
 Example:
 
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/dma.txt b/Documentation/devicetree/bindings/powerpc/fsl/dma.txt
index 2a4b4bc..7fc1b01 100644
--- a/Documentation/devicetree/bindings/powerpc/fsl/dma.txt
+++ b/Documentation/devicetree/bindings/powerpc/fsl/dma.txt
@@ -1,33 +1,30 @@
-* Freescale 83xx DMA Controller
+* Freescale DMA Controllers
 
-Freescale PowerPC 83xx have on chip general purpose DMA controllers.
+** Freescale Elo DMA Controller
+   This is a little-endian 4-channel DMA controller, used in Freescale mpc83xx
+   series chips such as mpc8315, mpc8349, mpc8379, etc.
 
 Required properties:
 
-- compatible        : compatible list, contains 2 entries, first is
-		 "fsl,CHIP-dma", where CHIP is the processor
-		 (mpc8349, mpc8360, etc.) and the second is
-		 "fsl,elo-dma"
-- reg               : <registers mapping for DMA general status reg>
-- ranges		: Should be defined as specified in 1) to describe the
-		  DMA controller channels.
+- compatible        : must include "fsl,elo-dma"
+- reg               : DMA General Status Register, i.e. DGSR which contains
+                      status for all the 4 DMA channels
+- ranges            : describes the mapping between the address space of the
+                      DMA channels and the address space of the DMA controller
 - cell-index        : controller index.  0 for controller @ 0x8100
-- interrupts        : <interrupt mapping for DMA IRQ>
+- interrupts        : interrupt specifier for DMA IRQ
 - interrupt-parent  : optional, if needed for interrupt mapping
 
-
 - DMA channel nodes:
-        - compatible        : compatible list, contains 2 entries, first is
-			 "fsl,CHIP-dma-channel", where CHIP is the processor
-			 (mpc8349, mpc8350, etc.) and the second is
-			 "fsl,elo-dma-channel". However, see note below.
-        - reg               : <registers mapping for channel>
-        - cell-index        : dma channel index starts at 0.
+        - compatible        : must include "fsl,elo-dma-channel"
+                              However, see note below.
+        - reg               : DMA channel specific registers
+        - cell-index        : DMA channel index starts at 0.
 
 Optional properties:
-        - interrupts        : <interrupt mapping for DMA channel IRQ>
-			  (on 83xx this is expected to be identical to
-			   the interrupts property of the parent node)
+        - interrupts        : interrupt specifier for DMA channel IRQ
+                              (on 83xx this is expected to be identical to
+                              the interrupts property of the parent node)
         - interrupt-parent  : optional, if needed for interrupt mapping
 
 Example:
@@ -70,30 +67,27 @@
 		};
 	};
 
-* Freescale 85xx/86xx DMA Controller
-
-Freescale PowerPC 85xx/86xx have on chip general purpose DMA controllers.
+** Freescale EloPlus DMA Controller
+   This is a 4-channel DMA controller with extended addresses and chaining,
+   mainly used in Freescale mpc85xx/86xx, Pxxx and BSC series chips, such as
+   mpc8540, mpc8641, p4080, bsc9131, etc.
 
 Required properties:
 
-- compatible        : compatible list, contains 2 entries, first is
-		 "fsl,CHIP-dma", where CHIP is the processor
-		 (mpc8540, mpc8540, etc.) and the second is
-		 "fsl,eloplus-dma"
-- reg               : <registers mapping for DMA general status reg>
+- compatible        : must include "fsl,eloplus-dma"
+- reg               : DMA General Status Register, i.e. DGSR which contains
+                      status for all the 4 DMA channels
 - cell-index        : controller index.  0 for controller @ 0x21000,
                                          1 for controller @ 0xc000
-- ranges		: Should be defined as specified in 1) to describe the
-		  DMA controller channels.
+- ranges            : describes the mapping between the address space of the
+                      DMA channels and the address space of the DMA controller
 
 - DMA channel nodes:
-        - compatible        : compatible list, contains 2 entries, first is
-			 "fsl,CHIP-dma-channel", where CHIP is the processor
-			 (mpc8540, mpc8560, etc.) and the second is
-			 "fsl,eloplus-dma-channel". However, see note below.
-        - cell-index        : dma channel index starts at 0.
-        - reg               : <registers mapping for channel>
-        - interrupts        : <interrupt mapping for DMA channel IRQ>
+        - compatible        : must include "fsl,eloplus-dma-channel"
+                              However, see note below.
+        - cell-index        : DMA channel index starts at 0.
+        - reg               : DMA channel specific registers
+        - interrupts        : interrupt specifier for DMA channel IRQ
         - interrupt-parent  : optional, if needed for interrupt mapping
 
 Example:
@@ -134,6 +128,76 @@
 		};
 	};
 
+** Freescale Elo3 DMA Controller
+   DMA controller with the same function as EloPlus, except that Elo3 has 8
+   channels while EloPlus has only 4.  It is used in Freescale Txxx and Bxxx
+   series chips, such as t1040, t4240, b4860.
+
+Required properties:
+
+- compatible        : must include "fsl,elo3-dma"
+- reg               : contains two entries for DMA General Status Registers,
+                      i.e. DGSR0 which includes status for channel 1~4, and
+                      DGSR1 for channel 5~8
+- ranges            : describes the mapping between the address space of the
+                      DMA channels and the address space of the DMA controller
+
+- DMA channel nodes:
+        - compatible        : must include "fsl,eloplus-dma-channel"
+        - reg               : DMA channel specific registers
+        - interrupts        : interrupt specifier for DMA channel IRQ
+        - interrupt-parent  : optional, if needed for interrupt mapping
+
+Example:
+dma@100300 {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	compatible = "fsl,elo3-dma";
+	reg = <0x100300 0x4>,
+	      <0x100600 0x4>;
+	ranges = <0x0 0x100100 0x500>;
+	dma-channel@0 {
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x0 0x80>;
+		interrupts = <28 2 0 0>;
+	};
+	dma-channel@80 {
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x80 0x80>;
+		interrupts = <29 2 0 0>;
+	};
+	dma-channel@100 {
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x100 0x80>;
+		interrupts = <30 2 0 0>;
+	};
+	dma-channel@180 {
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x180 0x80>;
+		interrupts = <31 2 0 0>;
+	};
+	dma-channel@300 {
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x300 0x80>;
+		interrupts = <76 2 0 0>;
+	};
+	dma-channel@380 {
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x380 0x80>;
+		interrupts = <77 2 0 0>;
+	};
+	dma-channel@400 {
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x400 0x80>;
+		interrupts = <78 2 0 0>;
+	};
+	dma-channel@480 {
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x480 0x80>;
+		interrupts = <79 2 0 0>;
+	};
+};
+
 Note on DMA channel compatible properties: The compatible property must say
 "fsl,elo-dma-channel" or "fsl,eloplus-dma-channel" to be used by the Elo DMA
 driver (fsldma).  Any DMA channel used by fsldma cannot be used by another
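
A minimal sketch of the consumer side of this binding is shown below: an OF
platform driver matching the controller compatibles listed above.  The driver
name, probe routine and overall structure are illustrative only and are not
taken from the actual fsldma driver.

	#include <linux/module.h>
	#include <linux/of.h>
	#include <linux/platform_device.h>

	static int example_dma_probe(struct platform_device *pdev)
	{
		/* channel nodes are walked as children of the matched node */
		dev_info(&pdev->dev, "DMA controller node matched\n");
		return 0;
	}

	/* controller-level compatibles from this binding */
	static const struct of_device_id example_dma_ids[] = {
		{ .compatible = "fsl,elo3-dma", },
		{ .compatible = "fsl,eloplus-dma", },
		{ .compatible = "fsl,elo-dma", },
		{ }
	};
	MODULE_DEVICE_TABLE(of, example_dma_ids);

	static struct platform_driver example_dma_driver = {
		.driver = {
			.name = "example-fsl-dma",
			.of_match_table = example_dma_ids,
		},
		.probe = example_dma_probe,
	};
	module_platform_driver(example_dma_driver);
	MODULE_LICENSE("GPL");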
diff --git a/Documentation/dmatest.txt b/Documentation/dmatest.txt
index a2b5663..dd77a81 100644
--- a/Documentation/dmatest.txt
+++ b/Documentation/dmatest.txt
@@ -15,39 +15,48 @@
 
 	Part 2 - When dmatest is built as a module...
 
-After mounting debugfs and loading the module, the /sys/kernel/debug/dmatest
-folder with nodes will be created. There are two important files located. First
-is the 'run' node that controls run and stop phases of the test, and the second
-one, 'results', is used to get the test case results.
-
-Note that in this case test will not run on load automatically.
-
 Example of usage:
+	% modprobe dmatest channel=dma0chan0 timeout=2000 iterations=1 run=1
+
+...or:
+	% modprobe dmatest
 	% echo dma0chan0 > /sys/module/dmatest/parameters/channel
 	% echo 2000 > /sys/module/dmatest/parameters/timeout
 	% echo 1 > /sys/module/dmatest/parameters/iterations
-	% echo 1 > /sys/kernel/debug/dmatest/run
+	% echo 1 > /sys/module/dmatest/parameters/run
+
+...or on the kernel command line:
+
+	dmatest.channel=dma0chan0 dmatest.timeout=2000 dmatest.iterations=1 dmatest.run=1
 
 Hint: available channel list could be extracted by running the following
 command:
 	% ls -1 /sys/class/dma/
 
-After a while you will start to get messages about current status or error like
-in the original code.
+Once started, a message like "dmatest: Started 1 threads using dma0chan0" is
+emitted.  After that only test failure messages are reported until the test
+stops.
 
 Note that running a new test will not stop any in progress test.
 
-The following command should return actual state of the test.
-	% cat /sys/kernel/debug/dmatest/run
+The following command returns the state of the test.
+	% cat /sys/module/dmatest/parameters/run
 
-To wait for test done the user may perform a busy loop that checks the state.
+To wait for test completion, userspace can poll 'run' until it is false, or use
+the wait parameter.  Specifying 'wait=1' when loading the module causes module
+initialization to pause until a test run has completed, while reading
+/sys/module/dmatest/parameters/wait waits for any running test to complete
+before returning.  For example, the following scripts wait for 42 tests
+to complete before exiting.  Note that if 'iterations' is set to 'infinite' then
+waiting is disabled.
 
-	% while [ $(cat /sys/kernel/debug/dmatest/run) = "Y" ]
-	> do
-	> 	echo -n "."
-	> 	sleep 1
-	> done
-	> echo
+Example:
+	% modprobe dmatest run=1 iterations=42 wait=1
+	% modprobe -r dmatest
+...or:
+	% modprobe dmatest run=1 iterations=42
+	% cat /sys/module/dmatest/parameters/wait
+	% modprobe -r dmatest
 
 	Part 3 - When built-in in the kernel...
 
@@ -62,21 +71,22 @@
 
 	Part 4 - Gathering the test results
 
-The module provides a storage for the test results in the memory. The gathered
-data could be used after test is done.
+Test results are printed to the kernel log buffer with the format:
 
-The special file 'results' in the debugfs represents gathered data of the in
-progress test. The messages collected are printed to the kernel log as well.
+"dmatest: result <channel>: <test id>: '<error msg>' with src_off=<val> dst_off=<val> len=<val> (<err code>)"
 
 Example of output:
-	% cat /sys/kernel/debug/dmatest/results
-	dma0chan0-copy0: #1: No errors with src_off=0x7bf dst_off=0x8ad len=0x3fea (0)
+	% dmesg | tail -n 1
+	dmatest: result dma0chan0-copy0: #1: No errors with src_off=0x7bf dst_off=0x8ad len=0x3fea (0)
 
 The message format is unified across the different types of errors. A number in
 the parens represents additional information, e.g. error code, error counter,
-or status.
+or status.  A test thread also emits a summary line at completion listing the
+number of tests executed, number that failed, and a result code.
 
-Comparison between buffers is stored to the dedicated structure.
+Example:
+	% dmesg | tail -n 1
+	dmatest: dma0chan0-copy0: summary 1 test, 0 failures 1000 iops 100000 KB/s (0)
 
-Note that the verify result is now accessible only via file 'results' in the
-debugfs.
+The details of a data miscompare error are also emitted, but do not follow the
+above format.
diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt
index 0f54333..b6ce00b 100644
--- a/Documentation/power/runtime_pm.txt
+++ b/Documentation/power/runtime_pm.txt
@@ -547,13 +547,11 @@
 should be used.  Of course, for this purpose the device's runtime PM has to be
 enabled earlier by calling pm_runtime_enable().
 
-If the device bus type's or driver's ->probe() callback runs
-pm_runtime_suspend() or pm_runtime_idle() or their asynchronous counterparts,
-they will fail returning -EAGAIN, because the device's usage counter is
-incremented by the driver core before executing ->probe().  Still, it may be
-desirable to suspend the device as soon as ->probe() has finished, so the driver
-core uses pm_runtime_put_sync() to invoke the subsystem-level idle callback for
-the device at that time.
+It may be desirable to suspend the device once ->probe() has finished.
+Therefore the driver core uses the asynchronous pm_request_idle() to submit a
+request to execute the subsystem-level idle callback for the device at that
+time.  A driver that makes use of the runtime autosuspend feature may want to
+update the last busy mark before returning from ->probe().
 
 Moreover, the driver core prevents runtime PM callbacks from racing with the bus
 notifier callback in __device_release_driver(), which is necessary, because the
@@ -656,7 +654,7 @@
     __pm_runtime_disable() with 'false' as the second argument for every device
     right before executing the subsystem-level .suspend_late() callback for it.
 
-  * During system resume it calls pm_runtime_enable() and pm_runtime_put_sync()
+  * During system resume it calls pm_runtime_enable() and pm_runtime_put()
     for every device right after executing the subsystem-level .resume_early()
     callback and right after executing the subsystem-level .resume() callback
     for it, respectively.
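
The autosuspend hint above is easier to see in code.  Below is a minimal,
hypothetical ->probe() sketch (device and driver names invented) that arms
runtime autosuspend and updates the last-busy mark before returning, so the
asynchronous idle request issued by the driver core does not suspend the
device prematurely:

	#include <linux/platform_device.h>
	#include <linux/pm_runtime.h>

	static int example_probe(struct platform_device *pdev)
	{
		struct device *dev = &pdev->dev;

		/* ...normal resource setup would go here... */

		/* allow suspend two seconds after the last recorded use */
		pm_runtime_set_autosuspend_delay(dev, 2000);
		pm_runtime_use_autosuspend(dev);

		/* record "now" as the last busy point before ->probe() returns */
		pm_runtime_mark_last_busy(dev);
		pm_runtime_enable(dev);

		return 0;
	}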
diff --git a/arch/arm/common/edma.c b/arch/arm/common/edma.c
index 8e1a024..41bca32 100644
--- a/arch/arm/common/edma.c
+++ b/arch/arm/common/edma.c
@@ -404,7 +404,7 @@
 					BIT(slot));
 			if (edma_cc[ctlr]->intr_data[channel].callback)
 				edma_cc[ctlr]->intr_data[channel].callback(
-					channel, DMA_COMPLETE,
+					channel, EDMA_DMA_COMPLETE,
 					edma_cc[ctlr]->intr_data[channel].data);
 		}
 	} while (sh_ipr);
@@ -459,7 +459,7 @@
 								callback) {
 						edma_cc[ctlr]->intr_data[k].
 						callback(k,
-						DMA_CC_ERROR,
+						EDMA_DMA_CC_ERROR,
 						edma_cc[ctlr]->intr_data
 						[k].data);
 					}
diff --git a/arch/arm/include/asm/hardware/iop3xx-adma.h b/arch/arm/include/asm/hardware/iop3xx-adma.h
index 9b28f12..240b29e 100644
--- a/arch/arm/include/asm/hardware/iop3xx-adma.h
+++ b/arch/arm/include/asm/hardware/iop3xx-adma.h
@@ -393,36 +393,6 @@
 	return slot_cnt;
 }
 
-static inline int iop_desc_is_pq(struct iop_adma_desc_slot *desc)
-{
-	return 0;
-}
-
-static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc,
-					struct iop_adma_chan *chan)
-{
-	union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
-
-	switch (chan->device->id) {
-	case DMA0_ID:
-	case DMA1_ID:
-		return hw_desc.dma->dest_addr;
-	case AAU_ID:
-		return hw_desc.aau->dest_addr;
-	default:
-		BUG();
-	}
-	return 0;
-}
-
-
-static inline u32 iop_desc_get_qdest_addr(struct iop_adma_desc_slot *desc,
-					  struct iop_adma_chan *chan)
-{
-	BUG();
-	return 0;
-}
-
 static inline u32 iop_desc_get_byte_count(struct iop_adma_desc_slot *desc,
 					struct iop_adma_chan *chan)
 {
diff --git a/arch/arm/include/asm/hardware/iop_adma.h b/arch/arm/include/asm/hardware/iop_adma.h
index 122f86d..250760e 100644
--- a/arch/arm/include/asm/hardware/iop_adma.h
+++ b/arch/arm/include/asm/hardware/iop_adma.h
@@ -82,8 +82,6 @@
  * @slot_cnt: total slots used in an transaction (group of operations)
  * @slots_per_op: number of slots per operation
  * @idx: pool index
- * @unmap_src_cnt: number of xor sources
- * @unmap_len: transaction bytecount
  * @tx_list: list of descriptors that are associated with one operation
  * @async_tx: support for the async_tx api
  * @group_list: list of slots that make up a multi-descriptor transaction
@@ -99,8 +97,6 @@
 	u16 slot_cnt;
 	u16 slots_per_op;
 	u16 idx;
-	u16 unmap_src_cnt;
-	size_t unmap_len;
 	struct list_head tx_list;
 	struct dma_async_tx_descriptor async_tx;
 	union {
diff --git a/arch/arm/mach-iop13xx/include/mach/adma.h b/arch/arm/mach-iop13xx/include/mach/adma.h
index 6d3782d..a86fd0e 100644
--- a/arch/arm/mach-iop13xx/include/mach/adma.h
+++ b/arch/arm/mach-iop13xx/include/mach/adma.h
@@ -218,20 +218,6 @@
 #define iop_chan_pq_slot_count iop_chan_xor_slot_count
 #define iop_chan_pq_zero_sum_slot_count iop_chan_xor_slot_count
 
-static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc,
-					struct iop_adma_chan *chan)
-{
-	struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
-	return hw_desc->dest_addr;
-}
-
-static inline u32 iop_desc_get_qdest_addr(struct iop_adma_desc_slot *desc,
-					  struct iop_adma_chan *chan)
-{
-	struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
-	return hw_desc->q_dest_addr;
-}
-
 static inline u32 iop_desc_get_byte_count(struct iop_adma_desc_slot *desc,
 					struct iop_adma_chan *chan)
 {
@@ -350,18 +336,6 @@
 	hw_desc->desc_ctrl = u_desc_ctrl.value;
 }
 
-static inline int iop_desc_is_pq(struct iop_adma_desc_slot *desc)
-{
-	struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
-	union {
-		u32 value;
-		struct iop13xx_adma_desc_ctrl field;
-	} u_desc_ctrl;
-
-	u_desc_ctrl.value = hw_desc->desc_ctrl;
-	return u_desc_ctrl.field.pq_xfer_en;
-}
-
 static inline void
 iop_desc_init_pq_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt,
 			  unsigned long flags)
diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c
index d43daf1..4c530a8 100644
--- a/arch/ia64/hp/common/sba_iommu.c
+++ b/arch/ia64/hp/common/sba_iommu.c
@@ -1992,7 +1992,7 @@
 	if (PCI_CONTROLLER(bus)->iommu)
 		return;
 
-	handle = PCI_CONTROLLER(bus)->acpi_handle;
+	handle = acpi_device_handle(PCI_CONTROLLER(bus)->companion);
 	if (!handle)
 		return;
 
diff --git a/arch/ia64/include/asm/pci.h b/arch/ia64/include/asm/pci.h
index 80775f5..71fbaaa 100644
--- a/arch/ia64/include/asm/pci.h
+++ b/arch/ia64/include/asm/pci.h
@@ -95,7 +95,7 @@
 };
 
 struct pci_controller {
-	void *acpi_handle;
+	struct acpi_device *companion;
 	void *iommu;
 	int segment;
 	int node;		/* nearest node with memory or -1 for global allocation */
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 5a9ff1c..cb59277 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -2166,12 +2166,6 @@
 	.flush		= pfm_flush
 };
 
-static int
-pfmfs_delete_dentry(const struct dentry *dentry)
-{
-	return 1;
-}
-
 static char *pfmfs_dname(struct dentry *dentry, char *buffer, int buflen)
 {
 	return dynamic_dname(dentry, buffer, buflen, "pfm:[%lu]",
@@ -2179,7 +2173,7 @@
 }
 
 static const struct dentry_operations pfmfs_dentry_operations = {
-	.d_delete = pfmfs_delete_dentry,
+	.d_delete = always_delete_dentry,
 	.d_dname = pfmfs_dname,
 };
 
diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c
index 2326790..9e4938d 100644
--- a/arch/ia64/pci/pci.c
+++ b/arch/ia64/pci/pci.c
@@ -436,9 +436,9 @@
 	if (!controller)
 		return NULL;
 
-	controller->acpi_handle = device->handle;
+	controller->companion = device;
 
-	pxm = acpi_get_pxm(controller->acpi_handle);
+	pxm = acpi_get_pxm(device->handle);
 #ifdef CONFIG_NUMA
 	if (pxm >= 0)
 		controller->node = pxm_to_node(pxm);
@@ -489,7 +489,7 @@
 {
 	struct pci_controller *controller = bridge->bus->sysdata;
 
-	ACPI_HANDLE_SET(&bridge->dev, controller->acpi_handle);
+	ACPI_COMPANION_SET(&bridge->dev, controller->companion);
 	return 0;
 }
 
diff --git a/arch/ia64/sn/kernel/io_acpi_init.c b/arch/ia64/sn/kernel/io_acpi_init.c
index b172539..0640739 100644
--- a/arch/ia64/sn/kernel/io_acpi_init.c
+++ b/arch/ia64/sn/kernel/io_acpi_init.c
@@ -132,7 +132,7 @@
 	struct acpi_resource_vendor_typed *vendor;
 
 
-	handle = PCI_CONTROLLER(bus)->acpi_handle;
+	handle = acpi_device_handle(PCI_CONTROLLER(bus)->companion);
 	status = acpi_get_vendor_resource(handle, METHOD_NAME__CRS,
 					  &sn_uuid, &buffer);
 	if (ACPI_FAILURE(status)) {
@@ -360,7 +360,7 @@
 	acpi_status status;
 	struct acpi_buffer name_buffer = { ACPI_ALLOCATE_BUFFER, NULL };
 
-	rootbus_handle = PCI_CONTROLLER(dev)->acpi_handle;
+	rootbus_handle = acpi_device_handle(PCI_CONTROLLER(dev)->companion);
         status = acpi_evaluate_integer(rootbus_handle, METHOD_NAME__SEG, NULL,
                                        &segment);
         if (ACPI_SUCCESS(status)) {
diff --git a/arch/powerpc/boot/dts/fsl/b4si-post.dtsi b/arch/powerpc/boot/dts/fsl/b4si-post.dtsi
index 4c617bf..4f6e482 100644
--- a/arch/powerpc/boot/dts/fsl/b4si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/b4si-post.dtsi
@@ -223,13 +223,13 @@
 		reg = <0xe2000 0x1000>;
 	};
 
-/include/ "qoriq-dma-0.dtsi"
+/include/ "elo3-dma-0.dtsi"
 	dma@100300 {
 		fsl,iommu-parent = <&pamu0>;
 		fsl,liodn-reg = <&guts 0x580>; /* DMA1LIODNR */
 	};
 
-/include/ "qoriq-dma-1.dtsi"
+/include/ "elo3-dma-1.dtsi"
 	dma@101300 {
 		fsl,iommu-parent = <&pamu0>;
 		fsl,liodn-reg = <&guts 0x584>; /* DMA2LIODNR */
diff --git a/arch/powerpc/boot/dts/fsl/elo3-dma-0.dtsi b/arch/powerpc/boot/dts/fsl/elo3-dma-0.dtsi
new file mode 100644
index 0000000..3c210e0
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/elo3-dma-0.dtsi
@@ -0,0 +1,82 @@
+/*
+ * QorIQ Elo3 DMA device tree stub [ controller @ offset 0x100000 ]
+ *
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+dma0: dma@100300 {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	compatible = "fsl,elo3-dma";
+	reg = <0x100300 0x4>,
+	      <0x100600 0x4>;
+	ranges = <0x0 0x100100 0x500>;
+	dma-channel@0 {
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x0 0x80>;
+		interrupts = <28 2 0 0>;
+	};
+	dma-channel@80 {
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x80 0x80>;
+		interrupts = <29 2 0 0>;
+	};
+	dma-channel@100 {
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x100 0x80>;
+		interrupts = <30 2 0 0>;
+	};
+	dma-channel@180 {
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x180 0x80>;
+		interrupts = <31 2 0 0>;
+	};
+	dma-channel@300 {
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x300 0x80>;
+		interrupts = <76 2 0 0>;
+	};
+	dma-channel@380 {
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x380 0x80>;
+		interrupts = <77 2 0 0>;
+	};
+	dma-channel@400 {
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x400 0x80>;
+		interrupts = <78 2 0 0>;
+	};
+	dma-channel@480 {
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x480 0x80>;
+		interrupts = <79 2 0 0>;
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/elo3-dma-1.dtsi b/arch/powerpc/boot/dts/fsl/elo3-dma-1.dtsi
new file mode 100644
index 0000000..cccf3bb
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/elo3-dma-1.dtsi
@@ -0,0 +1,82 @@
+/*
+ * QorIQ Elo3 DMA device tree stub [ controller @ offset 0x101000 ]
+ *
+ * Copyright 2013 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+dma1: dma@101300 {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	compatible = "fsl,elo3-dma";
+	reg = <0x101300 0x4>,
+	      <0x101600 0x4>;
+	ranges = <0x0 0x101100 0x500>;
+	dma-channel@0 {
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x0 0x80>;
+		interrupts = <32 2 0 0>;
+	};
+	dma-channel@80 {
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x80 0x80>;
+		interrupts = <33 2 0 0>;
+	};
+	dma-channel@100 {
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x100 0x80>;
+		interrupts = <34 2 0 0>;
+	};
+	dma-channel@180 {
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x180 0x80>;
+		interrupts = <35 2 0 0>;
+	};
+	dma-channel@300 {
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x300 0x80>;
+		interrupts = <80 2 0 0>;
+	};
+	dma-channel@380 {
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x380 0x80>;
+		interrupts = <81 2 0 0>;
+	};
+	dma-channel@400 {
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x400 0x80>;
+		interrupts = <82 2 0 0>;
+	};
+	dma-channel@480 {
+		compatible = "fsl,eloplus-dma-channel";
+		reg = <0x480 0x80>;
+		interrupts = <83 2 0 0>;
+	};
+};
diff --git a/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi b/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi
index 510afa3..4143a97 100644
--- a/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi
@@ -387,8 +387,8 @@
 		reg	   = <0xea000 0x4000>;
 	};
 
-/include/ "qoriq-dma-0.dtsi"
-/include/ "qoriq-dma-1.dtsi"
+/include/ "elo3-dma-0.dtsi"
+/include/ "elo3-dma-1.dtsi"
 
 /include/ "qoriq-espi-0.dtsi"
 	spi@110000 {
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index 7d74432..947b5c4 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -15,7 +15,7 @@
 	int		domain;		/* PCI domain */
 	int		node;		/* NUMA node */
 #ifdef CONFIG_ACPI
-	void		*acpi;		/* ACPI-specific data */
+	struct acpi_device *companion;	/* ACPI companion device */
 #endif
 #ifdef CONFIG_X86_64
 	void		*iommu;		/* IOMMU private data */
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index b93e09a..37813b5 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -147,6 +147,8 @@
 #define MSR_PP1_ENERGY_STATUS		0x00000641
 #define MSR_PP1_POLICY			0x00000642
 
+#define MSR_CORE_C1_RES			0x00000660
+
 #define MSR_AMD64_MC0_MASK		0xc0010044
 
 #define MSR_IA32_MCx_CTL(x)		(MSR_IA32_MC0_CTL + 4*(x))
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index a7cccb6..36aa999 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -209,7 +209,7 @@
 		if (!pmd)
 			failed = true;
 		if (pmd && !pgtable_pmd_page_ctor(virt_to_page(pmd))) {
-			free_page((unsigned long)pmds[i]);
+			free_page((unsigned long)pmd);
 			pmd = NULL;
 			failed = true;
 		}
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 7fb24e5..4f25ec0 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -518,7 +518,7 @@
 	sd = &info->sd;
 	sd->domain = domain;
 	sd->node = node;
-	sd->acpi = device->handle;
+	sd->companion = device;
 	/*
 	 * Maybe the desired pci bus has been already scanned. In such case
 	 * it is unnecessary to scan the pci bus with the given domain,busnum.
@@ -589,7 +589,7 @@
 {
 	struct pci_sysdata *sd = bridge->bus->sysdata;
 
-	ACPI_HANDLE_SET(&bridge->dev, sd->acpi);
+	ACPI_COMPANION_SET(&bridge->dev, sd->companion);
 	return 0;
 }
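
After this conversion, code that still needs a raw ACPI handle derives it from
the stored companion rather than reading a cached handle field.  A hypothetical
helper showing the new access pattern (mirroring the ia64 and x86 hunks above):

	#include <linux/pci.h>
	#include <linux/acpi.h>

	static acpi_handle example_root_handle(struct pci_bus *bus)
	{
		struct pci_sysdata *sd = bus->sysdata;

		/* acpi_device_handle() returns NULL for a NULL companion */
		return acpi_device_handle(sd->companion);
	}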
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 862f458..cdc629c 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -171,9 +171,12 @@
 }
 EXPORT_SYMBOL(blk_mq_can_queue);
 
-static void blk_mq_rq_ctx_init(struct blk_mq_ctx *ctx, struct request *rq,
-			       unsigned int rw_flags)
+static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
+			       struct request *rq, unsigned int rw_flags)
 {
+	if (blk_queue_io_stat(q))
+		rw_flags |= REQ_IO_STAT;
+
 	rq->mq_ctx = ctx;
 	rq->cmd_flags = rw_flags;
 	ctx->rq_dispatched[rw_is_sync(rw_flags)]++;
@@ -197,7 +200,7 @@
 
 		rq = __blk_mq_alloc_request(hctx, gfp & ~__GFP_WAIT, reserved);
 		if (rq) {
-			blk_mq_rq_ctx_init(ctx, rq, rw);
+			blk_mq_rq_ctx_init(q, ctx, rq, rw);
 			break;
 		} else if (!(gfp & __GFP_WAIT))
 			break;
@@ -718,6 +721,8 @@
 {
 	struct blk_mq_ctx *ctx = rq->mq_ctx;
 
+	trace_block_rq_insert(hctx->queue, rq);
+
 	list_add_tail(&rq->queuelist, &ctx->rq_list);
 	blk_mq_hctx_mark_pending(hctx, ctx);
 
@@ -921,7 +926,7 @@
 	trace_block_getrq(q, bio, rw);
 	rq = __blk_mq_alloc_request(hctx, GFP_ATOMIC, false);
 	if (likely(rq))
-		blk_mq_rq_ctx_init(ctx, rq, rw);
+		blk_mq_rq_ctx_init(q, ctx, rq, rw);
 	else {
 		blk_mq_put_ctx(ctx);
 		trace_block_sleeprq(q, bio, rw);
@@ -1377,6 +1382,7 @@
 	q->queue_hw_ctx = hctxs;
 
 	q->mq_ops = reg->ops;
+	q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT;
 
 	blk_queue_make_request(q, blk_mq_make_request);
 	blk_queue_rq_timed_out(q, reg->ops->timeout);
diff --git a/crypto/async_tx/async_memcpy.c b/crypto/async_tx/async_memcpy.c
index 9e62fef..f8c0b8d 100644
--- a/crypto/async_tx/async_memcpy.c
+++ b/crypto/async_tx/async_memcpy.c
@@ -50,33 +50,36 @@
 						      &dest, 1, &src, 1, len);
 	struct dma_device *device = chan ? chan->device : NULL;
 	struct dma_async_tx_descriptor *tx = NULL;
+	struct dmaengine_unmap_data *unmap = NULL;
 
-	if (device && is_dma_copy_aligned(device, src_offset, dest_offset, len)) {
-		dma_addr_t dma_dest, dma_src;
+	if (device)
+		unmap = dmaengine_get_unmap_data(device->dev, 2, GFP_NOIO);
+
+	if (unmap && is_dma_copy_aligned(device, src_offset, dest_offset, len)) {
 		unsigned long dma_prep_flags = 0;
 
 		if (submit->cb_fn)
 			dma_prep_flags |= DMA_PREP_INTERRUPT;
 		if (submit->flags & ASYNC_TX_FENCE)
 			dma_prep_flags |= DMA_PREP_FENCE;
-		dma_dest = dma_map_page(device->dev, dest, dest_offset, len,
-					DMA_FROM_DEVICE);
 
-		dma_src = dma_map_page(device->dev, src, src_offset, len,
-				       DMA_TO_DEVICE);
+		unmap->to_cnt = 1;
+		unmap->addr[0] = dma_map_page(device->dev, src, src_offset, len,
+					      DMA_TO_DEVICE);
+		unmap->from_cnt = 1;
+		unmap->addr[1] = dma_map_page(device->dev, dest, dest_offset, len,
+					      DMA_FROM_DEVICE);
+		unmap->len = len;
 
-		tx = device->device_prep_dma_memcpy(chan, dma_dest, dma_src,
-						    len, dma_prep_flags);
-		if (!tx) {
-			dma_unmap_page(device->dev, dma_dest, len,
-				       DMA_FROM_DEVICE);
-			dma_unmap_page(device->dev, dma_src, len,
-				       DMA_TO_DEVICE);
-		}
+		tx = device->device_prep_dma_memcpy(chan, unmap->addr[1],
+						    unmap->addr[0], len,
+						    dma_prep_flags);
 	}
 
 	if (tx) {
 		pr_debug("%s: (async) len: %zu\n", __func__, len);
+
+		dma_set_unmap(tx, unmap);
 		async_tx_submit(chan, tx, submit);
 	} else {
 		void *dest_buf, *src_buf;
@@ -96,6 +99,8 @@
 		async_tx_sync_epilog(submit);
 	}
 
+	dmaengine_unmap_put(unmap);
+
 	return tx;
 }
 EXPORT_SYMBOL_GPL(async_memcpy);
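
All of the async_tx conversions in this series follow the same shape: allocate
a struct dmaengine_unmap_data sized for the number of mappings, record each
DMA mapping and bump the matching to_cnt/from_cnt/bidi_cnt field, attach the
structure to the descriptor with dma_set_unmap(), then drop the local
reference with dmaengine_unmap_put().  A condensed, hypothetical sketch of
that pattern (the synchronous fallback and error handling are omitted):

	#include <linux/dma-mapping.h>
	#include <linux/dmaengine.h>

	static struct dma_async_tx_descriptor *
	example_memcpy(struct dma_chan *chan, struct page *dest, unsigned int dest_off,
		       struct page *src, unsigned int src_off, size_t len)
	{
		struct dma_device *device = chan->device;
		struct dma_async_tx_descriptor *tx = NULL;
		struct dmaengine_unmap_data *unmap;

		unmap = dmaengine_get_unmap_data(device->dev, 2, GFP_NOIO);
		if (!unmap)
			return NULL;

		unmap->addr[0] = dma_map_page(device->dev, src, src_off, len,
					      DMA_TO_DEVICE);
		unmap->to_cnt = 1;
		unmap->addr[1] = dma_map_page(device->dev, dest, dest_off, len,
					      DMA_FROM_DEVICE);
		unmap->from_cnt = 1;
		unmap->len = len;

		tx = device->device_prep_dma_memcpy(chan, unmap->addr[1],
						    unmap->addr[0], len, 0);
		if (tx)
			dma_set_unmap(tx, unmap);	/* descriptor holds a reference */

		dmaengine_unmap_put(unmap);		/* drop the local reference */
		return tx;
	}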
diff --git a/crypto/async_tx/async_pq.c b/crypto/async_tx/async_pq.c
index 91d5d38..d05327c 100644
--- a/crypto/async_tx/async_pq.c
+++ b/crypto/async_tx/async_pq.c
@@ -46,49 +46,24 @@
  * do_async_gen_syndrome - asynchronously calculate P and/or Q
  */
 static __async_inline struct dma_async_tx_descriptor *
-do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks,
-		      const unsigned char *scfs, unsigned int offset, int disks,
-		      size_t len, dma_addr_t *dma_src,
+do_async_gen_syndrome(struct dma_chan *chan,
+		      const unsigned char *scfs, int disks,
+		      struct dmaengine_unmap_data *unmap,
+		      enum dma_ctrl_flags dma_flags,
 		      struct async_submit_ctl *submit)
 {
 	struct dma_async_tx_descriptor *tx = NULL;
 	struct dma_device *dma = chan->device;
-	enum dma_ctrl_flags dma_flags = 0;
 	enum async_tx_flags flags_orig = submit->flags;
 	dma_async_tx_callback cb_fn_orig = submit->cb_fn;
 	dma_async_tx_callback cb_param_orig = submit->cb_param;
 	int src_cnt = disks - 2;
-	unsigned char coefs[src_cnt];
 	unsigned short pq_src_cnt;
 	dma_addr_t dma_dest[2];
 	int src_off = 0;
-	int idx;
-	int i;
 
-	/* DMAs use destinations as sources, so use BIDIRECTIONAL mapping */
-	if (P(blocks, disks))
-		dma_dest[0] = dma_map_page(dma->dev, P(blocks, disks), offset,
-					   len, DMA_BIDIRECTIONAL);
-	else
-		dma_flags |= DMA_PREP_PQ_DISABLE_P;
-	if (Q(blocks, disks))
-		dma_dest[1] = dma_map_page(dma->dev, Q(blocks, disks), offset,
-					   len, DMA_BIDIRECTIONAL);
-	else
-		dma_flags |= DMA_PREP_PQ_DISABLE_Q;
-
-	/* convert source addresses being careful to collapse 'empty'
-	 * sources and update the coefficients accordingly
-	 */
-	for (i = 0, idx = 0; i < src_cnt; i++) {
-		if (blocks[i] == NULL)
-			continue;
-		dma_src[idx] = dma_map_page(dma->dev, blocks[i], offset, len,
-					    DMA_TO_DEVICE);
-		coefs[idx] = scfs[i];
-		idx++;
-	}
-	src_cnt = idx;
+	if (submit->flags & ASYNC_TX_FENCE)
+		dma_flags |= DMA_PREP_FENCE;
 
 	while (src_cnt > 0) {
 		submit->flags = flags_orig;
@@ -100,28 +75,25 @@
 		if (src_cnt > pq_src_cnt) {
 			submit->flags &= ~ASYNC_TX_ACK;
 			submit->flags |= ASYNC_TX_FENCE;
-			dma_flags |= DMA_COMPL_SKIP_DEST_UNMAP;
 			submit->cb_fn = NULL;
 			submit->cb_param = NULL;
 		} else {
-			dma_flags &= ~DMA_COMPL_SKIP_DEST_UNMAP;
 			submit->cb_fn = cb_fn_orig;
 			submit->cb_param = cb_param_orig;
 			if (cb_fn_orig)
 				dma_flags |= DMA_PREP_INTERRUPT;
 		}
-		if (submit->flags & ASYNC_TX_FENCE)
-			dma_flags |= DMA_PREP_FENCE;
 
-		/* Since we have clobbered the src_list we are committed
-		 * to doing this asynchronously.  Drivers force forward
-		 * progress in case they can not provide a descriptor
+		/* Drivers force forward progress in case they can not provide
+		 * a descriptor
 		 */
 		for (;;) {
+			dma_dest[0] = unmap->addr[disks - 2];
+			dma_dest[1] = unmap->addr[disks - 1];
 			tx = dma->device_prep_dma_pq(chan, dma_dest,
-						     &dma_src[src_off],
+						     &unmap->addr[src_off],
 						     pq_src_cnt,
-						     &coefs[src_off], len,
+						     &scfs[src_off], unmap->len,
 						     dma_flags);
 			if (likely(tx))
 				break;
@@ -129,6 +101,7 @@
 			dma_async_issue_pending(chan);
 		}
 
+		dma_set_unmap(tx, unmap);
 		async_tx_submit(chan, tx, submit);
 		submit->depend_tx = tx;
 
@@ -188,10 +161,6 @@
  * set to NULL those buffers will be replaced with the raid6_zero_page
  * in the synchronous path and omitted in the hardware-asynchronous
  * path.
- *
- * 'blocks' note: if submit->scribble is NULL then the contents of
- * 'blocks' may be overwritten to perform address conversions
- * (dma_map_page() or page_address()).
  */
 struct dma_async_tx_descriptor *
 async_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
@@ -202,26 +171,69 @@
 						      &P(blocks, disks), 2,
 						      blocks, src_cnt, len);
 	struct dma_device *device = chan ? chan->device : NULL;
-	dma_addr_t *dma_src = NULL;
+	struct dmaengine_unmap_data *unmap = NULL;
 
 	BUG_ON(disks > 255 || !(P(blocks, disks) || Q(blocks, disks)));
 
-	if (submit->scribble)
-		dma_src = submit->scribble;
-	else if (sizeof(dma_addr_t) <= sizeof(struct page *))
-		dma_src = (dma_addr_t *) blocks;
+	if (device)
+		unmap = dmaengine_get_unmap_data(device->dev, disks, GFP_NOIO);
 
-	if (dma_src && device &&
+	if (unmap &&
 	    (src_cnt <= dma_maxpq(device, 0) ||
 	     dma_maxpq(device, DMA_PREP_CONTINUE) > 0) &&
 	    is_dma_pq_aligned(device, offset, 0, len)) {
+		struct dma_async_tx_descriptor *tx;
+		enum dma_ctrl_flags dma_flags = 0;
+		unsigned char coefs[src_cnt];
+		int i, j;
+
 		/* run the p+q asynchronously */
 		pr_debug("%s: (async) disks: %d len: %zu\n",
 			 __func__, disks, len);
-		return do_async_gen_syndrome(chan, blocks, raid6_gfexp, offset,
-					     disks, len, dma_src, submit);
+
+		/* convert source addresses being careful to collapse 'empty'
+		 * sources and update the coefficients accordingly
+		 */
+		unmap->len = len;
+		for (i = 0, j = 0; i < src_cnt; i++) {
+			if (blocks[i] == NULL)
+				continue;
+			unmap->addr[j] = dma_map_page(device->dev, blocks[i], offset,
+						      len, DMA_TO_DEVICE);
+			coefs[j] = raid6_gfexp[i];
+			unmap->to_cnt++;
+			j++;
+		}
+
+		/*
+		 * DMAs use destinations as sources,
+		 * so use BIDIRECTIONAL mapping
+		 */
+		unmap->bidi_cnt++;
+		if (P(blocks, disks))
+			unmap->addr[j++] = dma_map_page(device->dev, P(blocks, disks),
+							offset, len, DMA_BIDIRECTIONAL);
+		else {
+			unmap->addr[j++] = 0;
+			dma_flags |= DMA_PREP_PQ_DISABLE_P;
+		}
+
+		unmap->bidi_cnt++;
+		if (Q(blocks, disks))
+			unmap->addr[j++] = dma_map_page(device->dev, Q(blocks, disks),
+						       offset, len, DMA_BIDIRECTIONAL);
+		else {
+			unmap->addr[j++] = 0;
+			dma_flags |= DMA_PREP_PQ_DISABLE_Q;
+		}
+
+		tx = do_async_gen_syndrome(chan, coefs, j, unmap, dma_flags, submit);
+		dmaengine_unmap_put(unmap);
+		return tx;
 	}
 
+	dmaengine_unmap_put(unmap);
+
 	/* run the pq synchronously */
 	pr_debug("%s: (sync) disks: %d len: %zu\n", __func__, disks, len);
 
@@ -277,50 +289,60 @@
 	struct dma_async_tx_descriptor *tx;
 	unsigned char coefs[disks-2];
 	enum dma_ctrl_flags dma_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0;
-	dma_addr_t *dma_src = NULL;
-	int src_cnt = 0;
+	struct dmaengine_unmap_data *unmap = NULL;
 
 	BUG_ON(disks < 4);
 
-	if (submit->scribble)
-		dma_src = submit->scribble;
-	else if (sizeof(dma_addr_t) <= sizeof(struct page *))
-		dma_src = (dma_addr_t *) blocks;
+	if (device)
+		unmap = dmaengine_get_unmap_data(device->dev, disks, GFP_NOIO);
 
-	if (dma_src && device && disks <= dma_maxpq(device, 0) &&
+	if (unmap && disks <= dma_maxpq(device, 0) &&
 	    is_dma_pq_aligned(device, offset, 0, len)) {
 		struct device *dev = device->dev;
-		dma_addr_t *pq = &dma_src[disks-2];
-		int i;
+		dma_addr_t pq[2];
+		int i, j = 0, src_cnt = 0;
 
 		pr_debug("%s: (async) disks: %d len: %zu\n",
 			 __func__, disks, len);
-		if (!P(blocks, disks))
+
+		unmap->len = len;
+		for (i = 0; i < disks-2; i++)
+			if (likely(blocks[i])) {
+				unmap->addr[j] = dma_map_page(dev, blocks[i],
+							      offset, len,
+							      DMA_TO_DEVICE);
+				coefs[j] = raid6_gfexp[i];
+				unmap->to_cnt++;
+				src_cnt++;
+				j++;
+			}
+
+		if (!P(blocks, disks)) {
+			pq[0] = 0;
 			dma_flags |= DMA_PREP_PQ_DISABLE_P;
-		else
+		} else {
 			pq[0] = dma_map_page(dev, P(blocks, disks),
 					     offset, len,
 					     DMA_TO_DEVICE);
-		if (!Q(blocks, disks))
+			unmap->addr[j++] = pq[0];
+			unmap->to_cnt++;
+		}
+		if (!Q(blocks, disks)) {
+			pq[1] = 0;
 			dma_flags |= DMA_PREP_PQ_DISABLE_Q;
-		else
+		} else {
 			pq[1] = dma_map_page(dev, Q(blocks, disks),
 					     offset, len,
 					     DMA_TO_DEVICE);
+			unmap->addr[j++] = pq[1];
+			unmap->to_cnt++;
+		}
 
 		if (submit->flags & ASYNC_TX_FENCE)
 			dma_flags |= DMA_PREP_FENCE;
-		for (i = 0; i < disks-2; i++)
-			if (likely(blocks[i])) {
-				dma_src[src_cnt] = dma_map_page(dev, blocks[i],
-								offset, len,
-								DMA_TO_DEVICE);
-				coefs[src_cnt] = raid6_gfexp[i];
-				src_cnt++;
-			}
-
 		for (;;) {
-			tx = device->device_prep_dma_pq_val(chan, pq, dma_src,
+			tx = device->device_prep_dma_pq_val(chan, pq,
+							    unmap->addr,
 							    src_cnt,
 							    coefs,
 							    len, pqres,
@@ -330,6 +352,8 @@
 			async_tx_quiesce(&submit->depend_tx);
 			dma_async_issue_pending(chan);
 		}
+
+		dma_set_unmap(tx, unmap);
 		async_tx_submit(chan, tx, submit);
 
 		return tx;
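
For context, a caller such as the raid456 code drives async_gen_syndrome()
roughly as below, with P and Q occupying the last two slots of the page array
as the hunks above assume.  This is an illustrative sketch, not code from this
series:

	#include <linux/async_tx.h>

	/* 'disks' pages, with P at blocks[disks-2] and Q at blocks[disks-1];
	 * addr_conv provides the per-stripe scribble space. */
	static struct dma_async_tx_descriptor *
	example_gen_pq(struct page **blocks, int disks, size_t len,
		       addr_conv_t *addr_conv)
	{
		struct async_submit_ctl submit;

		init_async_submit(&submit, ASYNC_TX_ACK, NULL, NULL, NULL, addr_conv);
		return async_gen_syndrome(blocks, 0, disks, len, &submit);
	}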
diff --git a/crypto/async_tx/async_raid6_recov.c b/crypto/async_tx/async_raid6_recov.c
index a9f08a6..934a849 100644
--- a/crypto/async_tx/async_raid6_recov.c
+++ b/crypto/async_tx/async_raid6_recov.c
@@ -26,6 +26,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/raid/pq.h>
 #include <linux/async_tx.h>
+#include <linux/dmaengine.h>
 
 static struct dma_async_tx_descriptor *
 async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef,
@@ -34,35 +35,45 @@
 	struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,
 						      &dest, 1, srcs, 2, len);
 	struct dma_device *dma = chan ? chan->device : NULL;
+	struct dmaengine_unmap_data *unmap = NULL;
 	const u8 *amul, *bmul;
 	u8 ax, bx;
 	u8 *a, *b, *c;
 
-	if (dma) {
-		dma_addr_t dma_dest[2];
-		dma_addr_t dma_src[2];
+	if (dma)
+		unmap = dmaengine_get_unmap_data(dma->dev, 3, GFP_NOIO);
+
+	if (unmap) {
 		struct device *dev = dma->dev;
+		dma_addr_t pq[2];
 		struct dma_async_tx_descriptor *tx;
 		enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P;
 
 		if (submit->flags & ASYNC_TX_FENCE)
 			dma_flags |= DMA_PREP_FENCE;
-		dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
-		dma_src[0] = dma_map_page(dev, srcs[0], 0, len, DMA_TO_DEVICE);
-		dma_src[1] = dma_map_page(dev, srcs[1], 0, len, DMA_TO_DEVICE);
-		tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 2, coef,
+		unmap->addr[0] = dma_map_page(dev, srcs[0], 0, len, DMA_TO_DEVICE);
+		unmap->addr[1] = dma_map_page(dev, srcs[1], 0, len, DMA_TO_DEVICE);
+		unmap->to_cnt = 2;
+
+		unmap->addr[2] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
+		unmap->bidi_cnt = 1;
+		/* engine only looks at Q, but expects it to follow P */
+		pq[1] = unmap->addr[2];
+
+		unmap->len = len;
+		tx = dma->device_prep_dma_pq(chan, pq, unmap->addr, 2, coef,
 					     len, dma_flags);
 		if (tx) {
+			dma_set_unmap(tx, unmap);
 			async_tx_submit(chan, tx, submit);
+			dmaengine_unmap_put(unmap);
 			return tx;
 		}
 
 		/* could not get a descriptor, unmap and fall through to
 		 * the synchronous path
 		 */
-		dma_unmap_page(dev, dma_dest[1], len, DMA_BIDIRECTIONAL);
-		dma_unmap_page(dev, dma_src[0], len, DMA_TO_DEVICE);
-		dma_unmap_page(dev, dma_src[1], len, DMA_TO_DEVICE);
+		dmaengine_unmap_put(unmap);
 	}
 
 	/* run the operation synchronously */
@@ -89,23 +100,38 @@
 	struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ,
 						      &dest, 1, &src, 1, len);
 	struct dma_device *dma = chan ? chan->device : NULL;
+	struct dmaengine_unmap_data *unmap = NULL;
 	const u8 *qmul; /* Q multiplier table */
 	u8 *d, *s;
 
-	if (dma) {
+	if (dma)
+		unmap = dmaengine_get_unmap_data(dma->dev, 3, GFP_NOIO);
+
+	if (unmap) {
 		dma_addr_t dma_dest[2];
-		dma_addr_t dma_src[1];
 		struct device *dev = dma->dev;
 		struct dma_async_tx_descriptor *tx;
 		enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P;
 
 		if (submit->flags & ASYNC_TX_FENCE)
 			dma_flags |= DMA_PREP_FENCE;
-		dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
-		dma_src[0] = dma_map_page(dev, src, 0, len, DMA_TO_DEVICE);
-		tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 1, &coef,
-					     len, dma_flags);
+		unmap->addr[0] = dma_map_page(dev, src, 0, len, DMA_TO_DEVICE);
+		unmap->to_cnt++;
+		unmap->addr[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
+		dma_dest[1] = unmap->addr[1];
+		unmap->bidi_cnt++;
+		unmap->len = len;
+
+		/* this looks funny, but the engine looks for Q at
+		 * dma_dest[1] and ignores dma_dest[0] as a dest
+		 * due to DMA_PREP_PQ_DISABLE_P
+		 */
+		tx = dma->device_prep_dma_pq(chan, dma_dest, unmap->addr,
+					     1, &coef, len, dma_flags);
+
 		if (tx) {
+			dma_set_unmap(tx, unmap);
+			dmaengine_unmap_put(unmap);
 			async_tx_submit(chan, tx, submit);
 			return tx;
 		}
@@ -113,8 +139,7 @@
 		/* could not get a descriptor, unmap and fall through to
 		 * the synchronous path
 		 */
-		dma_unmap_page(dev, dma_dest[1], len, DMA_BIDIRECTIONAL);
-		dma_unmap_page(dev, dma_src[0], len, DMA_TO_DEVICE);
+		dmaengine_unmap_put(unmap);
 	}
 
 	/* no channel available, or failed to allocate a descriptor, so
diff --git a/crypto/async_tx/async_tx.c b/crypto/async_tx/async_tx.c
index 7be3424..39ea479 100644
--- a/crypto/async_tx/async_tx.c
+++ b/crypto/async_tx/async_tx.c
@@ -128,7 +128,7 @@
 		}
 		device->device_issue_pending(chan);
 	} else {
-		if (dma_wait_for_async_tx(depend_tx) != DMA_SUCCESS)
+		if (dma_wait_for_async_tx(depend_tx) != DMA_COMPLETE)
 			panic("%s: DMA error waiting for depend_tx\n",
 			      __func__);
 		tx->tx_submit(tx);
@@ -280,7 +280,7 @@
 		 * we are referring to the correct operation
 		 */
 		BUG_ON(async_tx_test_ack(*tx));
-		if (dma_wait_for_async_tx(*tx) != DMA_SUCCESS)
+		if (dma_wait_for_async_tx(*tx) != DMA_COMPLETE)
 			panic("%s: DMA error waiting for transaction\n",
 			      __func__);
 		async_tx_ack(*tx);
diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c
index 8ade0a0..3c562f5 100644
--- a/crypto/async_tx/async_xor.c
+++ b/crypto/async_tx/async_xor.c
@@ -33,48 +33,31 @@
 
 /* do_async_xor - dma map the pages and perform the xor with an engine */
 static __async_inline struct dma_async_tx_descriptor *
-do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
-	     unsigned int offset, int src_cnt, size_t len, dma_addr_t *dma_src,
+do_async_xor(struct dma_chan *chan, struct dmaengine_unmap_data *unmap,
 	     struct async_submit_ctl *submit)
 {
 	struct dma_device *dma = chan->device;
 	struct dma_async_tx_descriptor *tx = NULL;
-	int src_off = 0;
-	int i;
 	dma_async_tx_callback cb_fn_orig = submit->cb_fn;
 	void *cb_param_orig = submit->cb_param;
 	enum async_tx_flags flags_orig = submit->flags;
-	enum dma_ctrl_flags dma_flags;
-	int xor_src_cnt = 0;
-	dma_addr_t dma_dest;
-
-	/* map the dest bidrectional in case it is re-used as a source */
-	dma_dest = dma_map_page(dma->dev, dest, offset, len, DMA_BIDIRECTIONAL);
-	for (i = 0; i < src_cnt; i++) {
-		/* only map the dest once */
-		if (!src_list[i])
-			continue;
-		if (unlikely(src_list[i] == dest)) {
-			dma_src[xor_src_cnt++] = dma_dest;
-			continue;
-		}
-		dma_src[xor_src_cnt++] = dma_map_page(dma->dev, src_list[i], offset,
-						      len, DMA_TO_DEVICE);
-	}
-	src_cnt = xor_src_cnt;
+	enum dma_ctrl_flags dma_flags = 0;
+	int src_cnt = unmap->to_cnt;
+	int xor_src_cnt;
+	dma_addr_t dma_dest = unmap->addr[unmap->to_cnt];
+	dma_addr_t *src_list = unmap->addr;
 
 	while (src_cnt) {
+		dma_addr_t tmp;
+
 		submit->flags = flags_orig;
-		dma_flags = 0;
 		xor_src_cnt = min(src_cnt, (int)dma->max_xor);
-		/* if we are submitting additional xors, leave the chain open,
-		 * clear the callback parameters, and leave the destination
-		 * buffer mapped
+		/* if we are submitting additional xors, leave the chain open
+		 * and clear the callback parameters
 		 */
 		if (src_cnt > xor_src_cnt) {
 			submit->flags &= ~ASYNC_TX_ACK;
 			submit->flags |= ASYNC_TX_FENCE;
-			dma_flags = DMA_COMPL_SKIP_DEST_UNMAP;
 			submit->cb_fn = NULL;
 			submit->cb_param = NULL;
 		} else {
@@ -85,12 +68,18 @@
 			dma_flags |= DMA_PREP_INTERRUPT;
 		if (submit->flags & ASYNC_TX_FENCE)
 			dma_flags |= DMA_PREP_FENCE;
-		/* Since we have clobbered the src_list we are committed
-		 * to doing this asynchronously.  Drivers force forward progress
-		 * in case they can not provide a descriptor
+
+		/* Drivers force forward progress in case they can not provide a
+		 * descriptor
 		 */
-		tx = dma->device_prep_dma_xor(chan, dma_dest, &dma_src[src_off],
-					      xor_src_cnt, len, dma_flags);
+		tmp = src_list[0];
+		if (src_list > unmap->addr)
+			src_list[0] = dma_dest;
+		tx = dma->device_prep_dma_xor(chan, dma_dest, src_list,
+					      xor_src_cnt, unmap->len,
+					      dma_flags);
+		src_list[0] = tmp;
+
 
 		if (unlikely(!tx))
 			async_tx_quiesce(&submit->depend_tx);
@@ -99,22 +88,21 @@
 		while (unlikely(!tx)) {
 			dma_async_issue_pending(chan);
 			tx = dma->device_prep_dma_xor(chan, dma_dest,
-						      &dma_src[src_off],
-						      xor_src_cnt, len,
+						      src_list,
+						      xor_src_cnt, unmap->len,
 						      dma_flags);
 		}
 
+		dma_set_unmap(tx, unmap);
 		async_tx_submit(chan, tx, submit);
 		submit->depend_tx = tx;
 
 		if (src_cnt > xor_src_cnt) {
 			/* drop completed sources */
 			src_cnt -= xor_src_cnt;
-			src_off += xor_src_cnt;
-
 			/* use the intermediate result a source */
-			dma_src[--src_off] = dma_dest;
 			src_cnt++;
+			src_list += xor_src_cnt - 1;
 		} else
 			break;
 	}
@@ -189,22 +177,40 @@
 	struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR,
 						      &dest, 1, src_list,
 						      src_cnt, len);
-	dma_addr_t *dma_src = NULL;
+	struct dma_device *device = chan ? chan->device : NULL;
+	struct dmaengine_unmap_data *unmap = NULL;
 
 	BUG_ON(src_cnt <= 1);
 
-	if (submit->scribble)
-		dma_src = submit->scribble;
-	else if (sizeof(dma_addr_t) <= sizeof(struct page *))
-		dma_src = (dma_addr_t *) src_list;
+	if (device)
+		unmap = dmaengine_get_unmap_data(device->dev, src_cnt+1, GFP_NOIO);
 
-	if (dma_src && chan && is_dma_xor_aligned(chan->device, offset, 0, len)) {
+	if (unmap && is_dma_xor_aligned(device, offset, 0, len)) {
+		struct dma_async_tx_descriptor *tx;
+		int i, j;
+
 		/* run the xor asynchronously */
 		pr_debug("%s (async): len: %zu\n", __func__, len);
 
-		return do_async_xor(chan, dest, src_list, offset, src_cnt, len,
-				    dma_src, submit);
+		unmap->len = len;
+		for (i = 0, j = 0; i < src_cnt; i++) {
+			if (!src_list[i])
+				continue;
+			unmap->to_cnt++;
+			unmap->addr[j++] = dma_map_page(device->dev, src_list[i],
+							offset, len, DMA_TO_DEVICE);
+		}
+
+		/* map it bidirectional as it may be re-used as a source */
+		unmap->addr[j] = dma_map_page(device->dev, dest, offset, len,
+					      DMA_BIDIRECTIONAL);
+		unmap->bidi_cnt = 1;
+
+		tx = do_async_xor(chan, unmap, submit);
+		dmaengine_unmap_put(unmap);
+		return tx;
 	} else {
+		dmaengine_unmap_put(unmap);
 		/* run the xor synchronously */
 		pr_debug("%s (sync): len: %zu\n", __func__, len);
 		WARN_ONCE(chan, "%s: no space for dma address conversion\n",
@@ -268,16 +274,14 @@
 	struct dma_chan *chan = xor_val_chan(submit, dest, src_list, src_cnt, len);
 	struct dma_device *device = chan ? chan->device : NULL;
 	struct dma_async_tx_descriptor *tx = NULL;
-	dma_addr_t *dma_src = NULL;
+	struct dmaengine_unmap_data *unmap = NULL;
 
 	BUG_ON(src_cnt <= 1);
 
-	if (submit->scribble)
-		dma_src = submit->scribble;
-	else if (sizeof(dma_addr_t) <= sizeof(struct page *))
-		dma_src = (dma_addr_t *) src_list;
+	if (device)
+		unmap = dmaengine_get_unmap_data(device->dev, src_cnt, GFP_NOIO);
 
-	if (dma_src && device && src_cnt <= device->max_xor &&
+	if (unmap && src_cnt <= device->max_xor &&
 	    is_dma_xor_aligned(device, offset, 0, len)) {
 		unsigned long dma_prep_flags = 0;
 		int i;
@@ -288,11 +292,15 @@
 			dma_prep_flags |= DMA_PREP_INTERRUPT;
 		if (submit->flags & ASYNC_TX_FENCE)
 			dma_prep_flags |= DMA_PREP_FENCE;
-		for (i = 0; i < src_cnt; i++)
-			dma_src[i] = dma_map_page(device->dev, src_list[i],
-						  offset, len, DMA_TO_DEVICE);
 
-		tx = device->device_prep_dma_xor_val(chan, dma_src, src_cnt,
+		for (i = 0; i < src_cnt; i++) {
+			unmap->addr[i] = dma_map_page(device->dev, src_list[i],
+						      offset, len, DMA_TO_DEVICE);
+			unmap->to_cnt++;
+		}
+		unmap->len = len;
+
+		tx = device->device_prep_dma_xor_val(chan, unmap->addr, src_cnt,
 						     len, result,
 						     dma_prep_flags);
 		if (unlikely(!tx)) {
@@ -301,11 +309,11 @@
 			while (!tx) {
 				dma_async_issue_pending(chan);
 				tx = device->device_prep_dma_xor_val(chan,
-					dma_src, src_cnt, len, result,
+					unmap->addr, src_cnt, len, result,
 					dma_prep_flags);
 			}
 		}
-
+		dma_set_unmap(tx, unmap);
 		async_tx_submit(chan, tx, submit);
 	} else {
 		enum async_tx_flags flags_orig = submit->flags;
@@ -327,6 +335,7 @@
 		async_tx_sync_epilog(submit);
 		submit->flags = flags_orig;
 	}
+	dmaengine_unmap_put(unmap);
 
 	return tx;
 }
diff --git a/crypto/async_tx/raid6test.c b/crypto/async_tx/raid6test.c
index 4a92bac..dad95f4 100644
--- a/crypto/async_tx/raid6test.c
+++ b/crypto/async_tx/raid6test.c
@@ -28,7 +28,7 @@
 #undef pr
 #define pr(fmt, args...) pr_info("raid6test: " fmt, ##args)
 
-#define NDISKS 16 /* Including P and Q */
+#define NDISKS 64 /* Including P and Q */
 
 static struct page *dataptrs[NDISKS];
 static addr_conv_t addr_conv[NDISKS];
@@ -219,6 +219,14 @@
 		err += test(11, &tests);
 		err += test(12, &tests);
 	}
+
+	/* the 24 disk case is special for ioatdma as it is the boundary point
+	 * at which it needs to switch from 8-source ops to 16-source
+	 * ops for continuation (assumes DMA_HAS_PQ_CONTINUE is not set)
+	 */
+	if (NDISKS > 24)
+		err += test(24, &tests);
+
 	err += test(NDISKS, &tests);
 
 	pr("\n");
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index c95df0b..5d92485 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -235,17 +235,6 @@
 	  initrd, therefore it's safe to say Y.
 	  See Documentation/acpi/initrd_table_override.txt for details
 
-config ACPI_BLACKLIST_YEAR
-	int "Disable ACPI for systems before Jan 1st this year" if X86_32
-	default 0
-	help
-	  Enter a 4-digit year, e.g., 2001, to disable ACPI by default
-	  on platforms with DMI BIOS date before January 1st that year.
-	  "acpi=force" can be used to override this mechanism.
-
-	  Enter 0 to disable this mechanism and allow ACPI to
-	  run by default no matter what the year.  (default)
-
 config ACPI_DEBUG
 	bool "Debug Statements"
 	default n
diff --git a/drivers/acpi/ac.c b/drivers/acpi/ac.c
index b9f0d5f..8711e37 100644
--- a/drivers/acpi/ac.c
+++ b/drivers/acpi/ac.c
@@ -56,7 +56,6 @@
 
 struct acpi_ac {
 	struct power_supply charger;
-	struct acpi_device *adev;
 	struct platform_device *pdev;
 	unsigned long long state;
 };
@@ -70,8 +69,9 @@
 static int acpi_ac_get_state(struct acpi_ac *ac)
 {
 	acpi_status status;
+	acpi_handle handle = ACPI_HANDLE(&ac->pdev->dev);
 
-	status = acpi_evaluate_integer(ac->adev->handle, "_PSR", NULL,
+	status = acpi_evaluate_integer(handle, "_PSR", NULL,
 				       &ac->state);
 	if (ACPI_FAILURE(status)) {
 		ACPI_EXCEPTION((AE_INFO, status,
@@ -119,6 +119,7 @@
 static void acpi_ac_notify_handler(acpi_handle handle, u32 event, void *data)
 {
 	struct acpi_ac *ac = data;
+	struct acpi_device *adev;
 
 	if (!ac)
 		return;
@@ -141,10 +142,11 @@
 			msleep(ac_sleep_before_get_state_ms);
 
 		acpi_ac_get_state(ac);
-		acpi_bus_generate_netlink_event(ac->adev->pnp.device_class,
+		adev = ACPI_COMPANION(&ac->pdev->dev);
+		acpi_bus_generate_netlink_event(adev->pnp.device_class,
 						dev_name(&ac->pdev->dev),
 						event, (u32) ac->state);
-		acpi_notifier_call_chain(ac->adev, event, (u32) ac->state);
+		acpi_notifier_call_chain(adev, event, (u32) ac->state);
 		kobject_uevent(&ac->charger.dev->kobj, KOBJ_CHANGE);
 	}
 
@@ -178,8 +180,8 @@
 	if (!pdev)
 		return -EINVAL;
 
-	result = acpi_bus_get_device(ACPI_HANDLE(&pdev->dev), &adev);
-	if (result)
+	adev = ACPI_COMPANION(&pdev->dev);
+	if (!adev)
 		return -ENODEV;
 
 	ac = kzalloc(sizeof(struct acpi_ac), GFP_KERNEL);
@@ -188,7 +190,6 @@
 
 	strcpy(acpi_device_name(adev), ACPI_AC_DEVICE_NAME);
 	strcpy(acpi_device_class(adev), ACPI_AC_CLASS);
-	ac->adev = adev;
 	ac->pdev = pdev;
 	platform_set_drvdata(pdev, ac);
 
diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c
index d396101..6745fe1 100644
--- a/drivers/acpi/acpi_lpss.c
+++ b/drivers/acpi/acpi_lpss.c
@@ -163,6 +163,15 @@
 	{ "80860F41", (unsigned long)&byt_i2c_dev_desc },
 	{ "INT33B2", },
 
+	{ "INT3430", (unsigned long)&lpt_dev_desc },
+	{ "INT3431", (unsigned long)&lpt_dev_desc },
+	{ "INT3432", (unsigned long)&lpt_dev_desc },
+	{ "INT3433", (unsigned long)&lpt_dev_desc },
+	{ "INT3434", (unsigned long)&lpt_uart_dev_desc },
+	{ "INT3435", (unsigned long)&lpt_uart_dev_desc },
+	{ "INT3436", (unsigned long)&lpt_sdio_dev_desc },
+	{ "INT3437", },
+
 	{ }
 };
 
diff --git a/drivers/acpi/acpi_platform.c b/drivers/acpi/acpi_platform.c
index 8a4cfc7..dbfe49e 100644
--- a/drivers/acpi/acpi_platform.c
+++ b/drivers/acpi/acpi_platform.c
@@ -111,7 +111,7 @@
 	pdevinfo.id = -1;
 	pdevinfo.res = resources;
 	pdevinfo.num_res = count;
-	pdevinfo.acpi_node.handle = adev->handle;
+	pdevinfo.acpi_node.companion = adev;
 	pdev = platform_device_register_full(&pdevinfo);
 	if (IS_ERR(pdev)) {
 		dev_err(&adev->dev, "platform device creation failed: %ld\n",
diff --git a/drivers/acpi/blacklist.c b/drivers/acpi/blacklist.c
index fb84837..078c4f7 100644
--- a/drivers/acpi/blacklist.c
+++ b/drivers/acpi/blacklist.c
@@ -75,39 +75,6 @@
 	{""}
 };
 
-#if	CONFIG_ACPI_BLACKLIST_YEAR
-
-static int __init blacklist_by_year(void)
-{
-	int year;
-
-	/* Doesn't exist? Likely an old system */
-	if (!dmi_get_date(DMI_BIOS_DATE, &year, NULL, NULL)) {
-		printk(KERN_ERR PREFIX "no DMI BIOS year, "
-			"acpi=force is required to enable ACPI\n" );
-		return 1;
-	}
-	/* 0? Likely a buggy new BIOS */
-	if (year == 0) {
-		printk(KERN_ERR PREFIX "DMI BIOS year==0, "
-			"assuming ACPI-capable machine\n" );
-		return 0;
-	}
-	if (year < CONFIG_ACPI_BLACKLIST_YEAR) {
-		printk(KERN_ERR PREFIX "BIOS age (%d) fails cutoff (%d), "
-		       "acpi=force is required to enable ACPI\n",
-		       year, CONFIG_ACPI_BLACKLIST_YEAR);
-		return 1;
-	}
-	return 0;
-}
-#else
-static inline int blacklist_by_year(void)
-{
-	return 0;
-}
-#endif
-
 int __init acpi_blacklisted(void)
 {
 	int i = 0;
@@ -166,8 +133,6 @@
 		}
 	}
 
-	blacklisted += blacklist_by_year();
-
 	dmi_check_system(acpi_osi_dmi_table);
 
 	return blacklisted;
diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c
index d42b2fb..b3480cf 100644
--- a/drivers/acpi/device_pm.c
+++ b/drivers/acpi/device_pm.c
@@ -22,16 +22,12 @@
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  */
 
-#include <linux/device.h>
+#include <linux/acpi.h>
 #include <linux/export.h>
 #include <linux/mutex.h>
 #include <linux/pm_qos.h>
 #include <linux/pm_runtime.h>
 
-#include <acpi/acpi.h>
-#include <acpi/acpi_bus.h>
-#include <acpi/acpi_drivers.h>
-
 #include "internal.h"
 
 #define _COMPONENT	ACPI_POWER_COMPONENT
@@ -548,7 +544,7 @@
  */
 int acpi_pm_device_sleep_state(struct device *dev, int *d_min_p, int d_max_in)
 {
-	acpi_handle handle = DEVICE_ACPI_HANDLE(dev);
+	acpi_handle handle = ACPI_HANDLE(dev);
 	struct acpi_device *adev;
 	int ret, d_min, d_max;
 
@@ -656,7 +652,7 @@
 	if (!device_run_wake(phys_dev))
 		return -EINVAL;
 
-	handle = DEVICE_ACPI_HANDLE(phys_dev);
+	handle = ACPI_HANDLE(phys_dev);
 	if (!handle || acpi_bus_get_device(handle, &adev)) {
 		dev_dbg(phys_dev, "ACPI handle without context in %s!\n",
 			__func__);
@@ -700,7 +696,7 @@
 	if (!device_can_wakeup(dev))
 		return -EINVAL;
 
-	handle = DEVICE_ACPI_HANDLE(dev);
+	handle = ACPI_HANDLE(dev);
 	if (!handle || acpi_bus_get_device(handle, &adev)) {
 		dev_dbg(dev, "ACPI handle without context in %s!\n", __func__);
 		return -ENODEV;
@@ -722,7 +718,7 @@
  */
 struct acpi_device *acpi_dev_pm_get_node(struct device *dev)
 {
-	acpi_handle handle = DEVICE_ACPI_HANDLE(dev);
+	acpi_handle handle = ACPI_HANDLE(dev);
 	struct acpi_device *adev;
 
 	return handle && !acpi_bus_get_device(handle, &adev) ? adev : NULL;
diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index d5309fd..ba5b56d 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -173,9 +173,10 @@
 static void advance_transaction(struct acpi_ec *ec, u8 status)
 {
 	unsigned long flags;
-	struct transaction *t = ec->curr;
+	struct transaction *t;
 
 	spin_lock_irqsave(&ec->lock, flags);
+	t = ec->curr;
 	if (!t)
 		goto unlock;
 	if (t->wlen > t->wi) {
diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c
index 10f0f40..a22a295 100644
--- a/drivers/acpi/glue.c
+++ b/drivers/acpi/glue.c
@@ -197,30 +197,28 @@
 
 int acpi_bind_one(struct device *dev, acpi_handle handle)
 {
-	struct acpi_device *acpi_dev;
-	acpi_status status;
+	struct acpi_device *acpi_dev = NULL;
 	struct acpi_device_physical_node *physical_node, *pn;
 	char physical_node_name[PHYSICAL_NODE_NAME_SIZE];
 	struct list_head *physnode_list;
 	unsigned int node_id;
 	int retval = -EINVAL;
 
-	if (ACPI_HANDLE(dev)) {
+	if (ACPI_COMPANION(dev)) {
 		if (handle) {
-			dev_warn(dev, "ACPI handle is already set\n");
+			dev_warn(dev, "ACPI companion already set\n");
 			return -EINVAL;
 		} else {
-			handle = ACPI_HANDLE(dev);
+			acpi_dev = ACPI_COMPANION(dev);
 		}
+	} else {
+		acpi_bus_get_device(handle, &acpi_dev);
 	}
-	if (!handle)
+	if (!acpi_dev)
 		return -EINVAL;
 
+	get_device(&acpi_dev->dev);
 	get_device(dev);
-	status = acpi_bus_get_device(handle, &acpi_dev);
-	if (ACPI_FAILURE(status))
-		goto err;
-
 	physical_node = kzalloc(sizeof(*physical_node), GFP_KERNEL);
 	if (!physical_node) {
 		retval = -ENOMEM;
@@ -242,10 +240,11 @@
 
 			dev_warn(dev, "Already associated with ACPI node\n");
 			kfree(physical_node);
-			if (ACPI_HANDLE(dev) != handle)
+			if (ACPI_COMPANION(dev) != acpi_dev)
 				goto err;
 
 			put_device(dev);
+			put_device(&acpi_dev->dev);
 			return 0;
 		}
 		if (pn->node_id == node_id) {
@@ -259,8 +258,8 @@
 	list_add(&physical_node->node, physnode_list);
 	acpi_dev->physical_node_count++;
 
-	if (!ACPI_HANDLE(dev))
-		ACPI_HANDLE_SET(dev, acpi_dev->handle);
+	if (!ACPI_COMPANION(dev))
+		ACPI_COMPANION_SET(dev, acpi_dev);
 
 	acpi_physnode_link_name(physical_node_name, node_id);
 	retval = sysfs_create_link(&acpi_dev->dev.kobj, &dev->kobj,
@@ -283,27 +282,21 @@
 	return 0;
 
  err:
-	ACPI_HANDLE_SET(dev, NULL);
+	ACPI_COMPANION_SET(dev, NULL);
 	put_device(dev);
+	put_device(&acpi_dev->dev);
 	return retval;
 }
 EXPORT_SYMBOL_GPL(acpi_bind_one);
 
 int acpi_unbind_one(struct device *dev)
 {
+	struct acpi_device *acpi_dev = ACPI_COMPANION(dev);
 	struct acpi_device_physical_node *entry;
-	struct acpi_device *acpi_dev;
-	acpi_status status;
 
-	if (!ACPI_HANDLE(dev))
+	if (!acpi_dev)
 		return 0;
 
-	status = acpi_bus_get_device(ACPI_HANDLE(dev), &acpi_dev);
-	if (ACPI_FAILURE(status)) {
-		dev_err(dev, "Oops, ACPI handle corrupt in %s()\n", __func__);
-		return -EINVAL;
-	}
-
 	mutex_lock(&acpi_dev->physical_node_lock);
 
 	list_for_each_entry(entry, &acpi_dev->physical_node_list, node)
@@ -316,9 +309,10 @@
 			acpi_physnode_link_name(physnode_name, entry->node_id);
 			sysfs_remove_link(&acpi_dev->dev.kobj, physnode_name);
 			sysfs_remove_link(&dev->kobj, "firmware_node");
-			ACPI_HANDLE_SET(dev, NULL);
-			/* acpi_bind_one() increase refcnt by one. */
+			ACPI_COMPANION_SET(dev, NULL);
+			/* Drop references taken by acpi_bind_one(). */
 			put_device(dev);
+			put_device(&acpi_dev->dev);
 			kfree(entry);
 			break;
 		}
@@ -328,6 +322,15 @@
 }
 EXPORT_SYMBOL_GPL(acpi_unbind_one);
 
+void acpi_preset_companion(struct device *dev, acpi_handle parent, u64 addr)
+{
+	struct acpi_device *adev;
+
+	if (!acpi_bus_get_device(acpi_get_child(parent, addr), &adev))
+		ACPI_COMPANION_SET(dev, adev);
+}
+EXPORT_SYMBOL_GPL(acpi_preset_companion);
+
 static int acpi_platform_notify(struct device *dev)
 {
 	struct acpi_bus_type *type = acpi_get_bus_type(dev);
diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index 56f0586..0703bff 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -575,6 +575,7 @@
 		dev_err(&device->dev,
 			"Bus %04x:%02x not present in PCI namespace\n",
 			root->segment, (unsigned int)root->secondary.start);
+		device->driver_data = NULL;
 		result = -ENODEV;
 		goto end;
 	}
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 55f9ded..15daa21 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -289,24 +289,17 @@
 {
 	struct acpi_device *device = data;
 	acpi_handle handle = device->handle;
-	struct acpi_scan_handler *handler;
 	u32 ost_code = ACPI_OST_SC_NON_SPECIFIC_FAILURE;
 	int error;
 
 	lock_device_hotplug();
 	mutex_lock(&acpi_scan_lock);
 
-	handler = device->handler;
-	if (!handler || !handler->hotplug.enabled) {
-		put_device(&device->dev);
-		goto err_support;
-	}
-
 	if (ost_src == ACPI_NOTIFY_EJECT_REQUEST)
 		acpi_evaluate_hotplug_ost(handle, ACPI_NOTIFY_EJECT_REQUEST,
 					  ACPI_OST_SC_EJECT_IN_PROGRESS, NULL);
 
-	if (handler->hotplug.mode == AHM_CONTAINER)
+	if (device->handler && device->handler->hotplug.mode == AHM_CONTAINER)
 		kobject_uevent(&device->dev.kobj, KOBJ_OFFLINE);
 
 	error = acpi_scan_hot_remove(device);
@@ -411,8 +404,7 @@
 		break;
 	case ACPI_NOTIFY_EJECT_REQUEST:
 		acpi_handle_debug(handle, "ACPI_NOTIFY_EJECT_REQUEST event\n");
-		status = acpi_bus_get_device(handle, &adev);
-		if (ACPI_FAILURE(status))
+		if (acpi_bus_get_device(handle, &adev))
 			goto err_out;
 
 		get_device(&adev->dev);
@@ -1997,6 +1989,7 @@
 		if (result)
 			return result;
 
+		device->flags.match_driver = true;
 		result = device_attach(&device->dev);
 		if (result < 0)
 			return result;
@@ -2013,6 +2006,7 @@
 		if (result)
 			return result;
 
+		device->flags.match_driver = true;
 		result = device_attach(&device->dev);
 	}
 
diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c
index 18dbdff..995e91b 100644
--- a/drivers/acpi/video.c
+++ b/drivers/acpi/video.c
@@ -82,13 +82,6 @@
 module_param(allow_duplicates, bool, 0644);
 
 /*
- * Some BIOSes claim they use minimum backlight at boot,
- * and this may bring dimming screen after boot
- */
-static bool use_bios_initial_backlight = 1;
-module_param(use_bios_initial_backlight, bool, 0644);
-
-/*
  * For Windows 8 systems: if set true and the GPU driver has
  * registered a backlight interface, skip registering ACPI video's.
  */
@@ -406,12 +399,6 @@
 	return 0;
 }
 
-static int video_ignore_initial_backlight(const struct dmi_system_id *d)
-{
-	use_bios_initial_backlight = 0;
-	return 0;
-}
-
 static struct dmi_system_id video_dmi_table[] __initdata = {
 	/*
 	 * Broken _BQC workaround http://bugzilla.kernel.org/show_bug.cgi?id=13121
@@ -456,54 +443,6 @@
 		DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 7720"),
 		},
 	},
-	{
-	 .callback = video_ignore_initial_backlight,
-	 .ident = "HP Folio 13-2000",
-	 .matches = {
-		DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"),
-		DMI_MATCH(DMI_PRODUCT_NAME, "HP Folio 13 - 2000 Notebook PC"),
-		},
-	},
-	{
-	 .callback = video_ignore_initial_backlight,
-	 .ident = "Fujitsu E753",
-	 .matches = {
-		DMI_MATCH(DMI_BOARD_VENDOR, "FUJITSU"),
-		DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK E753"),
-		},
-	},
-	{
-	 .callback = video_ignore_initial_backlight,
-	 .ident = "HP Pavilion dm4",
-	 .matches = {
-		DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"),
-		DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion dm4 Notebook PC"),
-		},
-	},
-	{
-	 .callback = video_ignore_initial_backlight,
-	 .ident = "HP Pavilion g6 Notebook PC",
-	 .matches = {
-		 DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"),
-		 DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion g6 Notebook PC"),
-		},
-	},
-	{
-	 .callback = video_ignore_initial_backlight,
-	 .ident = "HP 1000 Notebook PC",
-	 .matches = {
-		DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"),
-		DMI_MATCH(DMI_PRODUCT_NAME, "HP 1000 Notebook PC"),
-		},
-	},
-	{
-	 .callback = video_ignore_initial_backlight,
-	 .ident = "HP Pavilion m4",
-	 .matches = {
-		DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"),
-		DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion m4 Notebook PC"),
-		},
-	},
 	{}
 };
 
@@ -839,20 +778,18 @@
 	if (!device->cap._BQC)
 		goto set_level;
 
-	if (use_bios_initial_backlight) {
-		level = acpi_video_bqc_value_to_level(device, level_old);
-		/*
-		 * On some buggy laptops, _BQC returns an uninitialized
-		 * value when invoked for the first time, i.e.
-		 * level_old is invalid (no matter whether it's a level
-		 * or an index). Set the backlight to max_level in this case.
-		 */
-		for (i = 2; i < br->count; i++)
-			if (level == br->levels[i])
-				break;
-		if (i == br->count || !level)
-			level = max_level;
-	}
+	level = acpi_video_bqc_value_to_level(device, level_old);
+	/*
+	 * On some buggy laptops, _BQC returns an uninitialized
+	 * value when invoked for the first time, i.e.
+	 * level_old is invalid (no matter whether it's a level
+	 * or an index). Set the backlight to max_level in this case.
+	 */
+	for (i = 2; i < br->count; i++)
+		if (level == br->levels[i])
+			break;
+	if (i == br->count || !level)
+		level = max_level;
 
 set_level:
 	result = acpi_video_device_lcd_set_level(device, level);
diff --git a/drivers/ata/libata-acpi.c b/drivers/ata/libata-acpi.c
index ab714d2..4372cfa 100644
--- a/drivers/ata/libata-acpi.c
+++ b/drivers/ata/libata-acpi.c
@@ -185,7 +185,7 @@
 	if (libata_noacpi || ap->flags & ATA_FLAG_ACPI_SATA || !host_handle)
 		return;
 
-	ACPI_HANDLE_SET(&ap->tdev, acpi_get_child(host_handle, ap->port_no));
+	acpi_preset_companion(&ap->tdev, host_handle, ap->port_no);
 
 	if (ata_acpi_gtm(ap, &ap->__acpi_init_gtm) == 0)
 		ap->pflags |= ATA_PFLAG_INIT_GTM_VALID;
@@ -222,7 +222,7 @@
 		parent_handle = port_handle;
 	}
 
-	ACPI_HANDLE_SET(&dev->tdev, acpi_get_child(parent_handle, adr));
+	acpi_preset_companion(&dev->tdev, parent_handle, adr);
 
 	register_hotplug_dock_device(ata_dev_acpi_handle(dev),
 				     &ata_acpi_dev_dock_ops, dev, NULL, NULL);
diff --git a/drivers/ata/pata_arasan_cf.c b/drivers/ata/pata_arasan_cf.c
index 853f610..e88690e 100644
--- a/drivers/ata/pata_arasan_cf.c
+++ b/drivers/ata/pata_arasan_cf.c
@@ -396,8 +396,7 @@
 	struct dma_async_tx_descriptor *tx;
 	struct dma_chan *chan = acdev->dma_chan;
 	dma_cookie_t cookie;
-	unsigned long flags = DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_SRC_UNMAP |
-		DMA_COMPL_SKIP_DEST_UNMAP;
+	unsigned long flags = DMA_PREP_INTERRUPT;
 	int ret = 0;
 
 	tx = chan->device->device_prep_dma_memcpy(chan, dest, src, len, flags);
diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index 47051cd..3a94b79 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -432,7 +432,7 @@
 		goto err_alloc;
 
 	pdev->dev.parent = pdevinfo->parent;
-	ACPI_HANDLE_SET(&pdev->dev, pdevinfo->acpi_node.handle);
+	ACPI_COMPANION_SET(&pdev->dev, pdevinfo->acpi_node.companion);
 
 	if (pdevinfo->dma_mask) {
 		/*
@@ -463,7 +463,7 @@
 	ret = platform_device_add(pdev);
 	if (ret) {
 err:
-		ACPI_HANDLE_SET(&pdev->dev, NULL);
+		ACPI_COMPANION_SET(&pdev->dev, NULL);
 		kfree(pdev->dev.dma_mask);
 
 err_alloc:
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index c12e9b9..1b41fca 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -1350,6 +1350,9 @@
 
 	device_unlock(dev);
 
+	if (error)
+		pm_runtime_put(dev);
+
 	return error;
 }
 
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 588479d..6a680d4 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -199,15 +199,16 @@
 
 	spin_lock_irqsave(&vblk->vq_lock, flags);
 	if (__virtblk_add_req(vblk->vq, vbr, vbr->sg, num) < 0) {
+		virtqueue_kick(vblk->vq);
 		spin_unlock_irqrestore(&vblk->vq_lock, flags);
 		blk_mq_stop_hw_queue(hctx);
-		virtqueue_kick(vblk->vq);
 		return BLK_MQ_RQ_QUEUE_BUSY;
 	}
-	spin_unlock_irqrestore(&vblk->vq_lock, flags);
 
 	if (last)
 		virtqueue_kick(vblk->vq);
+
+	spin_unlock_irqrestore(&vblk->vq_lock, flags);
 	return BLK_MQ_RQ_QUEUE_OK;
 }
 
diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c
index 218460f..25a70d0 100644
--- a/drivers/cpufreq/cpufreq_conservative.c
+++ b/drivers/cpufreq/cpufreq_conservative.c
@@ -68,6 +68,9 @@
 
 		dbs_info->requested_freq += get_freq_target(cs_tuners, policy);
 
+		if (dbs_info->requested_freq > policy->max)
+			dbs_info->requested_freq = policy->max;
+
 		__cpufreq_driver_target(policy, dbs_info->requested_freq,
 			CPUFREQ_RELATION_H);
 		return;
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index 0806c31..e6be635 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -328,10 +328,6 @@
 					     dbs_data->cdata->gov_dbs_timer);
 		}
 
-		/*
-		 * conservative does not implement micro like ondemand
-		 * governor, thus we are bound to jiffes/HZ
-		 */
 		if (dbs_data->cdata->governor == GOV_CONSERVATIVE) {
 			cs_dbs_info->down_skip = 0;
 			cs_dbs_info->enable = 1;
diff --git a/drivers/cpufreq/omap-cpufreq.c b/drivers/cpufreq/omap-cpufreq.c
index be6d143..a0acd0b 100644
--- a/drivers/cpufreq/omap-cpufreq.c
+++ b/drivers/cpufreq/omap-cpufreq.c
@@ -53,6 +53,7 @@
 
 static int omap_target(struct cpufreq_policy *policy, unsigned int index)
 {
+	int r, ret;
 	struct dev_pm_opp *opp;
 	unsigned long freq, volt = 0, volt_old = 0, tol = 0;
 	unsigned int old_freq, new_freq;
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index dd2874ec..446687c 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -89,14 +89,15 @@
 	  Support the Atmel AHB DMA controller.
 
 config FSL_DMA
-	tristate "Freescale Elo and Elo Plus DMA support"
+	tristate "Freescale Elo series DMA support"
 	depends on FSL_SOC
 	select DMA_ENGINE
 	select ASYNC_TX_ENABLE_CHANNEL_SWITCH
 	---help---
-	  Enable support for the Freescale Elo and Elo Plus DMA controllers.
-	  The Elo is the DMA controller on some 82xx and 83xx parts, and the
-	  Elo Plus is the DMA controller on 85xx and 86xx parts.
+	  Enable support for the Freescale Elo series DMA controllers.
+	  The Elo is the DMA controller on some mpc82xx and mpc83xx parts, the
+	  EloPlus is on mpc85xx and mpc86xx and Pxxx parts, and the Elo3 is on
+	  some Txxx and Bxxx parts.
 
 config MPC512X_DMA
 	tristate "Freescale MPC512x built-in DMA engine support"
diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c
index e51a983..16a2aa2 100644
--- a/drivers/dma/amba-pl08x.c
+++ b/drivers/dma/amba-pl08x.c
@@ -1164,42 +1164,12 @@
 	kfree(txd);
 }
 
-static void pl08x_unmap_buffers(struct pl08x_txd *txd)
-{
-	struct device *dev = txd->vd.tx.chan->device->dev;
-	struct pl08x_sg *dsg;
-
-	if (!(txd->vd.tx.flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-		if (txd->vd.tx.flags & DMA_COMPL_SRC_UNMAP_SINGLE)
-			list_for_each_entry(dsg, &txd->dsg_list, node)
-				dma_unmap_single(dev, dsg->src_addr, dsg->len,
-						DMA_TO_DEVICE);
-		else {
-			list_for_each_entry(dsg, &txd->dsg_list, node)
-				dma_unmap_page(dev, dsg->src_addr, dsg->len,
-						DMA_TO_DEVICE);
-		}
-	}
-	if (!(txd->vd.tx.flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
-		if (txd->vd.tx.flags & DMA_COMPL_DEST_UNMAP_SINGLE)
-			list_for_each_entry(dsg, &txd->dsg_list, node)
-				dma_unmap_single(dev, dsg->dst_addr, dsg->len,
-						DMA_FROM_DEVICE);
-		else
-			list_for_each_entry(dsg, &txd->dsg_list, node)
-				dma_unmap_page(dev, dsg->dst_addr, dsg->len,
-						DMA_FROM_DEVICE);
-	}
-}
-
 static void pl08x_desc_free(struct virt_dma_desc *vd)
 {
 	struct pl08x_txd *txd = to_pl08x_txd(&vd->tx);
 	struct pl08x_dma_chan *plchan = to_pl08x_chan(vd->tx.chan);
 
-	if (!plchan->slave)
-		pl08x_unmap_buffers(txd);
-
+	dma_descriptor_unmap(txd);
 	if (!txd->done)
 		pl08x_release_mux(plchan);
 
@@ -1252,7 +1222,7 @@
 	size_t bytes = 0;
 
 	ret = dma_cookie_status(chan, cookie, txstate);
-	if (ret == DMA_SUCCESS)
+	if (ret == DMA_COMPLETE)
 		return ret;
 
 	/*
@@ -1267,7 +1237,7 @@
 
 	spin_lock_irqsave(&plchan->vc.lock, flags);
 	ret = dma_cookie_status(chan, cookie, txstate);
-	if (ret != DMA_SUCCESS) {
+	if (ret != DMA_COMPLETE) {
 		vd = vchan_find_desc(&plchan->vc, cookie);
 		if (vd) {
 			/* On the issued list, so hasn't been processed yet */
@@ -2138,8 +2108,7 @@
 	writel(0x000000FF, pl08x->base + PL080_ERR_CLEAR);
 	writel(0x000000FF, pl08x->base + PL080_TC_CLEAR);
 
-	ret = request_irq(adev->irq[0], pl08x_irq, IRQF_DISABLED,
-			  DRIVER_NAME, pl08x);
+	ret = request_irq(adev->irq[0], pl08x_irq, 0, DRIVER_NAME, pl08x);
 	if (ret) {
 		dev_err(&adev->dev, "%s failed to request interrupt %d\n",
 			__func__, adev->irq[0]);
diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
index c787f38..e2c04dc 100644
--- a/drivers/dma/at_hdmac.c
+++ b/drivers/dma/at_hdmac.c
@@ -344,31 +344,7 @@
 	/* move myself to free_list */
 	list_move(&desc->desc_node, &atchan->free_list);
 
-	/* unmap dma addresses (not on slave channels) */
-	if (!atchan->chan_common.private) {
-		struct device *parent = chan2parent(&atchan->chan_common);
-		if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
-			if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
-				dma_unmap_single(parent,
-						desc->lli.daddr,
-						desc->len, DMA_FROM_DEVICE);
-			else
-				dma_unmap_page(parent,
-						desc->lli.daddr,
-						desc->len, DMA_FROM_DEVICE);
-		}
-		if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-			if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
-				dma_unmap_single(parent,
-						desc->lli.saddr,
-						desc->len, DMA_TO_DEVICE);
-			else
-				dma_unmap_page(parent,
-						desc->lli.saddr,
-						desc->len, DMA_TO_DEVICE);
-		}
-	}
-
+	dma_descriptor_unmap(txd);
 	/* for cyclic transfers,
 	 * no need to replay callback function while stopping */
 	if (!atc_chan_is_cyclic(atchan)) {
@@ -1102,7 +1078,7 @@
 	int bytes = 0;
 
 	ret = dma_cookie_status(chan, cookie, txstate);
-	if (ret == DMA_SUCCESS)
+	if (ret == DMA_COMPLETE)
 		return ret;
 	/*
 	 * There's no point calculating the residue if there's
diff --git a/drivers/dma/coh901318.c b/drivers/dma/coh901318.c
index 31011d2..3c6716e 100644
--- a/drivers/dma/coh901318.c
+++ b/drivers/dma/coh901318.c
@@ -2369,7 +2369,7 @@
 	enum dma_status ret;
 
 	ret = dma_cookie_status(chan, cookie, txstate);
-	if (ret == DMA_SUCCESS)
+	if (ret == DMA_COMPLETE)
 		return ret;
 
 	dma_set_residue(txstate, coh901318_get_bytes_left(chan));
@@ -2694,7 +2694,7 @@
 	if (irq < 0)
 		return irq;
 
-	err = devm_request_irq(&pdev->dev, irq, dma_irq_handler, IRQF_DISABLED,
+	err = devm_request_irq(&pdev->dev, irq, dma_irq_handler, 0,
 			       "coh901318", base);
 	if (err)
 		return err;
diff --git a/drivers/dma/cppi41.c b/drivers/dma/cppi41.c
index 7c82b92..c29dacf 100644
--- a/drivers/dma/cppi41.c
+++ b/drivers/dma/cppi41.c
@@ -141,6 +141,9 @@
 	const struct chan_queues *queues_rx;
 	const struct chan_queues *queues_tx;
 	struct chan_queues td_queue;
+
+	/* context for suspend/resume */
+	unsigned int dma_tdfdq;
 };
 
 #define FIST_COMPLETION_QUEUE	93
@@ -263,6 +266,15 @@
 	return val & ((1 << (DESC_LENGTH_BITS_NUM + 1)) - 1);
 }
 
+static u32 cppi41_pop_desc(struct cppi41_dd *cdd, unsigned queue_num)
+{
+	u32 desc;
+
+	desc = cppi_readl(cdd->qmgr_mem + QMGR_QUEUE_D(queue_num));
+	desc &= ~0x1f;
+	return desc;
+}
+
 static irqreturn_t cppi41_irq(int irq, void *data)
 {
 	struct cppi41_dd *cdd = data;
@@ -300,8 +312,7 @@
 			q_num = __fls(val);
 			val &= ~(1 << q_num);
 			q_num += 32 * i;
-			desc = cppi_readl(cdd->qmgr_mem + QMGR_QUEUE_D(q_num));
-			desc &= ~0x1f;
+			desc = cppi41_pop_desc(cdd, q_num);
 			c = desc_to_chan(cdd, desc);
 			if (WARN_ON(!c)) {
 				pr_err("%s() q %d desc %08x\n", __func__,
@@ -353,7 +364,7 @@
 
 	/* lock */
 	ret = dma_cookie_status(chan, cookie, txstate);
-	if (txstate && ret == DMA_SUCCESS)
+	if (txstate && ret == DMA_COMPLETE)
 		txstate->residue = c->residue;
 	/* unlock */
 
@@ -517,15 +528,6 @@
 	d->pd0 = DESC_TYPE_TEARD << DESC_TYPE;
 }
 
-static u32 cppi41_pop_desc(struct cppi41_dd *cdd, unsigned queue_num)
-{
-	u32 desc;
-
-	desc = cppi_readl(cdd->qmgr_mem + QMGR_QUEUE_D(queue_num));
-	desc &= ~0x1f;
-	return desc;
-}
-
 static int cppi41_tear_down_chan(struct cppi41_channel *c)
 {
 	struct cppi41_dd *cdd = c->cdd;
@@ -561,36 +563,26 @@
 		c->td_retry = 100;
 	}
 
-	if (!c->td_seen) {
-		unsigned td_comp_queue;
+	if (!c->td_seen || !c->td_desc_seen) {
 
-		if (c->is_tx)
-			td_comp_queue =  cdd->td_queue.complete;
-		else
-			td_comp_queue =  c->q_comp_num;
+		desc_phys = cppi41_pop_desc(cdd, cdd->td_queue.complete);
+		if (!desc_phys)
+			desc_phys = cppi41_pop_desc(cdd, c->q_comp_num);
 
-		desc_phys = cppi41_pop_desc(cdd, td_comp_queue);
-		if (desc_phys) {
-			__iormb();
-
-			if (desc_phys == td_desc_phys) {
-				u32 pd0;
-				pd0 = td->pd0;
-				WARN_ON((pd0 >> DESC_TYPE) != DESC_TYPE_TEARD);
-				WARN_ON(!c->is_tx && !(pd0 & TD_DESC_IS_RX));
-				WARN_ON((pd0 & 0x1f) != c->port_num);
-			} else {
-				WARN_ON_ONCE(1);
-			}
-			c->td_seen = 1;
-		}
-	}
-	if (!c->td_desc_seen) {
-		desc_phys = cppi41_pop_desc(cdd, c->q_comp_num);
-		if (desc_phys) {
-			__iormb();
-			WARN_ON(c->desc_phys != desc_phys);
+		if (desc_phys == c->desc_phys) {
 			c->td_desc_seen = 1;
+
+		} else if (desc_phys == td_desc_phys) {
+			u32 pd0;
+
+			__iormb();
+			pd0 = td->pd0;
+			WARN_ON((pd0 >> DESC_TYPE) != DESC_TYPE_TEARD);
+			WARN_ON(!c->is_tx && !(pd0 & TD_DESC_IS_RX));
+			WARN_ON((pd0 & 0x1f) != c->port_num);
+			c->td_seen = 1;
+		} else if (desc_phys) {
+			WARN_ON_ONCE(1);
 		}
 	}
 	c->td_retry--;
@@ -609,7 +601,7 @@
 
 	WARN_ON(!c->td_retry);
 	if (!c->td_desc_seen) {
-		desc_phys = cppi_readl(cdd->qmgr_mem + QMGR_QUEUE_D(c->q_num));
+		desc_phys = cppi41_pop_desc(cdd, c->q_num);
 		WARN_ON(!desc_phys);
 	}
 
@@ -674,14 +666,14 @@
 	}
 }
 
-static int cppi41_add_chans(struct platform_device *pdev, struct cppi41_dd *cdd)
+static int cppi41_add_chans(struct device *dev, struct cppi41_dd *cdd)
 {
 	struct cppi41_channel *cchan;
 	int i;
 	int ret;
 	u32 n_chans;
 
-	ret = of_property_read_u32(pdev->dev.of_node, "#dma-channels",
+	ret = of_property_read_u32(dev->of_node, "#dma-channels",
 			&n_chans);
 	if (ret)
 		return ret;
@@ -719,7 +711,7 @@
 	return -ENOMEM;
 }
 
-static void purge_descs(struct platform_device *pdev, struct cppi41_dd *cdd)
+static void purge_descs(struct device *dev, struct cppi41_dd *cdd)
 {
 	unsigned int mem_decs;
 	int i;
@@ -731,7 +723,7 @@
 		cppi_writel(0, cdd->qmgr_mem + QMGR_MEMBASE(i));
 		cppi_writel(0, cdd->qmgr_mem + QMGR_MEMCTRL(i));
 
-		dma_free_coherent(&pdev->dev, mem_decs, cdd->cd,
+		dma_free_coherent(dev, mem_decs, cdd->cd,
 				cdd->descs_phys);
 	}
 }
@@ -741,19 +733,19 @@
 	cppi_writel(0, cdd->sched_mem + DMA_SCHED_CTRL);
 }
 
-static void deinit_cpii41(struct platform_device *pdev, struct cppi41_dd *cdd)
+static void deinit_cppi41(struct device *dev, struct cppi41_dd *cdd)
 {
 	disable_sched(cdd);
 
-	purge_descs(pdev, cdd);
+	purge_descs(dev, cdd);
 
 	cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM0_BASE);
 	cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM0_BASE);
-	dma_free_coherent(&pdev->dev, QMGR_SCRATCH_SIZE, cdd->qmgr_scratch,
+	dma_free_coherent(dev, QMGR_SCRATCH_SIZE, cdd->qmgr_scratch,
 			cdd->scratch_phys);
 }
 
-static int init_descs(struct platform_device *pdev, struct cppi41_dd *cdd)
+static int init_descs(struct device *dev, struct cppi41_dd *cdd)
 {
 	unsigned int desc_size;
 	unsigned int mem_decs;
@@ -777,7 +769,7 @@
 		reg |= ilog2(ALLOC_DECS_NUM) - 5;
 
 		BUILD_BUG_ON(DESCS_AREAS != 1);
-		cdd->cd = dma_alloc_coherent(&pdev->dev, mem_decs,
+		cdd->cd = dma_alloc_coherent(dev, mem_decs,
 				&cdd->descs_phys, GFP_KERNEL);
 		if (!cdd->cd)
 			return -ENOMEM;
@@ -813,12 +805,12 @@
 	cppi_writel(reg, cdd->sched_mem + DMA_SCHED_CTRL);
 }
 
-static int init_cppi41(struct platform_device *pdev, struct cppi41_dd *cdd)
+static int init_cppi41(struct device *dev, struct cppi41_dd *cdd)
 {
 	int ret;
 
 	BUILD_BUG_ON(QMGR_SCRATCH_SIZE > ((1 << 14) - 1));
-	cdd->qmgr_scratch = dma_alloc_coherent(&pdev->dev, QMGR_SCRATCH_SIZE,
+	cdd->qmgr_scratch = dma_alloc_coherent(dev, QMGR_SCRATCH_SIZE,
 			&cdd->scratch_phys, GFP_KERNEL);
 	if (!cdd->qmgr_scratch)
 		return -ENOMEM;
@@ -827,7 +819,7 @@
 	cppi_writel(QMGR_SCRATCH_SIZE, cdd->qmgr_mem + QMGR_LRAM_SIZE);
 	cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM1_BASE);
 
-	ret = init_descs(pdev, cdd);
+	ret = init_descs(dev, cdd);
 	if (ret)
 		goto err_td;
 
@@ -835,7 +827,7 @@
 	init_sched(cdd);
 	return 0;
 err_td:
-	deinit_cpii41(pdev, cdd);
+	deinit_cppi41(dev, cdd);
 	return ret;
 }
 
@@ -914,11 +906,11 @@
 };
 MODULE_DEVICE_TABLE(of, cppi41_dma_ids);
 
-static const struct cppi_glue_infos *get_glue_info(struct platform_device *pdev)
+static const struct cppi_glue_infos *get_glue_info(struct device *dev)
 {
 	const struct of_device_id *of_id;
 
-	of_id = of_match_node(cppi41_dma_ids, pdev->dev.of_node);
+	of_id = of_match_node(cppi41_dma_ids, dev->of_node);
 	if (!of_id)
 		return NULL;
 	return of_id->data;
@@ -927,11 +919,12 @@
 static int cppi41_dma_probe(struct platform_device *pdev)
 {
 	struct cppi41_dd *cdd;
+	struct device *dev = &pdev->dev;
 	const struct cppi_glue_infos *glue_info;
 	int irq;
 	int ret;
 
-	glue_info = get_glue_info(pdev);
+	glue_info = get_glue_info(dev);
 	if (!glue_info)
 		return -EINVAL;
 
@@ -946,14 +939,14 @@
 	cdd->ddev.device_issue_pending = cppi41_dma_issue_pending;
 	cdd->ddev.device_prep_slave_sg = cppi41_dma_prep_slave_sg;
 	cdd->ddev.device_control = cppi41_dma_control;
-	cdd->ddev.dev = &pdev->dev;
+	cdd->ddev.dev = dev;
 	INIT_LIST_HEAD(&cdd->ddev.channels);
 	cpp41_dma_info.dma_cap = cdd->ddev.cap_mask;
 
-	cdd->usbss_mem = of_iomap(pdev->dev.of_node, 0);
-	cdd->ctrl_mem = of_iomap(pdev->dev.of_node, 1);
-	cdd->sched_mem = of_iomap(pdev->dev.of_node, 2);
-	cdd->qmgr_mem = of_iomap(pdev->dev.of_node, 3);
+	cdd->usbss_mem = of_iomap(dev->of_node, 0);
+	cdd->ctrl_mem = of_iomap(dev->of_node, 1);
+	cdd->sched_mem = of_iomap(dev->of_node, 2);
+	cdd->qmgr_mem = of_iomap(dev->of_node, 3);
 
 	if (!cdd->usbss_mem || !cdd->ctrl_mem || !cdd->sched_mem ||
 			!cdd->qmgr_mem) {
@@ -961,31 +954,31 @@
 		goto err_remap;
 	}
 
-	pm_runtime_enable(&pdev->dev);
-	ret = pm_runtime_get_sync(&pdev->dev);
-	if (ret)
+	pm_runtime_enable(dev);
+	ret = pm_runtime_get_sync(dev);
+	if (ret < 0)
 		goto err_get_sync;
 
 	cdd->queues_rx = glue_info->queues_rx;
 	cdd->queues_tx = glue_info->queues_tx;
 	cdd->td_queue = glue_info->td_queue;
 
-	ret = init_cppi41(pdev, cdd);
+	ret = init_cppi41(dev, cdd);
 	if (ret)
 		goto err_init_cppi;
 
-	ret = cppi41_add_chans(pdev, cdd);
+	ret = cppi41_add_chans(dev, cdd);
 	if (ret)
 		goto err_chans;
 
-	irq = irq_of_parse_and_map(pdev->dev.of_node, 0);
+	irq = irq_of_parse_and_map(dev->of_node, 0);
 	if (!irq)
 		goto err_irq;
 
 	cppi_writel(USBSS_IRQ_PD_COMP, cdd->usbss_mem + USBSS_IRQ_ENABLER);
 
 	ret = request_irq(irq, glue_info->isr, IRQF_SHARED,
-			dev_name(&pdev->dev), cdd);
+			dev_name(dev), cdd);
 	if (ret)
 		goto err_irq;
 	cdd->irq = irq;
@@ -994,7 +987,7 @@
 	if (ret)
 		goto err_dma_reg;
 
-	ret = of_dma_controller_register(pdev->dev.of_node,
+	ret = of_dma_controller_register(dev->of_node,
 			cppi41_dma_xlate, &cpp41_dma_info);
 	if (ret)
 		goto err_of;
@@ -1009,11 +1002,11 @@
 	cppi_writel(0, cdd->usbss_mem + USBSS_IRQ_CLEARR);
 	cleanup_chans(cdd);
 err_chans:
-	deinit_cpii41(pdev, cdd);
+	deinit_cppi41(dev, cdd);
 err_init_cppi:
-	pm_runtime_put(&pdev->dev);
+	pm_runtime_put(dev);
 err_get_sync:
-	pm_runtime_disable(&pdev->dev);
+	pm_runtime_disable(dev);
 	iounmap(cdd->usbss_mem);
 	iounmap(cdd->ctrl_mem);
 	iounmap(cdd->sched_mem);
@@ -1033,7 +1026,7 @@
 	cppi_writel(0, cdd->usbss_mem + USBSS_IRQ_CLEARR);
 	free_irq(cdd->irq, cdd);
 	cleanup_chans(cdd);
-	deinit_cpii41(pdev, cdd);
+	deinit_cppi41(&pdev->dev, cdd);
 	iounmap(cdd->usbss_mem);
 	iounmap(cdd->ctrl_mem);
 	iounmap(cdd->sched_mem);
@@ -1044,12 +1037,53 @@
 	return 0;
 }
 
+#ifdef CONFIG_PM_SLEEP
+static int cppi41_suspend(struct device *dev)
+{
+	struct cppi41_dd *cdd = dev_get_drvdata(dev);
+
+	cdd->dma_tdfdq = cppi_readl(cdd->ctrl_mem + DMA_TDFDQ);
+	cppi_writel(0, cdd->usbss_mem + USBSS_IRQ_CLEARR);
+	disable_sched(cdd);
+
+	return 0;
+}
+
+static int cppi41_resume(struct device *dev)
+{
+	struct cppi41_dd *cdd = dev_get_drvdata(dev);
+	struct cppi41_channel *c;
+	int i;
+
+	for (i = 0; i < DESCS_AREAS; i++)
+		cppi_writel(cdd->descs_phys, cdd->qmgr_mem + QMGR_MEMBASE(i));
+
+	list_for_each_entry(c, &cdd->ddev.channels, chan.device_node)
+		if (!c->is_tx)
+			cppi_writel(c->q_num, c->gcr_reg + RXHPCRA0);
+
+	init_sched(cdd);
+
+	cppi_writel(cdd->dma_tdfdq, cdd->ctrl_mem + DMA_TDFDQ);
+	cppi_writel(cdd->scratch_phys, cdd->qmgr_mem + QMGR_LRAM0_BASE);
+	cppi_writel(QMGR_SCRATCH_SIZE, cdd->qmgr_mem + QMGR_LRAM_SIZE);
+	cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM1_BASE);
+
+	cppi_writel(USBSS_IRQ_PD_COMP, cdd->usbss_mem + USBSS_IRQ_ENABLER);
+
+	return 0;
+}
+#endif
+
+static SIMPLE_DEV_PM_OPS(cppi41_pm_ops, cppi41_suspend, cppi41_resume);
+
 static struct platform_driver cpp41_dma_driver = {
 	.probe  = cppi41_dma_probe,
 	.remove = cppi41_dma_remove,
 	.driver = {
 		.name = "cppi41-dma-engine",
 		.owner = THIS_MODULE,
+		.pm = &cppi41_pm_ops,
 		.of_match_table = of_match_ptr(cppi41_dma_ids),
 	},
 };
diff --git a/drivers/dma/dma-jz4740.c b/drivers/dma/dma-jz4740.c
index b0c0c82..94c380f 100644
--- a/drivers/dma/dma-jz4740.c
+++ b/drivers/dma/dma-jz4740.c
@@ -491,7 +491,7 @@
 	unsigned long flags;
 
 	status = dma_cookie_status(c, cookie, state);
-	if (status == DMA_SUCCESS || !state)
+	if (status == DMA_COMPLETE || !state)
 		return status;
 
 	spin_lock_irqsave(&chan->vchan.lock, flags);
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 9162ac8..ea806bd 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -65,6 +65,7 @@
 #include <linux/acpi.h>
 #include <linux/acpi_dma.h>
 #include <linux/of_dma.h>
+#include <linux/mempool.h>
 
 static DEFINE_MUTEX(dma_list_mutex);
 static DEFINE_IDR(dma_idr);
@@ -901,98 +902,132 @@
 }
 EXPORT_SYMBOL(dma_async_device_unregister);
 
-/**
- * dma_async_memcpy_buf_to_buf - offloaded copy between virtual addresses
- * @chan: DMA channel to offload copy to
- * @dest: destination address (virtual)
- * @src: source address (virtual)
- * @len: length
- *
- * Both @dest and @src must be mappable to a bus address according to the
- * DMA mapping API rules for streaming mappings.
- * Both @dest and @src must stay memory resident (kernel memory or locked
- * user space pages).
- */
-dma_cookie_t
-dma_async_memcpy_buf_to_buf(struct dma_chan *chan, void *dest,
-			void *src, size_t len)
+struct dmaengine_unmap_pool {
+	struct kmem_cache *cache;
+	const char *name;
+	mempool_t *pool;
+	size_t size;
+};
+
+#define __UNMAP_POOL(x) { .size = x, .name = "dmaengine-unmap-" __stringify(x) }
+static struct dmaengine_unmap_pool unmap_pool[] = {
+	__UNMAP_POOL(2),
+	#if IS_ENABLED(CONFIG_ASYNC_TX_DMA)
+	__UNMAP_POOL(16),
+	__UNMAP_POOL(128),
+	__UNMAP_POOL(256),
+	#endif
+};
+
+static struct dmaengine_unmap_pool *__get_unmap_pool(int nr)
 {
-	struct dma_device *dev = chan->device;
-	struct dma_async_tx_descriptor *tx;
-	dma_addr_t dma_dest, dma_src;
-	dma_cookie_t cookie;
-	unsigned long flags;
+	int order = get_count_order(nr);
 
-	dma_src = dma_map_single(dev->dev, src, len, DMA_TO_DEVICE);
-	dma_dest = dma_map_single(dev->dev, dest, len, DMA_FROM_DEVICE);
-	flags = DMA_CTRL_ACK |
-		DMA_COMPL_SRC_UNMAP_SINGLE |
-		DMA_COMPL_DEST_UNMAP_SINGLE;
-	tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len, flags);
+	switch (order) {
+	case 0 ... 1:
+		return &unmap_pool[0];
+	case 2 ... 4:
+		return &unmap_pool[1];
+	case 5 ... 7:
+		return &unmap_pool[2];
+	case 8:
+		return &unmap_pool[3];
+	default:
+		BUG();
+		return NULL;
+	}
+}
 
-	if (!tx) {
-		dma_unmap_single(dev->dev, dma_src, len, DMA_TO_DEVICE);
-		dma_unmap_single(dev->dev, dma_dest, len, DMA_FROM_DEVICE);
-		return -ENOMEM;
+static void dmaengine_unmap(struct kref *kref)
+{
+	struct dmaengine_unmap_data *unmap = container_of(kref, typeof(*unmap), kref);
+	struct device *dev = unmap->dev;
+	int cnt, i;
+
+	cnt = unmap->to_cnt;
+	for (i = 0; i < cnt; i++)
+		dma_unmap_page(dev, unmap->addr[i], unmap->len,
+			       DMA_TO_DEVICE);
+	cnt += unmap->from_cnt;
+	for (; i < cnt; i++)
+		dma_unmap_page(dev, unmap->addr[i], unmap->len,
+			       DMA_FROM_DEVICE);
+	cnt += unmap->bidi_cnt;
+	for (; i < cnt; i++) {
+		if (unmap->addr[i] == 0)
+			continue;
+		dma_unmap_page(dev, unmap->addr[i], unmap->len,
+			       DMA_BIDIRECTIONAL);
+	}
+	mempool_free(unmap, __get_unmap_pool(cnt)->pool);
+}
+
+void dmaengine_unmap_put(struct dmaengine_unmap_data *unmap)
+{
+	if (unmap)
+		kref_put(&unmap->kref, dmaengine_unmap);
+}
+EXPORT_SYMBOL_GPL(dmaengine_unmap_put);
+
+static void dmaengine_destroy_unmap_pool(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(unmap_pool); i++) {
+		struct dmaengine_unmap_pool *p = &unmap_pool[i];
+
+		if (p->pool)
+			mempool_destroy(p->pool);
+		p->pool = NULL;
+		if (p->cache)
+			kmem_cache_destroy(p->cache);
+		p->cache = NULL;
+	}
+}
+
+static int __init dmaengine_init_unmap_pool(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(unmap_pool); i++) {
+		struct dmaengine_unmap_pool *p = &unmap_pool[i];
+		size_t size;
+
+		size = sizeof(struct dmaengine_unmap_data) +
+		       sizeof(dma_addr_t) * p->size;
+
+		p->cache = kmem_cache_create(p->name, size, 0,
+					     SLAB_HWCACHE_ALIGN, NULL);
+		if (!p->cache)
+			break;
+		p->pool = mempool_create_slab_pool(1, p->cache);
+		if (!p->pool)
+			break;
 	}
 
-	tx->callback = NULL;
-	cookie = tx->tx_submit(tx);
+	if (i == ARRAY_SIZE(unmap_pool))
+		return 0;
 
-	preempt_disable();
-	__this_cpu_add(chan->local->bytes_transferred, len);
-	__this_cpu_inc(chan->local->memcpy_count);
-	preempt_enable();
-
-	return cookie;
+	dmaengine_destroy_unmap_pool();
+	return -ENOMEM;
 }
-EXPORT_SYMBOL(dma_async_memcpy_buf_to_buf);
 
-/**
- * dma_async_memcpy_buf_to_pg - offloaded copy from address to page
- * @chan: DMA channel to offload copy to
- * @page: destination page
- * @offset: offset in page to copy to
- * @kdata: source address (virtual)
- * @len: length
- *
- * Both @page/@offset and @kdata must be mappable to a bus address according
- * to the DMA mapping API rules for streaming mappings.
- * Both @page/@offset and @kdata must stay memory resident (kernel memory or
- * locked user space pages)
- */
-dma_cookie_t
-dma_async_memcpy_buf_to_pg(struct dma_chan *chan, struct page *page,
-			unsigned int offset, void *kdata, size_t len)
+struct dmaengine_unmap_data *
+dmaengine_get_unmap_data(struct device *dev, int nr, gfp_t flags)
 {
-	struct dma_device *dev = chan->device;
-	struct dma_async_tx_descriptor *tx;
-	dma_addr_t dma_dest, dma_src;
-	dma_cookie_t cookie;
-	unsigned long flags;
+	struct dmaengine_unmap_data *unmap;
 
-	dma_src = dma_map_single(dev->dev, kdata, len, DMA_TO_DEVICE);
-	dma_dest = dma_map_page(dev->dev, page, offset, len, DMA_FROM_DEVICE);
-	flags = DMA_CTRL_ACK | DMA_COMPL_SRC_UNMAP_SINGLE;
-	tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len, flags);
+	unmap = mempool_alloc(__get_unmap_pool(nr)->pool, flags);
+	if (!unmap)
+		return NULL;
 
-	if (!tx) {
-		dma_unmap_single(dev->dev, dma_src, len, DMA_TO_DEVICE);
-		dma_unmap_page(dev->dev, dma_dest, len, DMA_FROM_DEVICE);
-		return -ENOMEM;
-	}
+	memset(unmap, 0, sizeof(*unmap));
+	kref_init(&unmap->kref);
+	unmap->dev = dev;
 
-	tx->callback = NULL;
-	cookie = tx->tx_submit(tx);
-
-	preempt_disable();
-	__this_cpu_add(chan->local->bytes_transferred, len);
-	__this_cpu_inc(chan->local->memcpy_count);
-	preempt_enable();
-
-	return cookie;
+	return unmap;
 }
-EXPORT_SYMBOL(dma_async_memcpy_buf_to_pg);
+EXPORT_SYMBOL(dmaengine_get_unmap_data);
 
 /**
  * dma_async_memcpy_pg_to_pg - offloaded copy from page to page
@@ -1015,24 +1050,33 @@
 {
 	struct dma_device *dev = chan->device;
 	struct dma_async_tx_descriptor *tx;
-	dma_addr_t dma_dest, dma_src;
+	struct dmaengine_unmap_data *unmap;
 	dma_cookie_t cookie;
 	unsigned long flags;
 
-	dma_src = dma_map_page(dev->dev, src_pg, src_off, len, DMA_TO_DEVICE);
-	dma_dest = dma_map_page(dev->dev, dest_pg, dest_off, len,
-				DMA_FROM_DEVICE);
+	unmap = dmaengine_get_unmap_data(dev->dev, 2, GFP_NOIO);
+	if (!unmap)
+		return -ENOMEM;
+
+	unmap->to_cnt = 1;
+	unmap->from_cnt = 1;
+	unmap->addr[0] = dma_map_page(dev->dev, src_pg, src_off, len,
+				      DMA_TO_DEVICE);
+	unmap->addr[1] = dma_map_page(dev->dev, dest_pg, dest_off, len,
+				      DMA_FROM_DEVICE);
+	unmap->len = len;
 	flags = DMA_CTRL_ACK;
-	tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len, flags);
+	tx = dev->device_prep_dma_memcpy(chan, unmap->addr[1], unmap->addr[0],
+					 len, flags);
 
 	if (!tx) {
-		dma_unmap_page(dev->dev, dma_src, len, DMA_TO_DEVICE);
-		dma_unmap_page(dev->dev, dma_dest, len, DMA_FROM_DEVICE);
+		dmaengine_unmap_put(unmap);
 		return -ENOMEM;
 	}
 
-	tx->callback = NULL;
+	dma_set_unmap(tx, unmap);
 	cookie = tx->tx_submit(tx);
+	dmaengine_unmap_put(unmap);
 
 	preempt_disable();
 	__this_cpu_add(chan->local->bytes_transferred, len);
@@ -1043,6 +1087,52 @@
 }
 EXPORT_SYMBOL(dma_async_memcpy_pg_to_pg);
 
+/**
+ * dma_async_memcpy_buf_to_buf - offloaded copy between virtual addresses
+ * @chan: DMA channel to offload copy to
+ * @dest: destination address (virtual)
+ * @src: source address (virtual)
+ * @len: length
+ *
+ * Both @dest and @src must be mappable to a bus address according to the
+ * DMA mapping API rules for streaming mappings.
+ * Both @dest and @src must stay memory resident (kernel memory or locked
+ * user space pages).
+ */
+dma_cookie_t
+dma_async_memcpy_buf_to_buf(struct dma_chan *chan, void *dest,
+			    void *src, size_t len)
+{
+	return dma_async_memcpy_pg_to_pg(chan, virt_to_page(dest),
+					 (unsigned long) dest & ~PAGE_MASK,
+					 virt_to_page(src),
+					 (unsigned long) src & ~PAGE_MASK, len);
+}
+EXPORT_SYMBOL(dma_async_memcpy_buf_to_buf);
+
+/**
+ * dma_async_memcpy_buf_to_pg - offloaded copy from address to page
+ * @chan: DMA channel to offload copy to
+ * @page: destination page
+ * @offset: offset in page to copy to
+ * @kdata: source address (virtual)
+ * @len: length
+ *
+ * Both @page/@offset and @kdata must be mappable to a bus address according
+ * to the DMA mapping API rules for streaming mappings.
+ * Both @page/@offset and @kdata must stay memory resident (kernel memory or
+ * locked user space pages)
+ */
+dma_cookie_t
+dma_async_memcpy_buf_to_pg(struct dma_chan *chan, struct page *page,
+			   unsigned int offset, void *kdata, size_t len)
+{
+	return dma_async_memcpy_pg_to_pg(chan, page, offset,
+					 virt_to_page(kdata),
+					 (unsigned long) kdata & ~PAGE_MASK, len);
+}
+EXPORT_SYMBOL(dma_async_memcpy_buf_to_pg);
+
 void dma_async_tx_descriptor_init(struct dma_async_tx_descriptor *tx,
 	struct dma_chan *chan)
 {
@@ -1062,7 +1152,7 @@
 	unsigned long dma_sync_wait_timeout = jiffies + msecs_to_jiffies(5000);
 
 	if (!tx)
-		return DMA_SUCCESS;
+		return DMA_COMPLETE;
 
 	while (tx->cookie == -EBUSY) {
 		if (time_after_eq(jiffies, dma_sync_wait_timeout)) {
@@ -1116,6 +1206,10 @@
 
 static int __init dma_bus_init(void)
 {
+	int err = dmaengine_init_unmap_pool();
+
+	if (err)
+		return err;
 	return class_register(&dma_devclass);
 }
 arch_initcall(dma_bus_init);
diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c
index 92f796c..20f9a3a 100644
--- a/drivers/dma/dmatest.c
+++ b/drivers/dma/dmatest.c
@@ -8,6 +8,8 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/delay.h>
 #include <linux/dma-mapping.h>
 #include <linux/dmaengine.h>
@@ -19,10 +21,6 @@
 #include <linux/random.h>
 #include <linux/slab.h>
 #include <linux/wait.h>
-#include <linux/ctype.h>
-#include <linux/debugfs.h>
-#include <linux/uaccess.h>
-#include <linux/seq_file.h>
 
 static unsigned int test_buf_size = 16384;
 module_param(test_buf_size, uint, S_IRUGO | S_IWUSR);
@@ -68,92 +66,13 @@
 MODULE_PARM_DESC(timeout, "Transfer Timeout in msec (default: 3000), "
 		 "Pass -1 for infinite timeout");
 
-/* Maximum amount of mismatched bytes in buffer to print */
-#define MAX_ERROR_COUNT		32
+static bool noverify;
+module_param(noverify, bool, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(noverify, "Disable random data setup and verification");
 
-/*
- * Initialization patterns. All bytes in the source buffer has bit 7
- * set, all bytes in the destination buffer has bit 7 cleared.
- *
- * Bit 6 is set for all bytes which are to be copied by the DMA
- * engine. Bit 5 is set for all bytes which are to be overwritten by
- * the DMA engine.
- *
- * The remaining bits are the inverse of a counter which increments by
- * one for each byte address.
- */
-#define PATTERN_SRC		0x80
-#define PATTERN_DST		0x00
-#define PATTERN_COPY		0x40
-#define PATTERN_OVERWRITE	0x20
-#define PATTERN_COUNT_MASK	0x1f
-
-enum dmatest_error_type {
-	DMATEST_ET_OK,
-	DMATEST_ET_MAP_SRC,
-	DMATEST_ET_MAP_DST,
-	DMATEST_ET_PREP,
-	DMATEST_ET_SUBMIT,
-	DMATEST_ET_TIMEOUT,
-	DMATEST_ET_DMA_ERROR,
-	DMATEST_ET_DMA_IN_PROGRESS,
-	DMATEST_ET_VERIFY,
-	DMATEST_ET_VERIFY_BUF,
-};
-
-struct dmatest_verify_buffer {
-	unsigned int	index;
-	u8		expected;
-	u8		actual;
-};
-
-struct dmatest_verify_result {
-	unsigned int			error_count;
-	struct dmatest_verify_buffer	data[MAX_ERROR_COUNT];
-	u8				pattern;
-	bool				is_srcbuf;
-};
-
-struct dmatest_thread_result {
-	struct list_head	node;
-	unsigned int		n;
-	unsigned int		src_off;
-	unsigned int		dst_off;
-	unsigned int		len;
-	enum dmatest_error_type	type;
-	union {
-		unsigned long			data;
-		dma_cookie_t			cookie;
-		enum dma_status			status;
-		int				error;
-		struct dmatest_verify_result	*vr;
-	};
-};
-
-struct dmatest_result {
-	struct list_head	node;
-	char			*name;
-	struct list_head	results;
-};
-
-struct dmatest_info;
-
-struct dmatest_thread {
-	struct list_head	node;
-	struct dmatest_info	*info;
-	struct task_struct	*task;
-	struct dma_chan		*chan;
-	u8			**srcs;
-	u8			**dsts;
-	enum dma_transaction_type type;
-	bool			done;
-};
-
-struct dmatest_chan {
-	struct list_head	node;
-	struct dma_chan		*chan;
-	struct list_head	threads;
-};
+static bool verbose;
+module_param(verbose, bool, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(verbose, "Enable \"success\" result messages (default: off)");
 
 /**
  * struct dmatest_params - test parameters.
@@ -177,6 +96,7 @@
 	unsigned int	xor_sources;
 	unsigned int	pq_sources;
 	int		timeout;
+	bool		noverify;
 };
 
 /**
@@ -184,7 +104,7 @@
  * @params:		test parameters
  * @lock:		access protection to the fields of this structure
  */
-struct dmatest_info {
+static struct dmatest_info {
 	/* Test parameters */
 	struct dmatest_params	params;
 
@@ -192,16 +112,95 @@
 	struct list_head	channels;
 	unsigned int		nr_channels;
 	struct mutex		lock;
-
-	/* debugfs related stuff */
-	struct dentry		*root;
-
-	/* Test results */
-	struct list_head	results;
-	struct mutex		results_lock;
+	bool			did_init;
+} test_info = {
+	.channels = LIST_HEAD_INIT(test_info.channels),
+	.lock = __MUTEX_INITIALIZER(test_info.lock),
 };
 
-static struct dmatest_info test_info;
+static int dmatest_run_set(const char *val, const struct kernel_param *kp);
+static int dmatest_run_get(char *val, const struct kernel_param *kp);
+static struct kernel_param_ops run_ops = {
+	.set = dmatest_run_set,
+	.get = dmatest_run_get,
+};
+static bool dmatest_run;
+module_param_cb(run, &run_ops, &dmatest_run, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(run, "Run the test (default: false)");
+
+/* Maximum amount of mismatched bytes in buffer to print */
+#define MAX_ERROR_COUNT		32
+
+/*
+ * Initialization patterns. All bytes in the source buffer have bit 7
+ * set, all bytes in the destination buffer have bit 7 cleared.
+ *
+ * Bit 6 is set for all bytes which are to be copied by the DMA
+ * engine. Bit 5 is set for all bytes which are to be overwritten by
+ * the DMA engine.
+ *
+ * The remaining bits are the inverse of a counter which increments by
+ * one for each byte address.
+ */
+#define PATTERN_SRC		0x80
+#define PATTERN_DST		0x00
+#define PATTERN_COPY		0x40
+#define PATTERN_OVERWRITE	0x20
+#define PATTERN_COUNT_MASK	0x1f
+
+struct dmatest_thread {
+	struct list_head	node;
+	struct dmatest_info	*info;
+	struct task_struct	*task;
+	struct dma_chan		*chan;
+	u8			**srcs;
+	u8			**dsts;
+	enum dma_transaction_type type;
+	bool			done;
+};
+
+struct dmatest_chan {
+	struct list_head	node;
+	struct dma_chan		*chan;
+	struct list_head	threads;
+};
+
+static DECLARE_WAIT_QUEUE_HEAD(thread_wait);
+static bool wait;
+
+static bool is_threaded_test_run(struct dmatest_info *info)
+{
+	struct dmatest_chan *dtc;
+
+	list_for_each_entry(dtc, &info->channels, node) {
+		struct dmatest_thread *thread;
+
+		list_for_each_entry(thread, &dtc->threads, node) {
+			if (!thread->done)
+				return true;
+		}
+	}
+
+	return false;
+}
+
+static int dmatest_wait_get(char *val, const struct kernel_param *kp)
+{
+	struct dmatest_info *info = &test_info;
+	struct dmatest_params *params = &info->params;
+
+	if (params->iterations)
+		wait_event(thread_wait, !is_threaded_test_run(info));
+	wait = true;
+	return param_get_bool(val, kp);
+}
+
+static struct kernel_param_ops wait_ops = {
+	.get = dmatest_wait_get,
+	.set = param_set_bool,
+};
+module_param_cb(wait, &wait_ops, &wait, S_IRUGO);
+MODULE_PARM_DESC(wait, "Wait for tests to complete (default: false)");
 
 static bool dmatest_match_channel(struct dmatest_params *params,
 		struct dma_chan *chan)
@@ -223,7 +222,7 @@
 {
 	unsigned long buf;
 
-	get_random_bytes(&buf, sizeof(buf));
+	prandom_bytes(&buf, sizeof(buf));
 	return buf;
 }
 
@@ -262,9 +261,31 @@
 	}
 }
 
-static unsigned int dmatest_verify(struct dmatest_verify_result *vr, u8 **bufs,
-		unsigned int start, unsigned int end, unsigned int counter,
-		u8 pattern, bool is_srcbuf)
+static void dmatest_mismatch(u8 actual, u8 pattern, unsigned int index,
+		unsigned int counter, bool is_srcbuf)
+{
+	u8		diff = actual ^ pattern;
+	u8		expected = pattern | (~counter & PATTERN_COUNT_MASK);
+	const char	*thread_name = current->comm;
+
+	if (is_srcbuf)
+		pr_warn("%s: srcbuf[0x%x] overwritten! Expected %02x, got %02x\n",
+			thread_name, index, expected, actual);
+	else if ((pattern & PATTERN_COPY)
+			&& (diff & (PATTERN_COPY | PATTERN_OVERWRITE)))
+		pr_warn("%s: dstbuf[0x%x] not copied! Expected %02x, got %02x\n",
+			thread_name, index, expected, actual);
+	else if (diff & PATTERN_SRC)
+		pr_warn("%s: dstbuf[0x%x] was copied! Expected %02x, got %02x\n",
+			thread_name, index, expected, actual);
+	else
+		pr_warn("%s: dstbuf[0x%x] mismatch! Expected %02x, got %02x\n",
+			thread_name, index, expected, actual);
+}
+
+static unsigned int dmatest_verify(u8 **bufs, unsigned int start,
+		unsigned int end, unsigned int counter, u8 pattern,
+		bool is_srcbuf)
 {
 	unsigned int i;
 	unsigned int error_count = 0;
@@ -272,7 +293,6 @@
 	u8 expected;
 	u8 *buf;
 	unsigned int counter_orig = counter;
-	struct dmatest_verify_buffer *vb;
 
 	for (; (buf = *bufs); bufs++) {
 		counter = counter_orig;
@@ -280,12 +300,9 @@
 			actual = buf[i];
 			expected = pattern | (~counter & PATTERN_COUNT_MASK);
 			if (actual != expected) {
-				if (error_count < MAX_ERROR_COUNT && vr) {
-					vb = &vr->data[error_count];
-					vb->index = i;
-					vb->expected = expected;
-					vb->actual = actual;
-				}
+				if (error_count < MAX_ERROR_COUNT)
+					dmatest_mismatch(actual, pattern, i,
+							 counter, is_srcbuf);
 				error_count++;
 			}
 			counter++;
@@ -293,7 +310,7 @@
 	}
 
 	if (error_count > MAX_ERROR_COUNT)
-		pr_warning("%s: %u errors suppressed\n",
+		pr_warn("%s: %u errors suppressed\n",
 			current->comm, error_count - MAX_ERROR_COUNT);
 
 	return error_count;
@@ -313,20 +330,6 @@
 	wake_up_all(done->wait);
 }
 
-static inline void unmap_src(struct device *dev, dma_addr_t *addr, size_t len,
-			     unsigned int count)
-{
-	while (count--)
-		dma_unmap_single(dev, addr[count], len, DMA_TO_DEVICE);
-}
-
-static inline void unmap_dst(struct device *dev, dma_addr_t *addr, size_t len,
-			     unsigned int count)
-{
-	while (count--)
-		dma_unmap_single(dev, addr[count], len, DMA_BIDIRECTIONAL);
-}
-
 static unsigned int min_odd(unsigned int x, unsigned int y)
 {
 	unsigned int val = min(x, y);
@@ -334,172 +337,49 @@
 	return val % 2 ? val : val - 1;
 }
 
-static char *verify_result_get_one(struct dmatest_verify_result *vr,
-		unsigned int i)
+static void result(const char *err, unsigned int n, unsigned int src_off,
+		   unsigned int dst_off, unsigned int len, unsigned long data)
 {
-	struct dmatest_verify_buffer *vb = &vr->data[i];
-	u8 diff = vb->actual ^ vr->pattern;
-	static char buf[512];
-	char *msg;
-
-	if (vr->is_srcbuf)
-		msg = "srcbuf overwritten!";
-	else if ((vr->pattern & PATTERN_COPY)
-			&& (diff & (PATTERN_COPY | PATTERN_OVERWRITE)))
-		msg = "dstbuf not copied!";
-	else if (diff & PATTERN_SRC)
-		msg = "dstbuf was copied!";
-	else
-		msg = "dstbuf mismatch!";
-
-	snprintf(buf, sizeof(buf) - 1, "%s [0x%x] Expected %02x, got %02x", msg,
-		 vb->index, vb->expected, vb->actual);
-
-	return buf;
+	pr_info("%s: result #%u: '%s' with src_off=0x%x dst_off=0x%x len=0x%x (%lu)",
+		current->comm, n, err, src_off, dst_off, len, data);
 }
 
-static char *thread_result_get(const char *name,
-		struct dmatest_thread_result *tr)
+static void dbg_result(const char *err, unsigned int n, unsigned int src_off,
+		       unsigned int dst_off, unsigned int len,
+		       unsigned long data)
 {
-	static const char * const messages[] = {
-		[DMATEST_ET_OK]			= "No errors",
-		[DMATEST_ET_MAP_SRC]		= "src mapping error",
-		[DMATEST_ET_MAP_DST]		= "dst mapping error",
-		[DMATEST_ET_PREP]		= "prep error",
-		[DMATEST_ET_SUBMIT]		= "submit error",
-		[DMATEST_ET_TIMEOUT]		= "test timed out",
-		[DMATEST_ET_DMA_ERROR]		=
-			"got completion callback (DMA_ERROR)",
-		[DMATEST_ET_DMA_IN_PROGRESS]	=
-			"got completion callback (DMA_IN_PROGRESS)",
-		[DMATEST_ET_VERIFY]		= "errors",
-		[DMATEST_ET_VERIFY_BUF]		= "verify errors",
-	};
-	static char buf[512];
-
-	snprintf(buf, sizeof(buf) - 1,
-		 "%s: #%u: %s with src_off=0x%x ""dst_off=0x%x len=0x%x (%lu)",
-		 name, tr->n, messages[tr->type], tr->src_off, tr->dst_off,
-		 tr->len, tr->data);
-
-	return buf;
+	pr_debug("%s: result #%u: '%s' with src_off=0x%x dst_off=0x%x len=0x%x (%lu)",
+		   current->comm, n, err, src_off, dst_off, len, data);
 }
 
-static int thread_result_add(struct dmatest_info *info,
-		struct dmatest_result *r, enum dmatest_error_type type,
-		unsigned int n, unsigned int src_off, unsigned int dst_off,
-		unsigned int len, unsigned long data)
+#define verbose_result(err, n, src_off, dst_off, len, data) ({ \
+	if (verbose) \
+		result(err, n, src_off, dst_off, len, data); \
+	else \
+		dbg_result(err, n, src_off, dst_off, len, data); \
+})
+
+static unsigned long long dmatest_persec(s64 runtime, unsigned int val)
 {
-	struct dmatest_thread_result *tr;
+	unsigned long long per_sec = 1000000;
 
-	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
-	if (!tr)
-		return -ENOMEM;
+	if (runtime <= 0)
+		return 0;
 
-	tr->type = type;
-	tr->n = n;
-	tr->src_off = src_off;
-	tr->dst_off = dst_off;
-	tr->len = len;
-	tr->data = data;
+	/* drop precision until runtime is 32-bits */
+	while (runtime > UINT_MAX) {
+		runtime >>= 1;
+		per_sec <<= 1;
+	}
 
-	mutex_lock(&info->results_lock);
-	list_add_tail(&tr->node, &r->results);
-	mutex_unlock(&info->results_lock);
-
-	if (tr->type == DMATEST_ET_OK)
-		pr_debug("%s\n", thread_result_get(r->name, tr));
-	else
-		pr_warn("%s\n", thread_result_get(r->name, tr));
-
-	return 0;
+	per_sec *= val;
+	do_div(per_sec, runtime);
+	return per_sec;
 }
 
-static unsigned int verify_result_add(struct dmatest_info *info,
-		struct dmatest_result *r, unsigned int n,
-		unsigned int src_off, unsigned int dst_off, unsigned int len,
-		u8 **bufs, int whence, unsigned int counter, u8 pattern,
-		bool is_srcbuf)
+static unsigned long long dmatest_KBs(s64 runtime, unsigned long long len)
 {
-	struct dmatest_verify_result *vr;
-	unsigned int error_count;
-	unsigned int buf_off = is_srcbuf ? src_off : dst_off;
-	unsigned int start, end;
-
-	if (whence < 0) {
-		start = 0;
-		end = buf_off;
-	} else if (whence > 0) {
-		start = buf_off + len;
-		end = info->params.buf_size;
-	} else {
-		start = buf_off;
-		end = buf_off + len;
-	}
-
-	vr = kmalloc(sizeof(*vr), GFP_KERNEL);
-	if (!vr) {
-		pr_warn("dmatest: No memory to store verify result\n");
-		return dmatest_verify(NULL, bufs, start, end, counter, pattern,
-				      is_srcbuf);
-	}
-
-	vr->pattern = pattern;
-	vr->is_srcbuf = is_srcbuf;
-
-	error_count = dmatest_verify(vr, bufs, start, end, counter, pattern,
-				     is_srcbuf);
-	if (error_count) {
-		vr->error_count = error_count;
-		thread_result_add(info, r, DMATEST_ET_VERIFY_BUF, n, src_off,
-				  dst_off, len, (unsigned long)vr);
-		return error_count;
-	}
-
-	kfree(vr);
-	return 0;
-}
-
-static void result_free(struct dmatest_info *info, const char *name)
-{
-	struct dmatest_result *r, *_r;
-
-	mutex_lock(&info->results_lock);
-	list_for_each_entry_safe(r, _r, &info->results, node) {
-		struct dmatest_thread_result *tr, *_tr;
-
-		if (name && strcmp(r->name, name))
-			continue;
-
-		list_for_each_entry_safe(tr, _tr, &r->results, node) {
-			if (tr->type == DMATEST_ET_VERIFY_BUF)
-				kfree(tr->vr);
-			list_del(&tr->node);
-			kfree(tr);
-		}
-
-		kfree(r->name);
-		list_del(&r->node);
-		kfree(r);
-	}
-
-	mutex_unlock(&info->results_lock);
-}
-
-static struct dmatest_result *result_init(struct dmatest_info *info,
-		const char *name)
-{
-	struct dmatest_result *r;
-
-	r = kzalloc(sizeof(*r), GFP_KERNEL);
-	if (r) {
-		r->name = kstrdup(name, GFP_KERNEL);
-		INIT_LIST_HEAD(&r->results);
-		mutex_lock(&info->results_lock);
-		list_add_tail(&r->node, &info->results);
-		mutex_unlock(&info->results_lock);
-	}
-	return r;
+	return dmatest_persec(runtime, len >> 10);
 }
 
 /*
@@ -525,7 +405,6 @@
 	struct dmatest_params	*params;
 	struct dma_chan		*chan;
 	struct dma_device	*dev;
-	const char		*thread_name;
 	unsigned int		src_off, dst_off, len;
 	unsigned int		error_count;
 	unsigned int		failed_tests = 0;
@@ -538,9 +417,10 @@
 	int			src_cnt;
 	int			dst_cnt;
 	int			i;
-	struct dmatest_result	*result;
+	ktime_t			ktime;
+	s64			runtime = 0;
+	unsigned long long	total_len = 0;
 
-	thread_name = current->comm;
 	set_freezable();
 
 	ret = -ENOMEM;
@@ -570,10 +450,6 @@
 	} else
 		goto err_thread_type;
 
-	result = result_init(info, thread_name);
-	if (!result)
-		goto err_srcs;
-
 	thread->srcs = kcalloc(src_cnt+1, sizeof(u8 *), GFP_KERNEL);
 	if (!thread->srcs)
 		goto err_srcs;
@@ -597,17 +473,17 @@
 	set_user_nice(current, 10);
 
 	/*
-	 * src buffers are freed by the DMAEngine code with dma_unmap_single()
-	 * dst buffers are freed by ourselves below
+	 * src and dst buffers are freed by ourselves below
 	 */
-	flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT
-	      | DMA_COMPL_SKIP_DEST_UNMAP | DMA_COMPL_SRC_UNMAP_SINGLE;
+	flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT;
 
+	ktime = ktime_get();
 	while (!kthread_should_stop()
 	       && !(params->iterations && total_tests >= params->iterations)) {
 		struct dma_async_tx_descriptor *tx = NULL;
-		dma_addr_t dma_srcs[src_cnt];
-		dma_addr_t dma_dsts[dst_cnt];
+		struct dmaengine_unmap_data *um;
+		dma_addr_t srcs[src_cnt];
+		dma_addr_t *dsts;
 		u8 align = 0;
 
 		total_tests++;
@@ -626,81 +502,103 @@
 			break;
 		}
 
-		len = dmatest_random() % params->buf_size + 1;
+		if (params->noverify) {
+			len = params->buf_size;
+			src_off = 0;
+			dst_off = 0;
+		} else {
+			len = dmatest_random() % params->buf_size + 1;
+			len = (len >> align) << align;
+			if (!len)
+				len = 1 << align;
+			src_off = dmatest_random() % (params->buf_size - len + 1);
+			dst_off = dmatest_random() % (params->buf_size - len + 1);
+
+			src_off = (src_off >> align) << align;
+			dst_off = (dst_off >> align) << align;
+
+			dmatest_init_srcs(thread->srcs, src_off, len,
+					  params->buf_size);
+			dmatest_init_dsts(thread->dsts, dst_off, len,
+					  params->buf_size);
+		}
+
 		len = (len >> align) << align;
 		if (!len)
 			len = 1 << align;
-		src_off = dmatest_random() % (params->buf_size - len + 1);
-		dst_off = dmatest_random() % (params->buf_size - len + 1);
+		total_len += len;
 
-		src_off = (src_off >> align) << align;
-		dst_off = (dst_off >> align) << align;
+		um = dmaengine_get_unmap_data(dev->dev, src_cnt+dst_cnt,
+					      GFP_KERNEL);
+		if (!um) {
+			failed_tests++;
+			result("unmap data NULL", total_tests,
+			       src_off, dst_off, len, ret);
+			continue;
+		}
 
-		dmatest_init_srcs(thread->srcs, src_off, len, params->buf_size);
-		dmatest_init_dsts(thread->dsts, dst_off, len, params->buf_size);
-
+		um->len = params->buf_size;
 		for (i = 0; i < src_cnt; i++) {
-			u8 *buf = thread->srcs[i] + src_off;
+			unsigned long buf = (unsigned long) thread->srcs[i];
+			struct page *pg = virt_to_page(buf);
+			unsigned pg_off = buf & ~PAGE_MASK;
 
-			dma_srcs[i] = dma_map_single(dev->dev, buf, len,
-						     DMA_TO_DEVICE);
-			ret = dma_mapping_error(dev->dev, dma_srcs[i]);
+			um->addr[i] = dma_map_page(dev->dev, pg, pg_off,
+						   um->len, DMA_TO_DEVICE);
+			srcs[i] = um->addr[i] + src_off;
+			ret = dma_mapping_error(dev->dev, um->addr[i]);
 			if (ret) {
-				unmap_src(dev->dev, dma_srcs, len, i);
-				thread_result_add(info, result,
-						  DMATEST_ET_MAP_SRC,
-						  total_tests, src_off, dst_off,
-						  len, ret);
+				dmaengine_unmap_put(um);
+				result("src mapping error", total_tests,
+				       src_off, dst_off, len, ret);
 				failed_tests++;
 				continue;
 			}
+			um->to_cnt++;
 		}
 		/* map with DMA_BIDIRECTIONAL to force writeback/invalidate */
+		dsts = &um->addr[src_cnt];
 		for (i = 0; i < dst_cnt; i++) {
-			dma_dsts[i] = dma_map_single(dev->dev, thread->dsts[i],
-						     params->buf_size,
-						     DMA_BIDIRECTIONAL);
-			ret = dma_mapping_error(dev->dev, dma_dsts[i]);
+			unsigned long buf = (unsigned long) thread->dsts[i];
+			struct page *pg = virt_to_page(buf);
+			unsigned pg_off = buf & ~PAGE_MASK;
+
+			dsts[i] = dma_map_page(dev->dev, pg, pg_off, um->len,
+					       DMA_BIDIRECTIONAL);
+			ret = dma_mapping_error(dev->dev, dsts[i]);
 			if (ret) {
-				unmap_src(dev->dev, dma_srcs, len, src_cnt);
-				unmap_dst(dev->dev, dma_dsts, params->buf_size,
-					  i);
-				thread_result_add(info, result,
-						  DMATEST_ET_MAP_DST,
-						  total_tests, src_off, dst_off,
-						  len, ret);
+				dmaengine_unmap_put(um);
+				result("dst mapping error", total_tests,
+				       src_off, dst_off, len, ret);
 				failed_tests++;
 				continue;
 			}
+			um->bidi_cnt++;
 		}
 
 		if (thread->type == DMA_MEMCPY)
 			tx = dev->device_prep_dma_memcpy(chan,
-							 dma_dsts[0] + dst_off,
-							 dma_srcs[0], len,
-							 flags);
+							 dsts[0] + dst_off,
+							 srcs[0], len, flags);
 		else if (thread->type == DMA_XOR)
 			tx = dev->device_prep_dma_xor(chan,
-						      dma_dsts[0] + dst_off,
-						      dma_srcs, src_cnt,
+						      dsts[0] + dst_off,
+						      srcs, src_cnt,
 						      len, flags);
 		else if (thread->type == DMA_PQ) {
 			dma_addr_t dma_pq[dst_cnt];
 
 			for (i = 0; i < dst_cnt; i++)
-				dma_pq[i] = dma_dsts[i] + dst_off;
-			tx = dev->device_prep_dma_pq(chan, dma_pq, dma_srcs,
+				dma_pq[i] = dsts[i] + dst_off;
+			tx = dev->device_prep_dma_pq(chan, dma_pq, srcs,
 						     src_cnt, pq_coefs,
 						     len, flags);
 		}
 
 		if (!tx) {
-			unmap_src(dev->dev, dma_srcs, len, src_cnt);
-			unmap_dst(dev->dev, dma_dsts, params->buf_size,
-				  dst_cnt);
-			thread_result_add(info, result, DMATEST_ET_PREP,
-					  total_tests, src_off, dst_off,
-					  len, 0);
+			dmaengine_unmap_put(um);
+			result("prep error", total_tests, src_off,
+			       dst_off, len, ret);
 			msleep(100);
 			failed_tests++;
 			continue;
@@ -712,9 +610,9 @@
 		cookie = tx->tx_submit(tx);
 
 		if (dma_submit_error(cookie)) {
-			thread_result_add(info, result, DMATEST_ET_SUBMIT,
-					  total_tests, src_off, dst_off,
-					  len, cookie);
+			dmaengine_unmap_put(um);
+			result("submit error", total_tests, src_off,
+			       dst_off, len, ret);
 			msleep(100);
 			failed_tests++;
 			continue;
@@ -735,59 +633,59 @@
 			 * free it this time?" dancing.  For now, just
 			 * leave it dangling.
 			 */
-			thread_result_add(info, result, DMATEST_ET_TIMEOUT,
-					  total_tests, src_off, dst_off,
-					  len, 0);
+			dmaengine_unmap_put(um);
+			result("test timed out", total_tests, src_off, dst_off,
+			       len, 0);
 			failed_tests++;
 			continue;
-		} else if (status != DMA_SUCCESS) {
-			enum dmatest_error_type type = (status == DMA_ERROR) ?
-				DMATEST_ET_DMA_ERROR : DMATEST_ET_DMA_IN_PROGRESS;
-			thread_result_add(info, result, type,
-					  total_tests, src_off, dst_off,
-					  len, status);
+		} else if (status != DMA_COMPLETE) {
+			dmaengine_unmap_put(um);
+			result(status == DMA_ERROR ?
+			       "completion error status" :
+			       "completion busy status", total_tests, src_off,
+			       dst_off, len, ret);
 			failed_tests++;
 			continue;
 		}
 
-		/* Unmap by myself (see DMA_COMPL_SKIP_DEST_UNMAP above) */
-		unmap_dst(dev->dev, dma_dsts, params->buf_size, dst_cnt);
+		dmaengine_unmap_put(um);
 
-		error_count = 0;
+		if (params->noverify) {
+			verbose_result("test passed", total_tests, src_off,
+				       dst_off, len, 0);
+			continue;
+		}
 
-		pr_debug("%s: verifying source buffer...\n", thread_name);
-		error_count += verify_result_add(info, result, total_tests,
-				src_off, dst_off, len, thread->srcs, -1,
+		pr_debug("%s: verifying source buffer...\n", current->comm);
+		error_count = dmatest_verify(thread->srcs, 0, src_off,
 				0, PATTERN_SRC, true);
-		error_count += verify_result_add(info, result, total_tests,
-				src_off, dst_off, len, thread->srcs, 0,
-				src_off, PATTERN_SRC | PATTERN_COPY, true);
-		error_count += verify_result_add(info, result, total_tests,
-				src_off, dst_off, len, thread->srcs, 1,
-				src_off + len, PATTERN_SRC, true);
+		error_count += dmatest_verify(thread->srcs, src_off,
+				src_off + len, src_off,
+				PATTERN_SRC | PATTERN_COPY, true);
+		error_count += dmatest_verify(thread->srcs, src_off + len,
+				params->buf_size, src_off + len,
+				PATTERN_SRC, true);
 
-		pr_debug("%s: verifying dest buffer...\n", thread_name);
-		error_count += verify_result_add(info, result, total_tests,
-				src_off, dst_off, len, thread->dsts, -1,
+		pr_debug("%s: verifying dest buffer...\n", current->comm);
+		error_count += dmatest_verify(thread->dsts, 0, dst_off,
 				0, PATTERN_DST, false);
-		error_count += verify_result_add(info, result, total_tests,
-				src_off, dst_off, len, thread->dsts, 0,
-				src_off, PATTERN_SRC | PATTERN_COPY, false);
-		error_count += verify_result_add(info, result, total_tests,
-				src_off, dst_off, len, thread->dsts, 1,
-				dst_off + len, PATTERN_DST, false);
+		error_count += dmatest_verify(thread->dsts, dst_off,
+				dst_off + len, src_off,
+				PATTERN_SRC | PATTERN_COPY, false);
+		error_count += dmatest_verify(thread->dsts, dst_off + len,
+				params->buf_size, dst_off + len,
+				PATTERN_DST, false);
 
 		if (error_count) {
-			thread_result_add(info, result, DMATEST_ET_VERIFY,
-					  total_tests, src_off, dst_off,
-					  len, error_count);
+			result("data error", total_tests, src_off, dst_off,
+			       len, error_count);
 			failed_tests++;
 		} else {
-			thread_result_add(info, result, DMATEST_ET_OK,
-					  total_tests, src_off, dst_off,
-					  len, 0);
+			verbose_result("test passed", total_tests, src_off,
+				       dst_off, len, 0);
 		}
 	}
+	runtime = ktime_us_delta(ktime_get(), ktime);
 
 	ret = 0;
 	for (i = 0; thread->dsts[i]; i++)
@@ -802,20 +700,17 @@
 err_srcs:
 	kfree(pq_coefs);
 err_thread_type:
-	pr_notice("%s: terminating after %u tests, %u failures (status %d)\n",
-			thread_name, total_tests, failed_tests, ret);
+	pr_info("%s: summary %u tests, %u failures %llu iops %llu KB/s (%d)\n",
+		current->comm, total_tests, failed_tests,
+		dmatest_persec(runtime, total_tests),
+		dmatest_KBs(runtime, total_len), ret);
 
 	/* terminate all transfers on specified channels */
 	if (ret)
 		dmaengine_terminate_all(chan);
 
 	thread->done = true;
-
-	if (params->iterations > 0)
-		while (!kthread_should_stop()) {
-			DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wait_dmatest_exit);
-			interruptible_sleep_on(&wait_dmatest_exit);
-		}
+	wake_up(&thread_wait);
 
 	return ret;
 }
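
[Editor's note] The summary printed at the end of dmatest_func() turns the ktime_us_delta() runtime into iops and KB/s via dmatest_persec() above, which halves the microsecond runtime (and doubles the scale factor) until it fits the 32-bit divisor do_div() expects. A small userspace sketch of the same arithmetic, with illustrative numbers and plain 64-bit division standing in for do_div():

#include <stdio.h>
#include <limits.h>

/* Userspace mirror of dmatest_persec(): val events over runtime microseconds. */
static unsigned long long persec(long long runtime_us, unsigned long long val)
{
	unsigned long long per_sec = 1000000;	/* microseconds per second */

	if (runtime_us <= 0)
		return 0;

	/* drop precision until the divisor fits in 32 bits, as do_div() needs */
	while (runtime_us > UINT_MAX) {
		runtime_us >>= 1;
		per_sec <<= 1;
	}

	return per_sec * val / runtime_us;	/* plain division stands in for do_div() */
}

int main(void)
{
	long long runtime_us = 8500000;			/* 8.5 s of test runtime */
	unsigned long long total_tests = 10000;
	unsigned long long total_len = 4ULL << 30;	/* 4 GiB moved in total */

	printf("%llu iops, %llu KB/s\n",
	       persec(runtime_us, total_tests),
	       persec(runtime_us, total_len >> 10));	/* dmatest_KBs() equivalent */
	return 0;
}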
@@ -828,9 +723,10 @@
 
 	list_for_each_entry_safe(thread, _thread, &dtc->threads, node) {
 		ret = kthread_stop(thread->task);
-		pr_debug("dmatest: thread %s exited with status %d\n",
-				thread->task->comm, ret);
+		pr_debug("thread %s exited with status %d\n",
+			 thread->task->comm, ret);
 		list_del(&thread->node);
+		put_task_struct(thread->task);
 		kfree(thread);
 	}
 
@@ -861,27 +757,27 @@
 	for (i = 0; i < params->threads_per_chan; i++) {
 		thread = kzalloc(sizeof(struct dmatest_thread), GFP_KERNEL);
 		if (!thread) {
-			pr_warning("dmatest: No memory for %s-%s%u\n",
-				   dma_chan_name(chan), op, i);
-
+			pr_warn("No memory for %s-%s%u\n",
+				dma_chan_name(chan), op, i);
 			break;
 		}
 		thread->info = info;
 		thread->chan = dtc->chan;
 		thread->type = type;
 		smp_wmb();
-		thread->task = kthread_run(dmatest_func, thread, "%s-%s%u",
+		thread->task = kthread_create(dmatest_func, thread, "%s-%s%u",
 				dma_chan_name(chan), op, i);
 		if (IS_ERR(thread->task)) {
-			pr_warning("dmatest: Failed to run thread %s-%s%u\n",
-					dma_chan_name(chan), op, i);
+			pr_warn("Failed to create thread %s-%s%u\n",
+				dma_chan_name(chan), op, i);
 			kfree(thread);
 			break;
 		}
 
 		/* srcbuf and dstbuf are allocated by the thread itself */
-
+		get_task_struct(thread->task);
 		list_add_tail(&thread->node, &dtc->threads);
+		wake_up_process(thread->task);
 	}
 
 	return i;
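
[Editor's note] Threads are now created with kthread_create() instead of kthread_run(): taking a reference with get_task_struct() before wake_up_process() lets dmatest_func() finish on its own (it ends with wake_up(&thread_wait)) while the later kthread_stop()/put_task_struct() pair in dmatest_cleanup_channel() still has a valid task to query. A condensed sketch of the pattern, with hypothetical names:

#include <linux/kthread.h>
#include <linux/delay.h>
#include <linux/printk.h>
#include <linux/sched.h>

/* Hypothetical worker, only to illustrate the reference handling. */
static int worker_fn(void *data)
{
	while (!kthread_should_stop())
		msleep(100);		/* placeholder for real work; may also return early */
	return 0;
}

static struct task_struct *worker_start(void)
{
	struct task_struct *task;

	task = kthread_create(worker_fn, NULL, "example-worker");
	if (IS_ERR(task))
		return task;

	get_task_struct(task);		/* keep task_struct valid even after the thread exits */
	wake_up_process(task);		/* the thread only starts running now */
	return task;
}

static void worker_stop(struct task_struct *task)
{
	int ret = kthread_stop(task);	/* safe: our reference keeps the task around */

	pr_debug("worker exited with status %d\n", ret);
	put_task_struct(task);		/* drop the reference taken in worker_start() */
}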
@@ -897,7 +793,7 @@
 
 	dtc = kmalloc(sizeof(struct dmatest_chan), GFP_KERNEL);
 	if (!dtc) {
-		pr_warning("dmatest: No memory for %s\n", dma_chan_name(chan));
+		pr_warn("No memory for %s\n", dma_chan_name(chan));
 		return -ENOMEM;
 	}
 
@@ -917,7 +813,7 @@
 		thread_count += cnt > 0 ? cnt : 0;
 	}
 
-	pr_info("dmatest: Started %u threads using %s\n",
+	pr_info("Started %u threads using %s\n",
 		thread_count, dma_chan_name(chan));
 
 	list_add_tail(&dtc->node, &info->channels);
@@ -937,20 +833,20 @@
 		return true;
 }
 
-static int __run_threaded_test(struct dmatest_info *info)
+static void request_channels(struct dmatest_info *info,
+			     enum dma_transaction_type type)
 {
 	dma_cap_mask_t mask;
-	struct dma_chan *chan;
-	struct dmatest_params *params = &info->params;
-	int err = 0;
 
 	dma_cap_zero(mask);
-	dma_cap_set(DMA_MEMCPY, mask);
+	dma_cap_set(type, mask);
 	for (;;) {
+		struct dmatest_params *params = &info->params;
+		struct dma_chan *chan;
+
 		chan = dma_request_channel(mask, filter, params);
 		if (chan) {
-			err = dmatest_add_channel(info, chan);
-			if (err) {
+			if (dmatest_add_channel(info, chan)) {
 				dma_release_channel(chan);
 				break; /* add_channel failed, punt */
 			}
@@ -960,57 +856,12 @@
 		    info->nr_channels >= params->max_channels)
 			break; /* we have all we need */
 	}
-	return err;
 }
 
-#ifndef MODULE
-static int run_threaded_test(struct dmatest_info *info)
-{
-	int ret;
-
-	mutex_lock(&info->lock);
-	ret = __run_threaded_test(info);
-	mutex_unlock(&info->lock);
-	return ret;
-}
-#endif
-
-static void __stop_threaded_test(struct dmatest_info *info)
-{
-	struct dmatest_chan *dtc, *_dtc;
-	struct dma_chan *chan;
-
-	list_for_each_entry_safe(dtc, _dtc, &info->channels, node) {
-		list_del(&dtc->node);
-		chan = dtc->chan;
-		dmatest_cleanup_channel(dtc);
-		pr_debug("dmatest: dropped channel %s\n", dma_chan_name(chan));
-		dma_release_channel(chan);
-	}
-
-	info->nr_channels = 0;
-}
-
-static void stop_threaded_test(struct dmatest_info *info)
-{
-	mutex_lock(&info->lock);
-	__stop_threaded_test(info);
-	mutex_unlock(&info->lock);
-}
-
-static int __restart_threaded_test(struct dmatest_info *info, bool run)
+static void run_threaded_test(struct dmatest_info *info)
 {
 	struct dmatest_params *params = &info->params;
 
-	/* Stop any running test first */
-	__stop_threaded_test(info);
-
-	if (run == false)
-		return 0;
-
-	/* Clear results from previous run */
-	result_free(info, NULL);
-
 	/* Copy test parameters */
 	params->buf_size = test_buf_size;
 	strlcpy(params->channel, strim(test_channel), sizeof(params->channel));
@@ -1021,166 +872,102 @@
 	params->xor_sources = xor_sources;
 	params->pq_sources = pq_sources;
 	params->timeout = timeout;
+	params->noverify = noverify;
+
+	request_channels(info, DMA_MEMCPY);
+	request_channels(info, DMA_XOR);
+	request_channels(info, DMA_PQ);
+}
+
+static void stop_threaded_test(struct dmatest_info *info)
+{
+	struct dmatest_chan *dtc, *_dtc;
+	struct dma_chan *chan;
+
+	list_for_each_entry_safe(dtc, _dtc, &info->channels, node) {
+		list_del(&dtc->node);
+		chan = dtc->chan;
+		dmatest_cleanup_channel(dtc);
+		pr_debug("dropped channel %s\n", dma_chan_name(chan));
+		dma_release_channel(chan);
+	}
+
+	info->nr_channels = 0;
+}
+
+static void restart_threaded_test(struct dmatest_info *info, bool run)
+{
+	/* we might be called early to set run=, defer running until all
+	 * parameters have been evaluated
+	 */
+	if (!info->did_init)
+		return;
+
+	/* Stop any running test first */
+	stop_threaded_test(info);
 
 	/* Run test with new parameters */
-	return __run_threaded_test(info);
+	run_threaded_test(info);
 }
 
-static bool __is_threaded_test_run(struct dmatest_info *info)
+static int dmatest_run_get(char *val, const struct kernel_param *kp)
 {
-	struct dmatest_chan *dtc;
-
-	list_for_each_entry(dtc, &info->channels, node) {
-		struct dmatest_thread *thread;
-
-		list_for_each_entry(thread, &dtc->threads, node) {
-			if (!thread->done)
-				return true;
-		}
-	}
-
-	return false;
-}
-
-static ssize_t dtf_read_run(struct file *file, char __user *user_buf,
-		size_t count, loff_t *ppos)
-{
-	struct dmatest_info *info = file->private_data;
-	char buf[3];
+	struct dmatest_info *info = &test_info;
 
 	mutex_lock(&info->lock);
-
-	if (__is_threaded_test_run(info)) {
-		buf[0] = 'Y';
+	if (is_threaded_test_run(info)) {
+		dmatest_run = true;
 	} else {
-		__stop_threaded_test(info);
-		buf[0] = 'N';
+		stop_threaded_test(info);
+		dmatest_run = false;
 	}
+	mutex_unlock(&info->lock);
+
+	return param_get_bool(val, kp);
+}
+
+static int dmatest_run_set(const char *val, const struct kernel_param *kp)
+{
+	struct dmatest_info *info = &test_info;
+	int ret;
+
+	mutex_lock(&info->lock);
+	ret = param_set_bool(val, kp);
+	if (ret) {
+		mutex_unlock(&info->lock);
+		return ret;
+	}
+
+	if (is_threaded_test_run(info))
+		ret = -EBUSY;
+	else if (dmatest_run)
+		restart_threaded_test(info, dmatest_run);
 
 	mutex_unlock(&info->lock);
-	buf[1] = '\n';
-	buf[2] = 0x00;
-	return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
-}
 
-static ssize_t dtf_write_run(struct file *file, const char __user *user_buf,
-		size_t count, loff_t *ppos)
-{
-	struct dmatest_info *info = file->private_data;
-	char buf[16];
-	bool bv;
-	int ret = 0;
-
-	if (copy_from_user(buf, user_buf, min(count, (sizeof(buf) - 1))))
-		return -EFAULT;
-
-	if (strtobool(buf, &bv) == 0) {
-		mutex_lock(&info->lock);
-
-		if (__is_threaded_test_run(info))
-			ret = -EBUSY;
-		else
-			ret = __restart_threaded_test(info, bv);
-
-		mutex_unlock(&info->lock);
-	}
-
-	return ret ? ret : count;
-}
-
-static const struct file_operations dtf_run_fops = {
-	.read	= dtf_read_run,
-	.write	= dtf_write_run,
-	.open	= simple_open,
-	.llseek	= default_llseek,
-};
-
-static int dtf_results_show(struct seq_file *sf, void *data)
-{
-	struct dmatest_info *info = sf->private;
-	struct dmatest_result *result;
-	struct dmatest_thread_result *tr;
-	unsigned int i;
-
-	mutex_lock(&info->results_lock);
-	list_for_each_entry(result, &info->results, node) {
-		list_for_each_entry(tr, &result->results, node) {
-			seq_printf(sf, "%s\n",
-				thread_result_get(result->name, tr));
-			if (tr->type == DMATEST_ET_VERIFY_BUF) {
-				for (i = 0; i < tr->vr->error_count; i++) {
-					seq_printf(sf, "\t%s\n",
-						verify_result_get_one(tr->vr, i));
-				}
-			}
-		}
-	}
-
-	mutex_unlock(&info->results_lock);
-	return 0;
-}
-
-static int dtf_results_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, dtf_results_show, inode->i_private);
-}
-
-static const struct file_operations dtf_results_fops = {
-	.open		= dtf_results_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
-static int dmatest_register_dbgfs(struct dmatest_info *info)
-{
-	struct dentry *d;
-
-	d = debugfs_create_dir("dmatest", NULL);
-	if (IS_ERR(d))
-		return PTR_ERR(d);
-	if (!d)
-		goto err_root;
-
-	info->root = d;
-
-	/* Run or stop threaded test */
-	debugfs_create_file("run", S_IWUSR | S_IRUGO, info->root, info,
-			    &dtf_run_fops);
-
-	/* Results of test in progress */
-	debugfs_create_file("results", S_IRUGO, info->root, info,
-			    &dtf_results_fops);
-
-	return 0;
-
-err_root:
-	pr_err("dmatest: Failed to initialize debugfs\n");
-	return -ENOMEM;
+	return ret;
 }
 
 static int __init dmatest_init(void)
 {
 	struct dmatest_info *info = &test_info;
-	int ret;
+	struct dmatest_params *params = &info->params;
 
-	memset(info, 0, sizeof(*info));
+	if (dmatest_run) {
+		mutex_lock(&info->lock);
+		run_threaded_test(info);
+		mutex_unlock(&info->lock);
+	}
 
-	mutex_init(&info->lock);
-	INIT_LIST_HEAD(&info->channels);
+	if (params->iterations && wait)
+		wait_event(thread_wait, !is_threaded_test_run(info));
 
-	mutex_init(&info->results_lock);
-	INIT_LIST_HEAD(&info->results);
+	/* module parameters are stable, inittime tests are started,
+	 * let userspace take over 'run' control
+	 */
+	info->did_init = true;
 
-	ret = dmatest_register_dbgfs(info);
-	if (ret)
-		return ret;
-
-#ifdef MODULE
 	return 0;
-#else
-	return run_threaded_test(info);
-#endif
 }
 /* when compiled-in wait for drivers to load first */
 late_initcall(dmatest_init);
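
[Editor's note] dmatest_run_get()/dmatest_run_set() replace the old debugfs 'run' file with a module parameter; the wiring through module_param_cb() and a struct kernel_param_ops is not visible in this hunk, so the sketch below is an assumption about how the handlers are hooked up:

#include <linux/module.h>
#include <linux/moduleparam.h>

/* The real module already declares dmatest_run and the two handlers above;
 * this hookup is assumed, not shown in the hunk.
 */
static bool dmatest_run;

static const struct kernel_param_ops run_ops = {
	.set = dmatest_run_set,		/* rejects writes while a test is running */
	.get = dmatest_run_get,		/* reports and reaps finished test threads */
};
module_param_cb(run, &run_ops, &dmatest_run, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(run, "Run the test (default: false)");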
@@ -1189,9 +976,9 @@
 {
 	struct dmatest_info *info = &test_info;
 
-	debugfs_remove_recursive(info->root);
+	mutex_lock(&info->lock);
 	stop_threaded_test(info);
-	result_free(info, NULL);
+	mutex_unlock(&info->lock);
 }
 module_exit(dmatest_exit);
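
[Editor's note] The mapping loop earlier in this file is the client-side counterpart of the new unmap-data API: dmaengine_get_unmap_data() returns a reference-counted structure whose addr[] array and to_cnt/bidi_cnt counters record what was mapped, and a single dmaengine_unmap_put() releases everything on any exit path. A trimmed sketch for one source and one destination buffer (local names are illustrative):

#include <linux/dmaengine.h>
#include <linux/dma-mapping.h>
#include <linux/mm.h>

/*
 * Map one source and one destination through shared unmap data, following
 * the pattern dmatest now uses. Returns the unmap data or NULL on failure.
 */
static struct dmaengine_unmap_data *map_one_copy(struct device *dev,
						 void *src, void *dst,
						 size_t len)
{
	struct dmaengine_unmap_data *um;

	um = dmaengine_get_unmap_data(dev, 2, GFP_KERNEL);
	if (!um)
		return NULL;

	um->len = len;
	um->addr[0] = dma_map_page(dev, virt_to_page(src),
				   offset_in_page(src), len, DMA_TO_DEVICE);
	if (dma_mapping_error(dev, um->addr[0]))
		goto err;
	um->to_cnt++;

	um->addr[1] = dma_map_page(dev, virt_to_page(dst),
				   offset_in_page(dst), len, DMA_BIDIRECTIONAL);
	if (dma_mapping_error(dev, um->addr[1]))
		goto err;
	um->bidi_cnt++;

	return um;
err:
	dmaengine_unmap_put(um);	/* releases only what the counters cover */
	return NULL;
}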
 
diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c
index 89eb89f..7516be4 100644
--- a/drivers/dma/dw/core.c
+++ b/drivers/dma/dw/core.c
@@ -85,10 +85,6 @@
 {
 	return &chan->dev->device;
 }
-static struct device *chan2parent(struct dma_chan *chan)
-{
-	return chan->dev->device.parent;
-}
 
 static struct dw_desc *dwc_first_active(struct dw_dma_chan *dwc)
 {
@@ -311,26 +307,7 @@
 	list_splice_init(&desc->tx_list, &dwc->free_list);
 	list_move(&desc->desc_node, &dwc->free_list);
 
-	if (!is_slave_direction(dwc->direction)) {
-		struct device *parent = chan2parent(&dwc->chan);
-		if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
-			if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
-				dma_unmap_single(parent, desc->lli.dar,
-					desc->total_len, DMA_FROM_DEVICE);
-			else
-				dma_unmap_page(parent, desc->lli.dar,
-					desc->total_len, DMA_FROM_DEVICE);
-		}
-		if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-			if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
-				dma_unmap_single(parent, desc->lli.sar,
-					desc->total_len, DMA_TO_DEVICE);
-			else
-				dma_unmap_page(parent, desc->lli.sar,
-					desc->total_len, DMA_TO_DEVICE);
-		}
-	}
-
+	dma_descriptor_unmap(txd);
 	spin_unlock_irqrestore(&dwc->lock, flags);
 
 	if (callback)
@@ -1098,13 +1075,13 @@
 	enum dma_status		ret;
 
 	ret = dma_cookie_status(chan, cookie, txstate);
-	if (ret == DMA_SUCCESS)
+	if (ret == DMA_COMPLETE)
 		return ret;
 
 	dwc_scan_descriptors(to_dw_dma(chan->device), dwc);
 
 	ret = dma_cookie_status(chan, cookie, txstate);
-	if (ret != DMA_SUCCESS)
+	if (ret != DMA_COMPLETE)
 		dma_set_residue(txstate, dwc_get_residue(dwc));
 
 	if (dwc->paused && ret == DMA_IN_PROGRESS)
diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
index bef8a36..2539ea0 100644
--- a/drivers/dma/edma.c
+++ b/drivers/dma/edma.c
@@ -46,14 +46,21 @@
 #define EDMA_CHANS	64
 #endif /* CONFIG_ARCH_DAVINCI_DA8XX */
 
-/* Max of 16 segments per channel to conserve PaRAM slots */
-#define MAX_NR_SG		16
+/*
+ * Max of 20 segments per channel to conserve PaRAM slots
+ * Also note that MAX_NR_SG should be at least the number of periods
+ * that are required for ASoC, otherwise DMA prep calls will
+ * fail. Today davinci-pcm is the only user of this driver and
+ * requires at least 17 slots, so we set the default to 20.
+ */
+#define MAX_NR_SG		20
 #define EDMA_MAX_SLOTS		MAX_NR_SG
 #define EDMA_DESCRIPTORS	16
 
 struct edma_desc {
 	struct virt_dma_desc		vdesc;
 	struct list_head		node;
+	int				cyclic;
 	int				absync;
 	int				pset_nr;
 	int				processed;
@@ -167,8 +174,13 @@
 	 * then setup a link to the dummy slot, this results in all future
 	 * events being absorbed and that's OK because we're done
 	 */
-	if (edesc->processed == edesc->pset_nr)
-		edma_link(echan->slot[nslots-1], echan->ecc->dummy_slot);
+	if (edesc->processed == edesc->pset_nr) {
+		if (edesc->cyclic)
+			edma_link(echan->slot[nslots-1], echan->slot[1]);
+		else
+			edma_link(echan->slot[nslots-1],
+				  echan->ecc->dummy_slot);
+	}
 
 	edma_resume(echan->ch_num);
 
@@ -250,6 +262,117 @@
 	return ret;
 }
 
+/*
+ * A PaRAM set configuration abstraction used by other modes
+ * @chan: Channel whose PaRAM set we're configuring
+ * @pset: PaRAM set to initialize and setup.
+ * @src_addr: Source address of the DMA
+ * @dst_addr: Destination address of the DMA
+ * @burst: In units of dev_width, how much to send
+ * @dev_width: Width of a single device access (slave bus width)
+ * @dma_length: Total length of the DMA transfer
+ * @direction: Direction of the transfer
+ */
+static int edma_config_pset(struct dma_chan *chan, struct edmacc_param *pset,
+	dma_addr_t src_addr, dma_addr_t dst_addr, u32 burst,
+	enum dma_slave_buswidth dev_width, unsigned int dma_length,
+	enum dma_transfer_direction direction)
+{
+	struct edma_chan *echan = to_edma_chan(chan);
+	struct device *dev = chan->device->dev;
+	int acnt, bcnt, ccnt, cidx;
+	int src_bidx, dst_bidx, src_cidx, dst_cidx;
+	int absync;
+
+	acnt = dev_width;
+	/*
+	 * If the maxburst is equal to the fifo width, use
+	 * A-synced transfers. This allows for large contiguous
+	 * buffer transfers using only one PaRAM set.
+	 */
+	if (burst == 1) {
+		/*
+		 * For the A-sync case, bcnt and ccnt are the remainder
+		 * and quotient respectively of the division of:
+		 * (dma_length / acnt) by (SZ_64K - 1). This is so
+		 * that in case bcnt overflows, we have ccnt to use.
+		 * Note: In A-sync transfer only, bcntrld is used, but it
+		 * only applies for sg_dma_len(sg) >= SZ_64K.
+		 * In this case, the approach taken is: bcnt for the
+		 * first frame will be the remainder below. Then for
+		 * every successive frame, bcnt will be SZ_64K-1. This
+		 * is ensured by setting bcntrld to 0xffff at the end of
+		 * this function.
+		 */
+		absync = false;
+		ccnt = dma_length / acnt / (SZ_64K - 1);
+		bcnt = dma_length / acnt - ccnt * (SZ_64K - 1);
+		/*
+		 * If bcnt is non-zero, we have a remainder and hence an
+		 * extra frame to transfer, so increment ccnt.
+		 */
+		if (bcnt)
+			ccnt++;
+		else
+			bcnt = SZ_64K - 1;
+		cidx = acnt;
+	} else {
+		/*
+		 * If maxburst is greater than the fifo address_width,
+		 * use AB-synced transfers where A count is the fifo
+		 * address_width and B count is the maxburst. In this
+		 * case, we are limited to transfers of C count frames
+		 * of (address_width * maxburst) where C count is limited
+		 * to SZ_64K-1. This places an upper bound on the length
+		 * of an SG segment that can be handled.
+		 */
+		absync = true;
+		bcnt = burst;
+		ccnt = dma_length / (acnt * bcnt);
+		if (ccnt > (SZ_64K - 1)) {
+			dev_err(dev, "Exceeded max SG segment size\n");
+			return -EINVAL;
+		}
+		cidx = acnt * bcnt;
+	}
+
+	if (direction == DMA_MEM_TO_DEV) {
+		src_bidx = acnt;
+		src_cidx = cidx;
+		dst_bidx = 0;
+		dst_cidx = 0;
+	} else if (direction == DMA_DEV_TO_MEM)  {
+		src_bidx = 0;
+		src_cidx = 0;
+		dst_bidx = acnt;
+		dst_cidx = cidx;
+	} else {
+		dev_err(dev, "%s: direction not implemented yet\n", __func__);
+		return -EINVAL;
+	}
+
+	pset->opt = EDMA_TCC(EDMA_CHAN_SLOT(echan->ch_num));
+	/* Configure A or AB synchronized transfers */
+	if (absync)
+		pset->opt |= SYNCDIM;
+
+	pset->src = src_addr;
+	pset->dst = dst_addr;
+
+	pset->src_dst_bidx = (dst_bidx << 16) | src_bidx;
+	pset->src_dst_cidx = (dst_cidx << 16) | src_cidx;
+
+	pset->a_b_cnt = bcnt << 16 | acnt;
+	pset->ccnt = ccnt;
+	/*
+	 * Only time when (bcntrld) auto reload is required is for
+	 * A-sync case, and in this case, a requirement of reload value
+	 * of SZ_64K-1 only is assured. 'link' is initially set to NULL
+	 * and then later will be populated by edma_execute.
+	 */
+	pset->link_bcntrld = 0xffffffff;
+	return absync;
+}
+
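
[Editor's note] For the A-synchronized branch the comment above boils down to splitting the element count (dma_length / acnt) into frames of at most SZ_64K - 1 elements, with a short first frame carrying the remainder. A runnable check of that arithmetic with illustrative numbers (1 MiB transfer, 1-byte device width):

#include <stdio.h>

#define SZ_64K 0x10000

int main(void)
{
	unsigned int dma_length = 1 << 20;	/* 1 MiB */
	unsigned int acnt = 1;			/* one byte per device access */
	unsigned int bcnt, ccnt;

	ccnt = dma_length / acnt / (SZ_64K - 1);	/* 16 full frames */
	bcnt = dma_length / acnt - ccnt * (SZ_64K - 1);	/* 16 elements left over */
	if (bcnt)
		ccnt++;			/* short first frame plus 16 reloaded frames */
	else
		bcnt = SZ_64K - 1;

	/* 16 + 16 * 65535 == 1048576: the whole buffer is covered */
	printf("bcnt=%u ccnt=%u total=%u\n",
	       bcnt, ccnt, bcnt + (ccnt - 1) * (SZ_64K - 1));
	return 0;
}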
 static struct dma_async_tx_descriptor *edma_prep_slave_sg(
 	struct dma_chan *chan, struct scatterlist *sgl,
 	unsigned int sg_len, enum dma_transfer_direction direction,
@@ -258,23 +381,21 @@
 	struct edma_chan *echan = to_edma_chan(chan);
 	struct device *dev = chan->device->dev;
 	struct edma_desc *edesc;
-	dma_addr_t dev_addr;
+	dma_addr_t src_addr = 0, dst_addr = 0;
 	enum dma_slave_buswidth dev_width;
 	u32 burst;
 	struct scatterlist *sg;
-	int acnt, bcnt, ccnt, src, dst, cidx;
-	int src_bidx, dst_bidx, src_cidx, dst_cidx;
-	int i, nslots;
+	int i, nslots, ret;
 
 	if (unlikely(!echan || !sgl || !sg_len))
 		return NULL;
 
 	if (direction == DMA_DEV_TO_MEM) {
-		dev_addr = echan->cfg.src_addr;
+		src_addr = echan->cfg.src_addr;
 		dev_width = echan->cfg.src_addr_width;
 		burst = echan->cfg.src_maxburst;
 	} else if (direction == DMA_MEM_TO_DEV) {
-		dev_addr = echan->cfg.dst_addr;
+		dst_addr = echan->cfg.dst_addr;
 		dev_width = echan->cfg.dst_addr_width;
 		burst = echan->cfg.dst_maxburst;
 	} else {
@@ -307,7 +428,6 @@
 			if (echan->slot[i] < 0) {
 				kfree(edesc);
 				dev_err(dev, "Failed to allocate slot\n");
-				kfree(edesc);
 				return NULL;
 			}
 		}
@@ -315,64 +435,21 @@
 
 	/* Configure PaRAM sets for each SG */
 	for_each_sg(sgl, sg, sg_len, i) {
+		/* Get address for each SG */
+		if (direction == DMA_DEV_TO_MEM)
+			dst_addr = sg_dma_address(sg);
+		else
+			src_addr = sg_dma_address(sg);
 
-		acnt = dev_width;
-
-		/*
-		 * If the maxburst is equal to the fifo width, use
-		 * A-synced transfers. This allows for large contiguous
-		 * buffer transfers using only one PaRAM set.
-		 */
-		if (burst == 1) {
-			edesc->absync = false;
-			ccnt = sg_dma_len(sg) / acnt / (SZ_64K - 1);
-			bcnt = sg_dma_len(sg) / acnt - ccnt * (SZ_64K - 1);
-			if (bcnt)
-				ccnt++;
-			else
-				bcnt = SZ_64K - 1;
-			cidx = acnt;
-		/*
-		 * If maxburst is greater than the fifo address_width,
-		 * use AB-synced transfers where A count is the fifo
-		 * address_width and B count is the maxburst. In this
-		 * case, we are limited to transfers of C count frames
-		 * of (address_width * maxburst) where C count is limited
-		 * to SZ_64K-1. This places an upper bound on the length
-		 * of an SG segment that can be handled.
-		 */
-		} else {
-			edesc->absync = true;
-			bcnt = burst;
-			ccnt = sg_dma_len(sg) / (acnt * bcnt);
-			if (ccnt > (SZ_64K - 1)) {
-				dev_err(dev, "Exceeded max SG segment size\n");
-				kfree(edesc);
-				return NULL;
-			}
-			cidx = acnt * bcnt;
+		ret = edma_config_pset(chan, &edesc->pset[i], src_addr,
+				       dst_addr, burst, dev_width,
+				       sg_dma_len(sg), direction);
+		if (ret < 0) {
+			kfree(edesc);
+			return NULL;
 		}
 
-		if (direction == DMA_MEM_TO_DEV) {
-			src = sg_dma_address(sg);
-			dst = dev_addr;
-			src_bidx = acnt;
-			src_cidx = cidx;
-			dst_bidx = 0;
-			dst_cidx = 0;
-		} else {
-			src = dev_addr;
-			dst = sg_dma_address(sg);
-			src_bidx = 0;
-			src_cidx = 0;
-			dst_bidx = acnt;
-			dst_cidx = cidx;
-		}
-
-		edesc->pset[i].opt = EDMA_TCC(EDMA_CHAN_SLOT(echan->ch_num));
-		/* Configure A or AB synchronized transfers */
-		if (edesc->absync)
-			edesc->pset[i].opt |= SYNCDIM;
+		edesc->absync = ret;
 
 		/* If this is the last in a current SG set of transactions,
 		   enable interrupts so that next set is processed */
@@ -382,17 +459,138 @@
 		/* If this is the last set, enable completion interrupt flag */
 		if (i == sg_len - 1)
 			edesc->pset[i].opt |= TCINTEN;
+	}
 
-		edesc->pset[i].src = src;
-		edesc->pset[i].dst = dst;
+	return vchan_tx_prep(&echan->vchan, &edesc->vdesc, tx_flags);
+}
 
-		edesc->pset[i].src_dst_bidx = (dst_bidx << 16) | src_bidx;
-		edesc->pset[i].src_dst_cidx = (dst_cidx << 16) | src_cidx;
+static struct dma_async_tx_descriptor *edma_prep_dma_cyclic(
+	struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
+	size_t period_len, enum dma_transfer_direction direction,
+	unsigned long tx_flags, void *context)
+{
+	struct edma_chan *echan = to_edma_chan(chan);
+	struct device *dev = chan->device->dev;
+	struct edma_desc *edesc;
+	dma_addr_t src_addr, dst_addr;
+	enum dma_slave_buswidth dev_width;
+	u32 burst;
+	int i, ret, nslots;
 
-		edesc->pset[i].a_b_cnt = bcnt << 16 | acnt;
-		edesc->pset[i].ccnt = ccnt;
-		edesc->pset[i].link_bcntrld = 0xffffffff;
+	if (unlikely(!echan || !buf_len || !period_len))
+		return NULL;
 
+	if (direction == DMA_DEV_TO_MEM) {
+		src_addr = echan->cfg.src_addr;
+		dst_addr = buf_addr;
+		dev_width = echan->cfg.src_addr_width;
+		burst = echan->cfg.src_maxburst;
+	} else if (direction == DMA_MEM_TO_DEV) {
+		src_addr = buf_addr;
+		dst_addr = echan->cfg.dst_addr;
+		dev_width = echan->cfg.dst_addr_width;
+		burst = echan->cfg.dst_maxburst;
+	} else {
+		dev_err(dev, "%s: bad direction?\n", __func__);
+		return NULL;
+	}
+
+	if (dev_width == DMA_SLAVE_BUSWIDTH_UNDEFINED) {
+		dev_err(dev, "Undefined slave buswidth\n");
+		return NULL;
+	}
+
+	if (unlikely(buf_len % period_len)) {
+		dev_err(dev, "Buffer length should be a multiple of the period length\n");
+		return NULL;
+	}
+
+	nslots = (buf_len / period_len) + 1;
+
+	/*
+	 * Cyclic DMA users such as audio cannot tolerate delays introduced
+	 * by cases where the number of periods is more than the maximum
+	 * number of SGs the EDMA driver can handle at a time. For DMA types
+	 * such as Slave SGs, such delays are tolerable and synchronized,
+	 * but the synchronization is difficult to achieve with Cyclic and
+	 * cannot be guaranteed, so we error out early.
+	 */
+	if (nslots > MAX_NR_SG)
+		return NULL;
+
+	edesc = kzalloc(sizeof(*edesc) + nslots *
+		sizeof(edesc->pset[0]), GFP_ATOMIC);
+	if (!edesc) {
+		dev_dbg(dev, "Failed to allocate a descriptor\n");
+		return NULL;
+	}
+
+	edesc->cyclic = 1;
+	edesc->pset_nr = nslots;
+
+	dev_dbg(dev, "%s: nslots=%d\n", __func__, nslots);
+	dev_dbg(dev, "%s: period_len=%zu\n", __func__, period_len);
+	dev_dbg(dev, "%s: buf_len=%zu\n", __func__, buf_len);
+
+	for (i = 0; i < nslots; i++) {
+		/* Allocate a PaRAM slot, if needed */
+		if (echan->slot[i] < 0) {
+			echan->slot[i] =
+				edma_alloc_slot(EDMA_CTLR(echan->ch_num),
+						EDMA_SLOT_ANY);
+			if (echan->slot[i] < 0) {
+				dev_err(dev, "Failed to allocate slot\n");
+				return NULL;
+			}
+		}
+
+		if (i == nslots - 1) {
+			memcpy(&edesc->pset[i], &edesc->pset[0],
+			       sizeof(edesc->pset[0]));
+			break;
+		}
+
+		ret = edma_config_pset(chan, &edesc->pset[i], src_addr,
+				       dst_addr, burst, dev_width, period_len,
+				       direction);
+		if (ret < 0)
+			return NULL;
+
+		if (direction == DMA_DEV_TO_MEM)
+			dst_addr += period_len;
+		else
+			src_addr += period_len;
+
+		dev_dbg(dev, "%s: Configure period %d of buf:\n", __func__, i);
+		dev_dbg(dev,
+			"\n pset[%d]:\n"
+			"  chnum\t%d\n"
+			"  slot\t%d\n"
+			"  opt\t%08x\n"
+			"  src\t%08x\n"
+			"  dst\t%08x\n"
+			"  abcnt\t%08x\n"
+			"  ccnt\t%08x\n"
+			"  bidx\t%08x\n"
+			"  cidx\t%08x\n"
+			"  lkrld\t%08x\n",
+			i, echan->ch_num, echan->slot[i],
+			edesc->pset[i].opt,
+			edesc->pset[i].src,
+			edesc->pset[i].dst,
+			edesc->pset[i].a_b_cnt,
+			edesc->pset[i].ccnt,
+			edesc->pset[i].src_dst_bidx,
+			edesc->pset[i].src_dst_cidx,
+			edesc->pset[i].link_bcntrld);
+
+		edesc->absync = ret;
+
+		/*
+		 * Enable interrupts for every period because callback
+		 * has to be called for every period.
+		 */
+		edesc->pset[i].opt |= TCINTEN;
 	}
 
 	return vchan_tx_prep(&echan->vchan, &edesc->vdesc, tx_flags);
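
[Editor's note] edma_prep_dma_cyclic() allocates one extra slot (nslots = periods + 1), duplicates pset[0] into it, and edma_execute() links the final slot back to slot[1] so the PaRAM chain loops indefinitely, with TCINTEN set on every period so the callback fires once per period. On the client side the usual dmaengine cyclic idiom applies; a hedged sketch (buffer sizes, direction and the callback are illustrative):

#include <linux/dmaengine.h>

/* Illustrative per-period callback, e.g. advancing an audio ring buffer. */
static void period_elapsed(void *arg)
{
	/* called once per period_len bytes transferred */
}

static int start_cyclic(struct dma_chan *chan, dma_addr_t buf,
			size_t buf_len, size_t period_len)
{
	struct dma_async_tx_descriptor *desc;

	desc = dmaengine_prep_dma_cyclic(chan, buf, buf_len, period_len,
					 DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT);
	if (!desc)
		return -EINVAL;

	desc->callback = period_elapsed;
	desc->callback_param = NULL;

	dmaengine_submit(desc);
	dma_async_issue_pending(chan);
	return 0;
}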
@@ -406,30 +604,34 @@
 	unsigned long flags;
 	struct edmacc_param p;
 
-	/* Pause the channel */
-	edma_pause(echan->ch_num);
+	edesc = echan->edesc;
+
+	/* Pause the channel for non-cyclic */
+	if (!edesc || (edesc && !edesc->cyclic))
+		edma_pause(echan->ch_num);
 
 	switch (ch_status) {
-	case DMA_COMPLETE:
+	case EDMA_DMA_COMPLETE:
 		spin_lock_irqsave(&echan->vchan.lock, flags);
 
-		edesc = echan->edesc;
 		if (edesc) {
-			if (edesc->processed == edesc->pset_nr) {
+			if (edesc->cyclic) {
+				vchan_cyclic_callback(&edesc->vdesc);
+			} else if (edesc->processed == edesc->pset_nr) {
 				dev_dbg(dev, "Transfer complete, stopping channel %d\n", ch_num);
 				edma_stop(echan->ch_num);
 				vchan_cookie_complete(&edesc->vdesc);
+				edma_execute(echan);
 			} else {
 				dev_dbg(dev, "Intermediate transfer complete on channel %d\n", ch_num);
+				edma_execute(echan);
 			}
-
-			edma_execute(echan);
 		}
 
 		spin_unlock_irqrestore(&echan->vchan.lock, flags);
 
 		break;
-	case DMA_CC_ERROR:
+	case EDMA_DMA_CC_ERROR:
 		spin_lock_irqsave(&echan->vchan.lock, flags);
 
 		edma_read_slot(EDMA_CHAN_SLOT(echan->slot[0]), &p);
@@ -579,7 +781,7 @@
 	unsigned long flags;
 
 	ret = dma_cookie_status(chan, cookie, txstate);
-	if (ret == DMA_SUCCESS || !txstate)
+	if (ret == DMA_COMPLETE || !txstate)
 		return ret;
 
 	spin_lock_irqsave(&echan->vchan.lock, flags);
@@ -619,6 +821,7 @@
 			  struct device *dev)
 {
 	dma->device_prep_slave_sg = edma_prep_slave_sg;
+	dma->device_prep_dma_cyclic = edma_prep_dma_cyclic;
 	dma->device_alloc_chan_resources = edma_alloc_chan_resources;
 	dma->device_free_chan_resources = edma_free_chan_resources;
 	dma->device_issue_pending = edma_issue_pending;
diff --git a/drivers/dma/ep93xx_dma.c b/drivers/dma/ep93xx_dma.c
index 591cd8c..cb4bf68 100644
--- a/drivers/dma/ep93xx_dma.c
+++ b/drivers/dma/ep93xx_dma.c
@@ -733,28 +733,6 @@
 	spin_unlock_irqrestore(&edmac->lock, flags);
 }
 
-static void ep93xx_dma_unmap_buffers(struct ep93xx_dma_desc *desc)
-{
-	struct device *dev = desc->txd.chan->device->dev;
-
-	if (!(desc->txd.flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-		if (desc->txd.flags & DMA_COMPL_SRC_UNMAP_SINGLE)
-			dma_unmap_single(dev, desc->src_addr, desc->size,
-					 DMA_TO_DEVICE);
-		else
-			dma_unmap_page(dev, desc->src_addr, desc->size,
-				       DMA_TO_DEVICE);
-	}
-	if (!(desc->txd.flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
-		if (desc->txd.flags & DMA_COMPL_DEST_UNMAP_SINGLE)
-			dma_unmap_single(dev, desc->dst_addr, desc->size,
-					 DMA_FROM_DEVICE);
-		else
-			dma_unmap_page(dev, desc->dst_addr, desc->size,
-				       DMA_FROM_DEVICE);
-	}
-}
-
 static void ep93xx_dma_tasklet(unsigned long data)
 {
 	struct ep93xx_dma_chan *edmac = (struct ep93xx_dma_chan *)data;
@@ -787,13 +765,7 @@
 
 	/* Now we can release all the chained descriptors */
 	list_for_each_entry_safe(desc, d, &list, node) {
-		/*
-		 * For the memcpy channels the API requires us to unmap the
-		 * buffers unless requested otherwise.
-		 */
-		if (!edmac->chan.private)
-			ep93xx_dma_unmap_buffers(desc);
-
+		dma_descriptor_unmap(&desc->txd);
 		ep93xx_dma_desc_put(edmac, desc);
 	}
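
[Editor's note] dw_dmac above and ep93xx here (fsldma and ioat follow) drop their hand-rolled unmap paths: with the DMA_COMPL_SKIP_*/UNMAP_SINGLE flags going away, mappings live in the descriptor's unmap data and the completion path only needs one dma_descriptor_unmap() call. A minimal sketch of such a completion handler (the wrapper descriptor type is hypothetical):

#include <linux/dmaengine.h>
#include "dmaengine.h"		/* drivers/dma private helpers: dma_cookie_complete() */

/* Hypothetical driver descriptor wrapping the generic tx descriptor. */
struct my_desc {
	struct dma_async_tx_descriptor txd;
	struct list_head node;
};

static void my_complete_desc(struct my_desc *desc)
{
	struct dma_async_tx_descriptor *txd = &desc->txd;

	dma_cookie_complete(txd);
	dma_descriptor_unmap(txd);	/* replaces the per-flag unmap code above */

	if (txd->callback)
		txd->callback(txd->callback_param);
	dma_run_dependencies(txd);
}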
 
diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index 61517dd..7086a16 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -870,22 +870,7 @@
 	/* Run any dependencies */
 	dma_run_dependencies(txd);
 
-	/* Unmap the dst buffer, if requested */
-	if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
-		if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
-			dma_unmap_single(dev, dst, len, DMA_FROM_DEVICE);
-		else
-			dma_unmap_page(dev, dst, len, DMA_FROM_DEVICE);
-	}
-
-	/* Unmap the src buffer, if requested */
-	if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-		if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
-			dma_unmap_single(dev, src, len, DMA_TO_DEVICE);
-		else
-			dma_unmap_page(dev, src, len, DMA_TO_DEVICE);
-	}
-
+	dma_descriptor_unmap(txd);
 #ifdef FSL_DMA_LD_DEBUG
 	chan_dbg(chan, "LD %p free\n", desc);
 #endif
@@ -1255,7 +1240,9 @@
 	WARN_ON(fdev->feature != chan->feature);
 
 	chan->dev = fdev->dev;
-	chan->id = ((res.start - 0x100) & 0xfff) >> 7;
+	chan->id = (res.start & 0xfff) < 0x300 ?
+		   ((res.start - 0x100) & 0xfff) >> 7 :
+		   ((res.start - 0x200) & 0xfff) >> 7;
 	if (chan->id >= FSL_DMA_MAX_CHANS_PER_DEVICE) {
 		dev_err(fdev->dev, "too many channels for device\n");
 		err = -EINVAL;
@@ -1428,6 +1415,7 @@
 }
 
 static const struct of_device_id fsldma_of_ids[] = {
+	{ .compatible = "fsl,elo3-dma", },
 	{ .compatible = "fsl,eloplus-dma", },
 	{ .compatible = "fsl,elo-dma", },
 	{}
@@ -1449,7 +1437,7 @@
 
 static __init int fsldma_init(void)
 {
-	pr_info("Freescale Elo / Elo Plus DMA driver\n");
+	pr_info("Freescale Elo series DMA driver\n");
 	return platform_driver_register(&fsldma_of_driver);
 }
 
@@ -1461,5 +1449,5 @@
 subsys_initcall(fsldma_init);
 module_exit(fsldma_exit);
 
-MODULE_DESCRIPTION("Freescale Elo / Elo Plus DMA driver");
+MODULE_DESCRIPTION("Freescale Elo series DMA driver");
 MODULE_LICENSE("GPL");
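
[Editor's note] The elo3 controllers carry eight channels, with the second bank of channel registers sitting above the DGSR block, so the channel index can no longer come from a single subtraction. With the two-branch expression above, offsets below 0x300 keep the old mapping and the higher offsets presumably start at 0x400 (inferred from the arithmetic, not stated in the hunk); a runnable check:

#include <stdio.h>

static int chan_id(unsigned int start)
{
	return (start & 0xfff) < 0x300 ?
	       ((start - 0x100) & 0xfff) >> 7 :
	       ((start - 0x200) & 0xfff) >> 7;
}

int main(void)
{
	/* first bank 0x100..0x280 -> 0..3, second bank 0x400..0x580 -> 4..7 */
	unsigned int offs[] = { 0x100, 0x180, 0x200, 0x280,
				0x400, 0x480, 0x500, 0x580 };
	int i;

	for (i = 0; i < 8; i++)
		printf("reg 0x%03x -> channel %d\n", offs[i], chan_id(offs[i]));
	return 0;
}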
diff --git a/drivers/dma/fsldma.h b/drivers/dma/fsldma.h
index f5c3879..1ffc244 100644
--- a/drivers/dma/fsldma.h
+++ b/drivers/dma/fsldma.h
@@ -112,7 +112,7 @@
 };
 
 struct fsldma_chan;
-#define FSL_DMA_MAX_CHANS_PER_DEVICE 4
+#define FSL_DMA_MAX_CHANS_PER_DEVICE 8
 
 struct fsldma_device {
 	void __iomem *regs;	/* DGSR register base */
diff --git a/drivers/dma/imx-dma.c b/drivers/dma/imx-dma.c
index 55852c0..6f9ac20 100644
--- a/drivers/dma/imx-dma.c
+++ b/drivers/dma/imx-dma.c
@@ -572,9 +572,11 @@
 
 		imx_dmav1_writel(imxdma, d->len, DMA_CNTR(imxdmac->channel));
 
-		dev_dbg(imxdma->dev, "%s channel: %d dest=0x%08x src=0x%08x "
-			"dma_length=%d\n", __func__, imxdmac->channel,
-			d->dest, d->src, d->len);
+		dev_dbg(imxdma->dev,
+			"%s channel: %d dest=0x%08llx src=0x%08llx dma_length=%zu\n",
+			__func__, imxdmac->channel,
+			(unsigned long long)d->dest,
+			(unsigned long long)d->src, d->len);
 
 		break;
 	/* Cyclic transfer is the same as slave_sg with special sg configuration. */
@@ -586,20 +588,22 @@
 			imx_dmav1_writel(imxdma, imxdmac->ccr_from_device,
 					 DMA_CCR(imxdmac->channel));
 
-			dev_dbg(imxdma->dev, "%s channel: %d sg=%p sgcount=%d "
-				"total length=%d dev_addr=0x%08x (dev2mem)\n",
-				__func__, imxdmac->channel, d->sg, d->sgcount,
-				d->len, imxdmac->per_address);
+			dev_dbg(imxdma->dev,
+				"%s channel: %d sg=%p sgcount=%d total length=%zu dev_addr=0x%08llx (dev2mem)\n",
+				__func__, imxdmac->channel,
+				d->sg, d->sgcount, d->len,
+				(unsigned long long)imxdmac->per_address);
 		} else if (d->direction == DMA_MEM_TO_DEV) {
 			imx_dmav1_writel(imxdma, imxdmac->per_address,
 					 DMA_DAR(imxdmac->channel));
 			imx_dmav1_writel(imxdma, imxdmac->ccr_to_device,
 					 DMA_CCR(imxdmac->channel));
 
-			dev_dbg(imxdma->dev, "%s channel: %d sg=%p sgcount=%d "
-				"total length=%d dev_addr=0x%08x (mem2dev)\n",
-				__func__, imxdmac->channel, d->sg, d->sgcount,
-				d->len, imxdmac->per_address);
+			dev_dbg(imxdma->dev,
+				"%s channel: %d sg=%p sgcount=%d total length=%zu dev_addr=0x%08llx (mem2dev)\n",
+				__func__, imxdmac->channel,
+				d->sg, d->sgcount, d->len,
+				(unsigned long long)imxdmac->per_address);
 		} else {
 			dev_err(imxdma->dev, "%s channel: %d bad dma mode\n",
 				__func__, imxdmac->channel);
@@ -771,7 +775,7 @@
 		desc->desc.tx_submit = imxdma_tx_submit;
 		/* txd.flags will be overwritten in prep funcs */
 		desc->desc.flags = DMA_CTRL_ACK;
-		desc->status = DMA_SUCCESS;
+		desc->status = DMA_COMPLETE;
 
 		list_add_tail(&desc->node, &imxdmac->ld_free);
 		imxdmac->descs_allocated++;
@@ -870,7 +874,7 @@
 	int i;
 	unsigned int periods = buf_len / period_len;
 
-	dev_dbg(imxdma->dev, "%s channel: %d buf_len=%d period_len=%d\n",
+	dev_dbg(imxdma->dev, "%s channel: %d buf_len=%zu period_len=%zu\n",
 			__func__, imxdmac->channel, buf_len, period_len);
 
 	if (list_empty(&imxdmac->ld_free) ||
@@ -926,8 +930,9 @@
 	struct imxdma_engine *imxdma = imxdmac->imxdma;
 	struct imxdma_desc *desc;
 
-	dev_dbg(imxdma->dev, "%s channel: %d src=0x%x dst=0x%x len=%d\n",
-			__func__, imxdmac->channel, src, dest, len);
+	dev_dbg(imxdma->dev, "%s channel: %d src=0x%llx dst=0x%llx len=%zu\n",
+		__func__, imxdmac->channel, (unsigned long long)src,
+		(unsigned long long)dest, len);
 
 	if (list_empty(&imxdmac->ld_free) ||
 	    imxdma_chan_is_doing_cyclic(imxdmac))
@@ -956,9 +961,10 @@
 	struct imxdma_engine *imxdma = imxdmac->imxdma;
 	struct imxdma_desc *desc;
 
-	dev_dbg(imxdma->dev, "%s channel: %d src_start=0x%x dst_start=0x%x\n"
-		"   src_sgl=%s dst_sgl=%s numf=%d frame_size=%d\n", __func__,
-		imxdmac->channel, xt->src_start, xt->dst_start,
+	dev_dbg(imxdma->dev, "%s channel: %d src_start=0x%llx dst_start=0x%llx\n"
+		"   src_sgl=%s dst_sgl=%s numf=%zu frame_size=%zu\n", __func__,
+		imxdmac->channel, (unsigned long long)xt->src_start,
+		(unsigned long long) xt->dst_start,
 		xt->src_sgl ? "true" : "false", xt->dst_sgl ? "true" : "false",
 		xt->numf, xt->frame_size);
 
diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
index c1fd504..c75679d 100644
--- a/drivers/dma/imx-sdma.c
+++ b/drivers/dma/imx-sdma.c
@@ -638,7 +638,7 @@
 	if (error)
 		sdmac->status = DMA_ERROR;
 	else
-		sdmac->status = DMA_SUCCESS;
+		sdmac->status = DMA_COMPLETE;
 
 	dma_cookie_complete(&sdmac->desc);
 	if (sdmac->desc.callback)
@@ -1089,8 +1089,8 @@
 			param &= ~BD_CONT;
 		}
 
-		dev_dbg(sdma->dev, "entry %d: count: %d dma: 0x%08x %s%s\n",
-				i, count, sg->dma_address,
+		dev_dbg(sdma->dev, "entry %d: count: %d dma: %#llx %s%s\n",
+				i, count, (u64)sg->dma_address,
 				param & BD_WRAP ? "wrap" : "",
 				param & BD_INTR ? " intr" : "");
 
@@ -1163,8 +1163,8 @@
 		if (i + 1 == num_periods)
 			param |= BD_WRAP;
 
-		dev_dbg(sdma->dev, "entry %d: count: %d dma: 0x%08x %s%s\n",
-				i, period_len, dma_addr,
+		dev_dbg(sdma->dev, "entry %d: count: %d dma: %#llx %s%s\n",
+				i, period_len, (u64)dma_addr,
 				param & BD_WRAP ? "wrap" : "",
 				param & BD_INTR ? " intr" : "");
 
diff --git a/drivers/dma/intel_mid_dma.c b/drivers/dma/intel_mid_dma.c
index a975ebe..1aab813 100644
--- a/drivers/dma/intel_mid_dma.c
+++ b/drivers/dma/intel_mid_dma.c
@@ -309,7 +309,7 @@
 		callback_txd(param_txd);
 	}
 	if (midc->raw_tfr) {
-		desc->status = DMA_SUCCESS;
+		desc->status = DMA_COMPLETE;
 		if (desc->lli != NULL) {
 			pci_pool_free(desc->lli_pool, desc->lli,
 						desc->lli_phys);
@@ -481,7 +481,7 @@
 	enum dma_status ret;
 
 	ret = dma_cookie_status(chan, cookie, txstate);
-	if (ret != DMA_SUCCESS) {
+	if (ret != DMA_COMPLETE) {
 		spin_lock_bh(&midc->lock);
 		midc_scan_descriptors(to_middma_device(chan->device), midc);
 		spin_unlock_bh(&midc->lock);
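
[Editor's note] DMA_SUCCESS becomes DMA_COMPLETE throughout this series, since the value describes the state of a transfer rather than the result of an operation, and every tx_status implementation now compares against the new name. The recurring pattern, sketched with a hypothetical driver hook:

#include <linux/dmaengine.h>
#include "dmaengine.h"		/* dma_cookie_status() */

static void my_scan_descriptors(struct dma_chan *chan)
{
	/* hypothetical: walk hardware state and complete finished cookies */
}

static enum dma_status my_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
				    struct dma_tx_state *txstate)
{
	enum dma_status ret = dma_cookie_status(chan, cookie, txstate);

	if (ret == DMA_COMPLETE)	/* spelled DMA_SUCCESS before this series */
		return ret;

	my_scan_descriptors(chan);
	return dma_cookie_status(chan, cookie, txstate);
}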
diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c
index 5ff6fc1..1a49c7776 100644
--- a/drivers/dma/ioat/dma.c
+++ b/drivers/dma/ioat/dma.c
@@ -531,21 +531,6 @@
 	writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET);
 }
 
-void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags,
-		    size_t len, struct ioat_dma_descriptor *hw)
-{
-	struct pci_dev *pdev = chan->device->pdev;
-	size_t offset = len - hw->size;
-
-	if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP))
-		ioat_unmap(pdev, hw->dst_addr - offset, len,
-			   PCI_DMA_FROMDEVICE, flags, 1);
-
-	if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP))
-		ioat_unmap(pdev, hw->src_addr - offset, len,
-			   PCI_DMA_TODEVICE, flags, 0);
-}
-
 dma_addr_t ioat_get_current_completion(struct ioat_chan_common *chan)
 {
 	dma_addr_t phys_complete;
@@ -602,7 +587,7 @@
 		dump_desc_dbg(ioat, desc);
 		if (tx->cookie) {
 			dma_cookie_complete(tx);
-			ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw);
+			dma_descriptor_unmap(tx);
 			ioat->active -= desc->hw->tx_cnt;
 			if (tx->callback) {
 				tx->callback(tx->callback_param);
@@ -733,7 +718,7 @@
 	enum dma_status ret;
 
 	ret = dma_cookie_status(c, cookie, txstate);
-	if (ret == DMA_SUCCESS)
+	if (ret == DMA_COMPLETE)
 		return ret;
 
 	device->cleanup_fn((unsigned long) c);
@@ -833,8 +818,7 @@
 
 	dma_src = dma_map_single(dev, src, IOAT_TEST_SIZE, DMA_TO_DEVICE);
 	dma_dest = dma_map_single(dev, dest, IOAT_TEST_SIZE, DMA_FROM_DEVICE);
-	flags = DMA_COMPL_SKIP_SRC_UNMAP | DMA_COMPL_SKIP_DEST_UNMAP |
-		DMA_PREP_INTERRUPT;
+	flags = DMA_PREP_INTERRUPT;
 	tx = device->common.device_prep_dma_memcpy(dma_chan, dma_dest, dma_src,
 						   IOAT_TEST_SIZE, flags);
 	if (!tx) {
@@ -859,7 +843,7 @@
 
 	if (tmo == 0 ||
 	    dma->device_tx_status(dma_chan, cookie, NULL)
-					!= DMA_SUCCESS) {
+					!= DMA_COMPLETE) {
 		dev_err(dev, "Self-test copy timed out, disabling\n");
 		err = -ENODEV;
 		goto unmap_dma;
@@ -885,8 +869,7 @@
 module_param_string(ioat_interrupt_style, ioat_interrupt_style,
 		    sizeof(ioat_interrupt_style), 0644);
 MODULE_PARM_DESC(ioat_interrupt_style,
-		 "set ioat interrupt style: msix (default), "
-		 "msix-single-vector, msi, intx)");
+		 "set ioat interrupt style: msix (default), msi, intx");
 
 /**
  * ioat_dma_setup_interrupts - setup interrupt handler
@@ -904,8 +887,6 @@
 
 	if (!strcmp(ioat_interrupt_style, "msix"))
 		goto msix;
-	if (!strcmp(ioat_interrupt_style, "msix-single-vector"))
-		goto msix_single_vector;
 	if (!strcmp(ioat_interrupt_style, "msi"))
 		goto msi;
 	if (!strcmp(ioat_interrupt_style, "intx"))
@@ -920,10 +901,8 @@
 		device->msix_entries[i].entry = i;
 
 	err = pci_enable_msix(pdev, device->msix_entries, msixcnt);
-	if (err < 0)
+	if (err)
 		goto msi;
-	if (err > 0)
-		goto msix_single_vector;
 
 	for (i = 0; i < msixcnt; i++) {
 		msix = &device->msix_entries[i];
@@ -937,29 +916,13 @@
 				chan = ioat_chan_by_index(device, j);
 				devm_free_irq(dev, msix->vector, chan);
 			}
-			goto msix_single_vector;
+			goto msi;
 		}
 	}
 	intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL;
 	device->irq_mode = IOAT_MSIX;
 	goto done;
 
-msix_single_vector:
-	msix = &device->msix_entries[0];
-	msix->entry = 0;
-	err = pci_enable_msix(pdev, device->msix_entries, 1);
-	if (err)
-		goto msi;
-
-	err = devm_request_irq(dev, msix->vector, ioat_dma_do_interrupt, 0,
-			       "ioat-msix", device);
-	if (err) {
-		pci_disable_msix(pdev);
-		goto msi;
-	}
-	device->irq_mode = IOAT_MSIX_SINGLE;
-	goto done;
-
 msi:
 	err = pci_enable_msi(pdev);
 	if (err)
@@ -971,7 +934,7 @@
 		pci_disable_msi(pdev);
 		goto intx;
 	}
-	device->irq_mode = IOAT_MSIX;
+	device->irq_mode = IOAT_MSI;
 	goto done;
 
 intx:
diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h
index 54fb7b9..11fb877 100644
--- a/drivers/dma/ioat/dma.h
+++ b/drivers/dma/ioat/dma.h
@@ -52,7 +52,6 @@
 enum ioat_irq_mode {
 	IOAT_NOIRQ = 0,
 	IOAT_MSIX,
-	IOAT_MSIX_SINGLE,
 	IOAT_MSI,
 	IOAT_INTX
 };
@@ -83,7 +82,6 @@
 	struct pci_pool *completion_pool;
 #define MAX_SED_POOLS	5
 	struct dma_pool *sed_hw_pool[MAX_SED_POOLS];
-	struct kmem_cache *sed_pool;
 	struct dma_device common;
 	u8 version;
 	struct msix_entry msix_entries[4];
@@ -342,16 +340,6 @@
 	return !!err;
 }
 
-static inline void ioat_unmap(struct pci_dev *pdev, dma_addr_t addr, size_t len,
-			      int direction, enum dma_ctrl_flags flags, bool dst)
-{
-	if ((dst && (flags & DMA_COMPL_DEST_UNMAP_SINGLE)) ||
-	    (!dst && (flags & DMA_COMPL_SRC_UNMAP_SINGLE)))
-		pci_unmap_single(pdev, addr, len, direction);
-	else
-		pci_unmap_page(pdev, addr, len, direction);
-}
-
 int ioat_probe(struct ioatdma_device *device);
 int ioat_register(struct ioatdma_device *device);
 int ioat1_dma_probe(struct ioatdma_device *dev, int dca);
@@ -363,8 +351,6 @@
 		       struct ioat_chan_common *chan, int idx);
 enum dma_status ioat_dma_tx_status(struct dma_chan *c, dma_cookie_t cookie,
 				   struct dma_tx_state *txstate);
-void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags,
-		    size_t len, struct ioat_dma_descriptor *hw);
 bool ioat_cleanup_preamble(struct ioat_chan_common *chan,
 			   dma_addr_t *phys_complete);
 void ioat_kobject_add(struct ioatdma_device *device, struct kobj_type *type);
diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c
index b925e1b..5d3affe 100644
--- a/drivers/dma/ioat/dma_v2.c
+++ b/drivers/dma/ioat/dma_v2.c
@@ -148,7 +148,7 @@
 		tx = &desc->txd;
 		dump_desc_dbg(ioat, desc);
 		if (tx->cookie) {
-			ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw);
+			dma_descriptor_unmap(tx);
 			dma_cookie_complete(tx);
 			if (tx->callback) {
 				tx->callback(tx->callback_param);
diff --git a/drivers/dma/ioat/dma_v2.h b/drivers/dma/ioat/dma_v2.h
index 212d584..4702927 100644
--- a/drivers/dma/ioat/dma_v2.h
+++ b/drivers/dma/ioat/dma_v2.h
@@ -157,7 +157,6 @@
 
 int ioat2_dma_probe(struct ioatdma_device *dev, int dca);
 int ioat3_dma_probe(struct ioatdma_device *dev, int dca);
-void ioat3_dma_remove(struct ioatdma_device *dev);
 struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase);
 struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase);
 int ioat2_check_space_lock(struct ioat2_dma_chan *ioat, int num_descs);
diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c
index d8ececaf..820817e9 100644
--- a/drivers/dma/ioat/dma_v3.c
+++ b/drivers/dma/ioat/dma_v3.c
@@ -67,6 +67,8 @@
 #include "dma.h"
 #include "dma_v2.h"
 
+extern struct kmem_cache *ioat3_sed_cache;
+
 /* ioat hardware assumes at least two sources for raid operations */
 #define src_cnt_to_sw(x) ((x) + 2)
 #define src_cnt_to_hw(x) ((x) - 2)
@@ -87,22 +89,8 @@
 static const u8 pq16_idx_to_field[] = { 1, 4, 1, 2, 3, 4, 5, 6, 7,
 					0, 1, 2, 3, 4, 5, 6 };
 
-/*
- * technically sources 1 and 2 do not require SED, but the op will have
- * at least 9 descriptors so that's irrelevant.
- */
-static const u8 pq16_idx_to_sed[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0,
-				      1, 1, 1, 1, 1, 1, 1 };
-
 static void ioat3_eh(struct ioat2_dma_chan *ioat);
 
-static dma_addr_t xor_get_src(struct ioat_raw_descriptor *descs[2], int idx)
-{
-	struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1];
-
-	return raw->field[xor_idx_to_field[idx]];
-}
-
 static void xor_set_src(struct ioat_raw_descriptor *descs[2],
 			dma_addr_t addr, u32 offset, int idx)
 {
@@ -135,12 +123,6 @@
 	pq->coef[idx] = coef;
 }
 
-static int sed_get_pq16_pool_idx(int src_cnt)
-{
-
-	return pq16_idx_to_sed[src_cnt];
-}
-
 static bool is_jf_ioat(struct pci_dev *pdev)
 {
 	switch (pdev->device) {
@@ -272,7 +254,7 @@
 	struct ioat_sed_ent *sed;
 	gfp_t flags = __GFP_ZERO | GFP_ATOMIC;
 
-	sed = kmem_cache_alloc(device->sed_pool, flags);
+	sed = kmem_cache_alloc(ioat3_sed_cache, flags);
 	if (!sed)
 		return NULL;
 
@@ -280,7 +262,7 @@
 	sed->hw = dma_pool_alloc(device->sed_hw_pool[hw_pool],
 				 flags, &sed->dma);
 	if (!sed->hw) {
-		kmem_cache_free(device->sed_pool, sed);
+		kmem_cache_free(ioat3_sed_cache, sed);
 		return NULL;
 	}
 
@@ -293,165 +275,7 @@
 		return;
 
 	dma_pool_free(device->sed_hw_pool[sed->hw_pool], sed->hw, sed->dma);
-	kmem_cache_free(device->sed_pool, sed);
-}
-
-static void ioat3_dma_unmap(struct ioat2_dma_chan *ioat,
-			    struct ioat_ring_ent *desc, int idx)
-{
-	struct ioat_chan_common *chan = &ioat->base;
-	struct pci_dev *pdev = chan->device->pdev;
-	size_t len = desc->len;
-	size_t offset = len - desc->hw->size;
-	struct dma_async_tx_descriptor *tx = &desc->txd;
-	enum dma_ctrl_flags flags = tx->flags;
-
-	switch (desc->hw->ctl_f.op) {
-	case IOAT_OP_COPY:
-		if (!desc->hw->ctl_f.null) /* skip 'interrupt' ops */
-			ioat_dma_unmap(chan, flags, len, desc->hw);
-		break;
-	case IOAT_OP_XOR_VAL:
-	case IOAT_OP_XOR: {
-		struct ioat_xor_descriptor *xor = desc->xor;
-		struct ioat_ring_ent *ext;
-		struct ioat_xor_ext_descriptor *xor_ex = NULL;
-		int src_cnt = src_cnt_to_sw(xor->ctl_f.src_cnt);
-		struct ioat_raw_descriptor *descs[2];
-		int i;
-
-		if (src_cnt > 5) {
-			ext = ioat2_get_ring_ent(ioat, idx + 1);
-			xor_ex = ext->xor_ex;
-		}
-
-		if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-			descs[0] = (struct ioat_raw_descriptor *) xor;
-			descs[1] = (struct ioat_raw_descriptor *) xor_ex;
-			for (i = 0; i < src_cnt; i++) {
-				dma_addr_t src = xor_get_src(descs, i);
-
-				ioat_unmap(pdev, src - offset, len,
-					   PCI_DMA_TODEVICE, flags, 0);
-			}
-
-			/* dest is a source in xor validate operations */
-			if (xor->ctl_f.op == IOAT_OP_XOR_VAL) {
-				ioat_unmap(pdev, xor->dst_addr - offset, len,
-					   PCI_DMA_TODEVICE, flags, 1);
-				break;
-			}
-		}
-
-		if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP))
-			ioat_unmap(pdev, xor->dst_addr - offset, len,
-				   PCI_DMA_FROMDEVICE, flags, 1);
-		break;
-	}
-	case IOAT_OP_PQ_VAL:
-	case IOAT_OP_PQ: {
-		struct ioat_pq_descriptor *pq = desc->pq;
-		struct ioat_ring_ent *ext;
-		struct ioat_pq_ext_descriptor *pq_ex = NULL;
-		int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt);
-		struct ioat_raw_descriptor *descs[2];
-		int i;
-
-		if (src_cnt > 3) {
-			ext = ioat2_get_ring_ent(ioat, idx + 1);
-			pq_ex = ext->pq_ex;
-		}
-
-		/* in the 'continue' case don't unmap the dests as sources */
-		if (dmaf_p_disabled_continue(flags))
-			src_cnt--;
-		else if (dmaf_continue(flags))
-			src_cnt -= 3;
-
-		if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-			descs[0] = (struct ioat_raw_descriptor *) pq;
-			descs[1] = (struct ioat_raw_descriptor *) pq_ex;
-			for (i = 0; i < src_cnt; i++) {
-				dma_addr_t src = pq_get_src(descs, i);
-
-				ioat_unmap(pdev, src - offset, len,
-					   PCI_DMA_TODEVICE, flags, 0);
-			}
-
-			/* the dests are sources in pq validate operations */
-			if (pq->ctl_f.op == IOAT_OP_XOR_VAL) {
-				if (!(flags & DMA_PREP_PQ_DISABLE_P))
-					ioat_unmap(pdev, pq->p_addr - offset,
-						   len, PCI_DMA_TODEVICE, flags, 0);
-				if (!(flags & DMA_PREP_PQ_DISABLE_Q))
-					ioat_unmap(pdev, pq->q_addr - offset,
-						   len, PCI_DMA_TODEVICE, flags, 0);
-				break;
-			}
-		}
-
-		if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
-			if (!(flags & DMA_PREP_PQ_DISABLE_P))
-				ioat_unmap(pdev, pq->p_addr - offset, len,
-					   PCI_DMA_BIDIRECTIONAL, flags, 1);
-			if (!(flags & DMA_PREP_PQ_DISABLE_Q))
-				ioat_unmap(pdev, pq->q_addr - offset, len,
-					   PCI_DMA_BIDIRECTIONAL, flags, 1);
-		}
-		break;
-	}
-	case IOAT_OP_PQ_16S:
-	case IOAT_OP_PQ_VAL_16S: {
-		struct ioat_pq_descriptor *pq = desc->pq;
-		int src_cnt = src16_cnt_to_sw(pq->ctl_f.src_cnt);
-		struct ioat_raw_descriptor *descs[4];
-		int i;
-
-		/* in the 'continue' case don't unmap the dests as sources */
-		if (dmaf_p_disabled_continue(flags))
-			src_cnt--;
-		else if (dmaf_continue(flags))
-			src_cnt -= 3;
-
-		if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-			descs[0] = (struct ioat_raw_descriptor *)pq;
-			descs[1] = (struct ioat_raw_descriptor *)(desc->sed->hw);
-			descs[2] = (struct ioat_raw_descriptor *)(&desc->sed->hw->b[0]);
-			for (i = 0; i < src_cnt; i++) {
-				dma_addr_t src = pq16_get_src(descs, i);
-
-				ioat_unmap(pdev, src - offset, len,
-					   PCI_DMA_TODEVICE, flags, 0);
-			}
-
-			/* the dests are sources in pq validate operations */
-			if (pq->ctl_f.op == IOAT_OP_XOR_VAL) {
-				if (!(flags & DMA_PREP_PQ_DISABLE_P))
-					ioat_unmap(pdev, pq->p_addr - offset,
-						   len, PCI_DMA_TODEVICE,
-						   flags, 0);
-				if (!(flags & DMA_PREP_PQ_DISABLE_Q))
-					ioat_unmap(pdev, pq->q_addr - offset,
-						   len, PCI_DMA_TODEVICE,
-						   flags, 0);
-				break;
-			}
-		}
-
-		if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
-			if (!(flags & DMA_PREP_PQ_DISABLE_P))
-				ioat_unmap(pdev, pq->p_addr - offset, len,
-					   PCI_DMA_BIDIRECTIONAL, flags, 1);
-			if (!(flags & DMA_PREP_PQ_DISABLE_Q))
-				ioat_unmap(pdev, pq->q_addr - offset, len,
-					   PCI_DMA_BIDIRECTIONAL, flags, 1);
-		}
-		break;
-	}
-	default:
-		dev_err(&pdev->dev, "%s: unknown op type: %#x\n",
-			__func__, desc->hw->ctl_f.op);
-	}
+	kmem_cache_free(ioat3_sed_cache, sed);
 }
 
 static bool desc_has_ext(struct ioat_ring_ent *desc)
@@ -577,7 +401,7 @@
 		tx = &desc->txd;
 		if (tx->cookie) {
 			dma_cookie_complete(tx);
-			ioat3_dma_unmap(ioat, desc, idx + i);
+			dma_descriptor_unmap(tx);
 			if (tx->callback) {
 				tx->callback(tx->callback_param);
 				tx->callback = NULL;
@@ -807,7 +631,7 @@
 	enum dma_status ret;
 
 	ret = dma_cookie_status(c, cookie, txstate);
-	if (ret == DMA_SUCCESS)
+	if (ret == DMA_COMPLETE)
 		return ret;
 
 	ioat3_cleanup(ioat);
@@ -1129,9 +953,6 @@
 	u8 op;
 	int i, s, idx, num_descs;
 
-	/* this function only handles src_cnt 9 - 16 */
-	BUG_ON(src_cnt < 9);
-
 	/* this function is only called with 9-16 sources */
 	op = result ? IOAT_OP_PQ_VAL_16S : IOAT_OP_PQ_16S;
 
@@ -1159,8 +980,7 @@
 
 		descs[0] = (struct ioat_raw_descriptor *) pq;
 
-		desc->sed = ioat3_alloc_sed(device,
-					    sed_get_pq16_pool_idx(src_cnt));
+		desc->sed = ioat3_alloc_sed(device, (src_cnt-2) >> 3);
 		if (!desc->sed) {
 			dev_err(to_dev(chan),
 				"%s: no free sed entries\n", __func__);
@@ -1218,13 +1038,21 @@
 	return &desc->txd;
 }
 
+static int src_cnt_flags(unsigned int src_cnt, unsigned long flags)
+{
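+	/*
+	 * Effective source count: continuation operations implicitly consume
+	 * extra sources (the previous P/Q results), which matters when
+	 * choosing between the 8- and 16-source descriptor formats.
+	 */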
+	if (dmaf_p_disabled_continue(flags))
+		return src_cnt + 1;
+	else if (dmaf_continue(flags))
+		return src_cnt + 3;
+	else
+		return src_cnt;
+}
+
 static struct dma_async_tx_descriptor *
 ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
 	      unsigned int src_cnt, const unsigned char *scf, size_t len,
 	      unsigned long flags)
 {
-	struct dma_device *dma = chan->device;
-
 	/* specify valid address for disabled result */
 	if (flags & DMA_PREP_PQ_DISABLE_P)
 		dst[0] = dst[1];
@@ -1244,7 +1072,7 @@
 		single_source_coef[0] = scf[0];
 		single_source_coef[1] = 0;
 
-		return (src_cnt > 8) && (dma->max_pq > 8) ?
+		return src_cnt_flags(src_cnt, flags) > 8 ?
 			__ioat3_prep_pq16_lock(chan, NULL, dst, single_source,
 					       2, single_source_coef, len,
 					       flags) :
@@ -1252,7 +1080,7 @@
 					     single_source_coef, len, flags);
 
 	} else {
-		return (src_cnt > 8) && (dma->max_pq > 8) ?
+		return src_cnt_flags(src_cnt, flags) > 8 ?
 			__ioat3_prep_pq16_lock(chan, NULL, dst, src, src_cnt,
 					       scf, len, flags) :
 			__ioat3_prep_pq_lock(chan, NULL, dst, src, src_cnt,
@@ -1265,8 +1093,6 @@
 		  unsigned int src_cnt, const unsigned char *scf, size_t len,
 		  enum sum_check_flags *pqres, unsigned long flags)
 {
-	struct dma_device *dma = chan->device;
-
 	/* specify valid address for disabled result */
 	if (flags & DMA_PREP_PQ_DISABLE_P)
 		pq[0] = pq[1];
@@ -1278,7 +1104,7 @@
 	 */
 	*pqres = 0;
 
-	return (src_cnt > 8) && (dma->max_pq > 8) ?
+	return src_cnt_flags(src_cnt, flags) > 8 ?
 		__ioat3_prep_pq16_lock(chan, pqres, pq, src, src_cnt, scf, len,
 				       flags) :
 		__ioat3_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len,
@@ -1289,7 +1115,6 @@
 ioat3_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src,
 		 unsigned int src_cnt, size_t len, unsigned long flags)
 {
-	struct dma_device *dma = chan->device;
 	unsigned char scf[src_cnt];
 	dma_addr_t pq[2];
 
@@ -1298,7 +1123,7 @@
 	flags |= DMA_PREP_PQ_DISABLE_Q;
 	pq[1] = dst; /* specify valid address for disabled result */
 
-	return (src_cnt > 8) && (dma->max_pq > 8) ?
+	return src_cnt_flags(src_cnt, flags) > 8 ?
 		__ioat3_prep_pq16_lock(chan, NULL, pq, src, src_cnt, scf, len,
 				       flags) :
 		__ioat3_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len,
@@ -1310,7 +1135,6 @@
 		     unsigned int src_cnt, size_t len,
 		     enum sum_check_flags *result, unsigned long flags)
 {
-	struct dma_device *dma = chan->device;
 	unsigned char scf[src_cnt];
 	dma_addr_t pq[2];
 
@@ -1324,8 +1148,7 @@
 	flags |= DMA_PREP_PQ_DISABLE_Q;
 	pq[1] = pq[0]; /* specify valid address for disabled result */
 
-
-	return (src_cnt > 8) && (dma->max_pq > 8) ?
+	return src_cnt_flags(src_cnt, flags) > 8 ?
 		__ioat3_prep_pq16_lock(chan, result, pq, &src[1], src_cnt - 1,
 				       scf, len, flags) :
 		__ioat3_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1,
@@ -1444,9 +1267,7 @@
 					   DMA_TO_DEVICE);
 	tx = dma->device_prep_dma_xor(dma_chan, dest_dma, dma_srcs,
 				      IOAT_NUM_SRC_TEST, PAGE_SIZE,
-				      DMA_PREP_INTERRUPT |
-				      DMA_COMPL_SKIP_SRC_UNMAP |
-				      DMA_COMPL_SKIP_DEST_UNMAP);
+				      DMA_PREP_INTERRUPT);
 
 	if (!tx) {
 		dev_err(dev, "Self-test xor prep failed\n");
@@ -1468,7 +1289,7 @@
 
 	tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
 
-	if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_SUCCESS) {
+	if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_COMPLETE) {
 		dev_err(dev, "Self-test xor timed out\n");
 		err = -ENODEV;
 		goto dma_unmap;
@@ -1507,9 +1328,7 @@
 					   DMA_TO_DEVICE);
 	tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs,
 					  IOAT_NUM_SRC_TEST + 1, PAGE_SIZE,
-					  &xor_val_result, DMA_PREP_INTERRUPT |
-					  DMA_COMPL_SKIP_SRC_UNMAP |
-					  DMA_COMPL_SKIP_DEST_UNMAP);
+					  &xor_val_result, DMA_PREP_INTERRUPT);
 	if (!tx) {
 		dev_err(dev, "Self-test zero prep failed\n");
 		err = -ENODEV;
@@ -1530,7 +1349,7 @@
 
 	tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
 
-	if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_SUCCESS) {
+	if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_COMPLETE) {
 		dev_err(dev, "Self-test validate timed out\n");
 		err = -ENODEV;
 		goto dma_unmap;
@@ -1545,6 +1364,8 @@
 		goto free_resources;
 	}
 
+	memset(page_address(dest), 0, PAGE_SIZE);
+
 	/* test for non-zero parity sum */
 	op = IOAT_OP_XOR_VAL;
 
@@ -1554,9 +1375,7 @@
 					   DMA_TO_DEVICE);
 	tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs,
 					  IOAT_NUM_SRC_TEST + 1, PAGE_SIZE,
-					  &xor_val_result, DMA_PREP_INTERRUPT |
-					  DMA_COMPL_SKIP_SRC_UNMAP |
-					  DMA_COMPL_SKIP_DEST_UNMAP);
+					  &xor_val_result, DMA_PREP_INTERRUPT);
 	if (!tx) {
 		dev_err(dev, "Self-test 2nd zero prep failed\n");
 		err = -ENODEV;
@@ -1577,7 +1396,7 @@
 
 	tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
 
-	if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_SUCCESS) {
+	if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_COMPLETE) {
 		dev_err(dev, "Self-test 2nd validate timed out\n");
 		err = -ENODEV;
 		goto dma_unmap;
@@ -1630,52 +1449,36 @@
 
 static int ioat3_irq_reinit(struct ioatdma_device *device)
 {
-	int msixcnt = device->common.chancnt;
 	struct pci_dev *pdev = device->pdev;
-	int i;
-	struct msix_entry *msix;
-	struct ioat_chan_common *chan;
-	int err = 0;
+	int irq = pdev->irq, i;
+
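+	/* only BWD ioat devices need the interrupt setup rebuilt after a reset */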
+	if (!is_bwd_ioat(pdev))
+		return 0;
 
 	switch (device->irq_mode) {
 	case IOAT_MSIX:
+		for (i = 0; i < device->common.chancnt; i++) {
+			struct msix_entry *msix = &device->msix_entries[i];
+			struct ioat_chan_common *chan;
 
-		for (i = 0; i < msixcnt; i++) {
-			msix = &device->msix_entries[i];
 			chan = ioat_chan_by_index(device, i);
 			devm_free_irq(&pdev->dev, msix->vector, chan);
 		}
 
 		pci_disable_msix(pdev);
 		break;
-
-	case IOAT_MSIX_SINGLE:
-		msix = &device->msix_entries[0];
-		chan = ioat_chan_by_index(device, 0);
-		devm_free_irq(&pdev->dev, msix->vector, chan);
-		pci_disable_msix(pdev);
-		break;
-
 	case IOAT_MSI:
-		chan = ioat_chan_by_index(device, 0);
-		devm_free_irq(&pdev->dev, pdev->irq, chan);
 		pci_disable_msi(pdev);
-		break;
-
+		/* fall through */
 	case IOAT_INTX:
-		chan = ioat_chan_by_index(device, 0);
-		devm_free_irq(&pdev->dev, pdev->irq, chan);
+		devm_free_irq(&pdev->dev, irq, device);
 		break;
-
 	default:
 		return 0;
 	}
-
 	device->irq_mode = IOAT_NOIRQ;
 
-	err = ioat_dma_setup_interrupts(device);
-
-	return err;
+	return ioat_dma_setup_interrupts(device);
 }
 
 static int ioat3_reset_hw(struct ioat_chan_common *chan)
@@ -1718,14 +1521,12 @@
 	}
 
 	err = ioat2_reset_sync(chan, msecs_to_jiffies(200));
-	if (err) {
-		dev_err(&pdev->dev, "Failed to reset!\n");
-		return err;
-	}
-
-	if (device->irq_mode != IOAT_NOIRQ && is_bwd_ioat(pdev))
+	if (!err)
 		err = ioat3_irq_reinit(device);
 
+	if (err)
+		dev_err(&pdev->dev, "Failed to reset: %d\n", err);
+
 	return err;
 }
 
@@ -1835,21 +1636,15 @@
 		char pool_name[14];
 		int i;
 
-		/* allocate sw descriptor pool for SED */
-		device->sed_pool = kmem_cache_create("ioat_sed",
-				sizeof(struct ioat_sed_ent), 0, 0, NULL);
-		if (!device->sed_pool)
-			return -ENOMEM;
-
 		for (i = 0; i < MAX_SED_POOLS; i++) {
 			snprintf(pool_name, 14, "ioat_hw%d_sed", i);
 
 			/* allocate SED DMA pool */
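+			/* devres-managed: freed automatically when the device is unbound */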
-			device->sed_hw_pool[i] = dma_pool_create(pool_name,
+			device->sed_hw_pool[i] = dmam_pool_create(pool_name,
 					&pdev->dev,
 					SED_SIZE * (i + 1), 64, 0);
 			if (!device->sed_hw_pool[i])
-				goto sed_pool_cleanup;
+				return -ENOMEM;
 
 		}
 	}
@@ -1875,28 +1670,4 @@
 		device->dca = ioat3_dca_init(pdev, device->reg_base);
 
 	return 0;
-
-sed_pool_cleanup:
-	if (device->sed_pool) {
-		int i;
-		kmem_cache_destroy(device->sed_pool);
-
-		for (i = 0; i < MAX_SED_POOLS; i++)
-			if (device->sed_hw_pool[i])
-				dma_pool_destroy(device->sed_hw_pool[i]);
-	}
-
-	return -ENOMEM;
-}
-
-void ioat3_dma_remove(struct ioatdma_device *device)
-{
-	if (device->sed_pool) {
-		int i;
-		kmem_cache_destroy(device->sed_pool);
-
-		for (i = 0; i < MAX_SED_POOLS; i++)
-			if (device->sed_hw_pool[i])
-				dma_pool_destroy(device->sed_hw_pool[i]);
-	}
 }
diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c
index 2c8d560..1d051cd 100644
--- a/drivers/dma/ioat/pci.c
+++ b/drivers/dma/ioat/pci.c
@@ -123,6 +123,7 @@
 MODULE_PARM_DESC(ioat_dca_enabled, "control support of dca service (default: 1)");
 
 struct kmem_cache *ioat2_cache;
+struct kmem_cache *ioat3_sed_cache;
 
 #define DRV_NAME "ioatdma"
 
@@ -207,9 +208,6 @@
 	if (!device)
 		return;
 
-	if (device->version >= IOAT_VER_3_0)
-		ioat3_dma_remove(device);
-
 	dev_err(&pdev->dev, "Removing dma and dca services\n");
 	if (device->dca) {
 		unregister_dca_provider(device->dca, &pdev->dev);
@@ -221,7 +219,7 @@
 
 static int __init ioat_init_module(void)
 {
-	int err;
+	int err = -ENOMEM;
 
 	pr_info("%s: Intel(R) QuickData Technology Driver %s\n",
 		DRV_NAME, IOAT_DMA_VERSION);
@@ -231,9 +229,21 @@
 	if (!ioat2_cache)
 		return -ENOMEM;
 
+	ioat3_sed_cache = KMEM_CACHE(ioat_sed_ent, 0);
+	if (!ioat3_sed_cache)
+		goto err_ioat2_cache;
+
 	err = pci_register_driver(&ioat_pci_driver);
 	if (err)
-		kmem_cache_destroy(ioat2_cache);
+		goto err_ioat3_cache;
+
+	return 0;
+
+ err_ioat3_cache:
+	kmem_cache_destroy(ioat3_sed_cache);
+
+ err_ioat2_cache:
+	kmem_cache_destroy(ioat2_cache);
 
 	return err;
 }
diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c
index dd8b44a..c56137b 100644
--- a/drivers/dma/iop-adma.c
+++ b/drivers/dma/iop-adma.c
@@ -61,80 +61,6 @@
 	}
 }
 
-static void
-iop_desc_unmap(struct iop_adma_chan *iop_chan, struct iop_adma_desc_slot *desc)
-{
-	struct dma_async_tx_descriptor *tx = &desc->async_tx;
-	struct iop_adma_desc_slot *unmap = desc->group_head;
-	struct device *dev = &iop_chan->device->pdev->dev;
-	u32 len = unmap->unmap_len;
-	enum dma_ctrl_flags flags = tx->flags;
-	u32 src_cnt;
-	dma_addr_t addr;
-	dma_addr_t dest;
-
-	src_cnt = unmap->unmap_src_cnt;
-	dest = iop_desc_get_dest_addr(unmap, iop_chan);
-	if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
-		enum dma_data_direction dir;
-
-		if (src_cnt > 1) /* is xor? */
-			dir = DMA_BIDIRECTIONAL;
-		else
-			dir = DMA_FROM_DEVICE;
-
-		dma_unmap_page(dev, dest, len, dir);
-	}
-
-	if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-		while (src_cnt--) {
-			addr = iop_desc_get_src_addr(unmap, iop_chan, src_cnt);
-			if (addr == dest)
-				continue;
-			dma_unmap_page(dev, addr, len, DMA_TO_DEVICE);
-		}
-	}
-	desc->group_head = NULL;
-}
-
-static void
-iop_desc_unmap_pq(struct iop_adma_chan *iop_chan, struct iop_adma_desc_slot *desc)
-{
-	struct dma_async_tx_descriptor *tx = &desc->async_tx;
-	struct iop_adma_desc_slot *unmap = desc->group_head;
-	struct device *dev = &iop_chan->device->pdev->dev;
-	u32 len = unmap->unmap_len;
-	enum dma_ctrl_flags flags = tx->flags;
-	u32 src_cnt = unmap->unmap_src_cnt;
-	dma_addr_t pdest = iop_desc_get_dest_addr(unmap, iop_chan);
-	dma_addr_t qdest = iop_desc_get_qdest_addr(unmap, iop_chan);
-	int i;
-
-	if (tx->flags & DMA_PREP_CONTINUE)
-		src_cnt -= 3;
-
-	if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP) && !desc->pq_check_result) {
-		dma_unmap_page(dev, pdest, len, DMA_BIDIRECTIONAL);
-		dma_unmap_page(dev, qdest, len, DMA_BIDIRECTIONAL);
-	}
-
-	if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-		dma_addr_t addr;
-
-		for (i = 0; i < src_cnt; i++) {
-			addr = iop_desc_get_src_addr(unmap, iop_chan, i);
-			dma_unmap_page(dev, addr, len, DMA_TO_DEVICE);
-		}
-		if (desc->pq_check_result) {
-			dma_unmap_page(dev, pdest, len, DMA_TO_DEVICE);
-			dma_unmap_page(dev, qdest, len, DMA_TO_DEVICE);
-		}
-	}
-
-	desc->group_head = NULL;
-}
-
-
 static dma_cookie_t
 iop_adma_run_tx_complete_actions(struct iop_adma_desc_slot *desc,
 	struct iop_adma_chan *iop_chan, dma_cookie_t cookie)
@@ -152,15 +78,9 @@
 		if (tx->callback)
 			tx->callback(tx->callback_param);
 
-		/* unmap dma addresses
-		 * (unmap_single vs unmap_page?)
-		 */
-		if (desc->group_head && desc->unmap_len) {
-			if (iop_desc_is_pq(desc))
-				iop_desc_unmap_pq(iop_chan, desc);
-			else
-				iop_desc_unmap(iop_chan, desc);
-		}
+		dma_descriptor_unmap(tx);
+		if (desc->group_head)
+			desc->group_head = NULL;
 	}
 
 	/* run dependent operations */
@@ -591,7 +511,6 @@
 	if (sw_desc) {
 		grp_start = sw_desc->group_head;
 		iop_desc_init_interrupt(grp_start, iop_chan);
-		grp_start->unmap_len = 0;
 		sw_desc->async_tx.flags = flags;
 	}
 	spin_unlock_bh(&iop_chan->lock);
@@ -623,8 +542,6 @@
 		iop_desc_set_byte_count(grp_start, iop_chan, len);
 		iop_desc_set_dest_addr(grp_start, iop_chan, dma_dest);
 		iop_desc_set_memcpy_src_addr(grp_start, dma_src);
-		sw_desc->unmap_src_cnt = 1;
-		sw_desc->unmap_len = len;
 		sw_desc->async_tx.flags = flags;
 	}
 	spin_unlock_bh(&iop_chan->lock);
@@ -657,8 +574,6 @@
 		iop_desc_init_xor(grp_start, src_cnt, flags);
 		iop_desc_set_byte_count(grp_start, iop_chan, len);
 		iop_desc_set_dest_addr(grp_start, iop_chan, dma_dest);
-		sw_desc->unmap_src_cnt = src_cnt;
-		sw_desc->unmap_len = len;
 		sw_desc->async_tx.flags = flags;
 		while (src_cnt--)
 			iop_desc_set_xor_src_addr(grp_start, src_cnt,
@@ -694,8 +609,6 @@
 		grp_start->xor_check_result = result;
 		pr_debug("\t%s: grp_start->xor_check_result: %p\n",
 			__func__, grp_start->xor_check_result);
-		sw_desc->unmap_src_cnt = src_cnt;
-		sw_desc->unmap_len = len;
 		sw_desc->async_tx.flags = flags;
 		while (src_cnt--)
 			iop_desc_set_zero_sum_src_addr(grp_start, src_cnt,
@@ -748,8 +661,6 @@
 			dst[0] = dst[1] & 0x7;
 
 		iop_desc_set_pq_addr(g, dst);
-		sw_desc->unmap_src_cnt = src_cnt;
-		sw_desc->unmap_len = len;
 		sw_desc->async_tx.flags = flags;
 		for (i = 0; i < src_cnt; i++)
 			iop_desc_set_pq_src_addr(g, i, src[i], scf[i]);
@@ -804,8 +715,6 @@
 		g->pq_check_result = pqres;
 		pr_debug("\t%s: g->pq_check_result: %p\n",
 			__func__, g->pq_check_result);
-		sw_desc->unmap_src_cnt = src_cnt+2;
-		sw_desc->unmap_len = len;
 		sw_desc->async_tx.flags = flags;
 		while (src_cnt--)
 			iop_desc_set_pq_zero_sum_src_addr(g, src_cnt,
@@ -864,7 +773,7 @@
 	int ret;
 
 	ret = dma_cookie_status(chan, cookie, txstate);
-	if (ret == DMA_SUCCESS)
+	if (ret == DMA_COMPLETE)
 		return ret;
 
 	iop_adma_slot_cleanup(iop_chan);
@@ -983,7 +892,7 @@
 	msleep(1);
 
 	if (iop_adma_status(dma_chan, cookie, NULL) !=
-			DMA_SUCCESS) {
+			DMA_COMPLETE) {
 		dev_err(dma_chan->device->dev,
 			"Self-test copy timed out, disabling\n");
 		err = -ENODEV;
@@ -1083,7 +992,7 @@
 	msleep(8);
 
 	if (iop_adma_status(dma_chan, cookie, NULL) !=
-		DMA_SUCCESS) {
+		DMA_COMPLETE) {
 		dev_err(dma_chan->device->dev,
 			"Self-test xor timed out, disabling\n");
 		err = -ENODEV;
@@ -1129,7 +1038,7 @@
 	iop_adma_issue_pending(dma_chan);
 	msleep(8);
 
-	if (iop_adma_status(dma_chan, cookie, NULL) != DMA_SUCCESS) {
+	if (iop_adma_status(dma_chan, cookie, NULL) != DMA_COMPLETE) {
 		dev_err(dma_chan->device->dev,
 			"Self-test zero sum timed out, disabling\n");
 		err = -ENODEV;
@@ -1158,7 +1067,7 @@
 	iop_adma_issue_pending(dma_chan);
 	msleep(8);
 
-	if (iop_adma_status(dma_chan, cookie, NULL) != DMA_SUCCESS) {
+	if (iop_adma_status(dma_chan, cookie, NULL) != DMA_COMPLETE) {
 		dev_err(dma_chan->device->dev,
 			"Self-test non-zero sum timed out, disabling\n");
 		err = -ENODEV;
@@ -1254,7 +1163,7 @@
 	msleep(8);
 
 	if (iop_adma_status(dma_chan, cookie, NULL) !=
-		DMA_SUCCESS) {
+		DMA_COMPLETE) {
 		dev_err(dev, "Self-test pq timed out, disabling\n");
 		err = -ENODEV;
 		goto free_resources;
@@ -1291,7 +1200,7 @@
 	msleep(8);
 
 	if (iop_adma_status(dma_chan, cookie, NULL) !=
-		DMA_SUCCESS) {
+		DMA_COMPLETE) {
 		dev_err(dev, "Self-test pq-zero-sum timed out, disabling\n");
 		err = -ENODEV;
 		goto free_resources;
@@ -1323,7 +1232,7 @@
 	msleep(8);
 
 	if (iop_adma_status(dma_chan, cookie, NULL) !=
-		DMA_SUCCESS) {
+		DMA_COMPLETE) {
 		dev_err(dev, "Self-test !pq-zero-sum timed out, disabling\n");
 		err = -ENODEV;
 		goto free_resources;
diff --git a/drivers/dma/ipu/ipu_idmac.c b/drivers/dma/ipu/ipu_idmac.c
index cb9c0bc..128ca14 100644
--- a/drivers/dma/ipu/ipu_idmac.c
+++ b/drivers/dma/ipu/ipu_idmac.c
@@ -1232,8 +1232,10 @@
 	desc = list_entry(ichan->queue.next, struct idmac_tx_desc, list);
 	descnew = desc;
 
-	dev_dbg(dev, "IDMAC irq %d, dma 0x%08x, next dma 0x%08x, current %d, curbuf 0x%08x\n",
-		irq, sg_dma_address(*sg), sgnext ? sg_dma_address(sgnext) : 0, ichan->active_buffer, curbuf);
+	dev_dbg(dev, "IDMAC irq %d, dma %#llx, next dma %#llx, current %d, curbuf %#x\n",
+		irq, (u64)sg_dma_address(*sg),
+		sgnext ? (u64)sg_dma_address(sgnext) : 0,
+		ichan->active_buffer, curbuf);
 
 	/* Find the descriptor of sgnext */
 	sgnew = idmac_sg_next(ichan, &descnew, *sg);
diff --git a/drivers/dma/k3dma.c b/drivers/dma/k3dma.c
index a2c330f..e260754 100644
--- a/drivers/dma/k3dma.c
+++ b/drivers/dma/k3dma.c
@@ -344,7 +344,7 @@
 	size_t bytes = 0;
 
 	ret = dma_cookie_status(&c->vc.chan, cookie, state);
-	if (ret == DMA_SUCCESS)
+	if (ret == DMA_COMPLETE)
 		return ret;
 
 	spin_lock_irqsave(&c->vc.lock, flags);
@@ -693,7 +693,7 @@
 
 	irq = platform_get_irq(op, 0);
 	ret = devm_request_irq(&op->dev, irq,
-			k3_dma_int_handler, IRQF_DISABLED, DRIVER_NAME, d);
+			k3_dma_int_handler, 0, DRIVER_NAME, d);
 	if (ret)
 		return ret;
 
diff --git a/drivers/dma/mmp_pdma.c b/drivers/dma/mmp_pdma.c
index ff8d7827..dcb1e05 100644
--- a/drivers/dma/mmp_pdma.c
+++ b/drivers/dma/mmp_pdma.c
@@ -798,8 +798,7 @@
 		 * move the descriptors to a temporary list so we can drop
 		 * the lock during the entire cleanup operation
 		 */
-		list_del(&desc->node);
-		list_add(&desc->node, &chain_cleanup);
+		list_move(&desc->node, &chain_cleanup);
 
 		/*
 		 * Look for the first list entry which has the ENDIRQEN flag
@@ -863,7 +862,7 @@
 
 	if (irq) {
 		ret = devm_request_irq(pdev->dev, irq,
-			mmp_pdma_chan_handler, IRQF_DISABLED, "pdma", phy);
+			mmp_pdma_chan_handler, 0, "pdma", phy);
 		if (ret) {
 			dev_err(pdev->dev, "channel request irq fail!\n");
 			return ret;
@@ -970,7 +969,7 @@
 		/* all chan share one irq, demux inside */
 		irq = platform_get_irq(op, 0);
 		ret = devm_request_irq(pdev->dev, irq,
-			mmp_pdma_int_handler, IRQF_DISABLED, "pdma", pdev);
+			mmp_pdma_int_handler, 0, "pdma", pdev);
 		if (ret)
 			return ret;
 	}
diff --git a/drivers/dma/mmp_tdma.c b/drivers/dma/mmp_tdma.c
index d3b6358..3ddacc1 100644
--- a/drivers/dma/mmp_tdma.c
+++ b/drivers/dma/mmp_tdma.c
@@ -62,6 +62,11 @@
 #define TDCR_BURSTSZ_16B	(0x3 << 6)
 #define TDCR_BURSTSZ_32B	(0x6 << 6)
 #define TDCR_BURSTSZ_64B	(0x7 << 6)
+#define TDCR_BURSTSZ_SQU_1B		(0x5 << 6)
+#define TDCR_BURSTSZ_SQU_2B		(0x6 << 6)
+#define TDCR_BURSTSZ_SQU_4B		(0x0 << 6)
+#define TDCR_BURSTSZ_SQU_8B		(0x1 << 6)
+#define TDCR_BURSTSZ_SQU_16B	(0x3 << 6)
 #define TDCR_BURSTSZ_SQU_32B	(0x7 << 6)
 #define TDCR_BURSTSZ_128B	(0x5 << 6)
 #define TDCR_DSTDIR_MSK		(0x3 << 4)	/* Dst Direction */
@@ -158,7 +163,7 @@
 	/* disable irq */
 	writel(0, tdmac->reg_base + TDIMR);
 
-	tdmac->status = DMA_SUCCESS;
+	tdmac->status = DMA_COMPLETE;
 }
 
 static void mmp_tdma_resume_chan(struct mmp_tdma_chan *tdmac)
@@ -228,8 +233,31 @@
 			return -EINVAL;
 		}
 	} else if (tdmac->type == PXA910_SQU) {
-		tdcr |= TDCR_BURSTSZ_SQU_32B;
 		tdcr |= TDCR_SSPMOD;
+
+		switch (tdmac->burst_sz) {
+		case 1:
+			tdcr |= TDCR_BURSTSZ_SQU_1B;
+			break;
+		case 2:
+			tdcr |= TDCR_BURSTSZ_SQU_2B;
+			break;
+		case 4:
+			tdcr |= TDCR_BURSTSZ_SQU_4B;
+			break;
+		case 8:
+			tdcr |= TDCR_BURSTSZ_SQU_8B;
+			break;
+		case 16:
+			tdcr |= TDCR_BURSTSZ_SQU_16B;
+			break;
+		case 32:
+			tdcr |= TDCR_BURSTSZ_SQU_32B;
+			break;
+		default:
+			dev_err(tdmac->dev, "mmp_tdma: unknown burst size.\n");
+			return -EINVAL;
+		}
 	}
 
 	writel(tdcr, tdmac->reg_base + TDCR);
@@ -324,7 +352,7 @@
 
 	if (tdmac->irq) {
 		ret = devm_request_irq(tdmac->dev, tdmac->irq,
-			mmp_tdma_chan_handler, IRQF_DISABLED, "tdma", tdmac);
+			mmp_tdma_chan_handler, 0, "tdma", tdmac);
 		if (ret)
 			return ret;
 	}
@@ -365,7 +393,7 @@
 	int num_periods = buf_len / period_len;
 	int i = 0, buf = 0;
 
-	if (tdmac->status != DMA_SUCCESS)
+	if (tdmac->status != DMA_COMPLETE)
 		return NULL;
 
 	if (period_len > TDMA_MAX_XFER_BYTES) {
@@ -499,7 +527,7 @@
 	tdmac->idx	   = idx;
 	tdmac->type	   = type;
 	tdmac->reg_base	   = (unsigned long)tdev->base + idx * 4;
-	tdmac->status = DMA_SUCCESS;
+	tdmac->status = DMA_COMPLETE;
 	tdev->tdmac[tdmac->idx] = tdmac;
 	tasklet_init(&tdmac->tasklet, dma_do_tasklet, (unsigned long)tdmac);
 
@@ -554,7 +582,7 @@
 	if (irq_num != chan_num) {
 		irq = platform_get_irq(pdev, 0);
 		ret = devm_request_irq(&pdev->dev, irq,
-			mmp_tdma_int_handler, IRQF_DISABLED, "tdma", tdev);
+			mmp_tdma_int_handler, 0, "tdma", tdev);
 		if (ret)
 			return ret;
 	}
diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c
index 536dcb8..7807f0e 100644
--- a/drivers/dma/mv_xor.c
+++ b/drivers/dma/mv_xor.c
@@ -60,14 +60,6 @@
 	return hw_desc->phy_dest_addr;
 }
 
-static u32 mv_desc_get_src_addr(struct mv_xor_desc_slot *desc,
-				int src_idx)
-{
-	struct mv_xor_desc *hw_desc = desc->hw_desc;
-	return hw_desc->phy_src_addr[mv_phy_src_idx(src_idx)];
-}
-
-
 static void mv_desc_set_byte_count(struct mv_xor_desc_slot *desc,
 				   u32 byte_count)
 {
@@ -278,42 +270,9 @@
 			desc->async_tx.callback(
 				desc->async_tx.callback_param);
 
-		/* unmap dma addresses
-		 * (unmap_single vs unmap_page?)
-		 */
-		if (desc->group_head && desc->unmap_len) {
-			struct mv_xor_desc_slot *unmap = desc->group_head;
-			struct device *dev = mv_chan_to_devp(mv_chan);
-			u32 len = unmap->unmap_len;
-			enum dma_ctrl_flags flags = desc->async_tx.flags;
-			u32 src_cnt;
-			dma_addr_t addr;
-			dma_addr_t dest;
-
-			src_cnt = unmap->unmap_src_cnt;
-			dest = mv_desc_get_dest_addr(unmap);
-			if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
-				enum dma_data_direction dir;
-
-				if (src_cnt > 1) /* is xor ? */
-					dir = DMA_BIDIRECTIONAL;
-				else
-					dir = DMA_FROM_DEVICE;
-				dma_unmap_page(dev, dest, len, dir);
-			}
-
-			if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-				while (src_cnt--) {
-					addr = mv_desc_get_src_addr(unmap,
-								    src_cnt);
-					if (addr == dest)
-						continue;
-					dma_unmap_page(dev, addr, len,
-						       DMA_TO_DEVICE);
-				}
-			}
+		dma_descriptor_unmap(&desc->async_tx);
+		if (desc->group_head)
 			desc->group_head = NULL;
-		}
 	}
 
 	/* run dependent operations */
@@ -749,7 +708,7 @@
 	enum dma_status ret;
 
 	ret = dma_cookie_status(chan, cookie, txstate);
-	if (ret == DMA_SUCCESS) {
+	if (ret == DMA_COMPLETE) {
 		mv_xor_clean_completed_slots(mv_chan);
 		return ret;
 	}
@@ -874,7 +833,7 @@
 	msleep(1);
 
 	if (mv_xor_status(dma_chan, cookie, NULL) !=
-	    DMA_SUCCESS) {
+	    DMA_COMPLETE) {
 		dev_err(dma_chan->device->dev,
 			"Self-test copy timed out, disabling\n");
 		err = -ENODEV;
@@ -968,7 +927,7 @@
 	msleep(8);
 
 	if (mv_xor_status(dma_chan, cookie, NULL) !=
-	    DMA_SUCCESS) {
+	    DMA_COMPLETE) {
 		dev_err(dma_chan->device->dev,
 			"Self-test xor timed out, disabling\n");
 		err = -ENODEV;
@@ -1076,10 +1035,7 @@
 	}
 
 	mv_chan->mmr_base = xordev->xor_base;
-	if (!mv_chan->mmr_base) {
-		ret = -ENOMEM;
-		goto err_free_dma;
-	}
+	mv_chan->mmr_high_base = xordev->xor_high_base;
 	tasklet_init(&mv_chan->irq_tasklet, mv_xor_tasklet, (unsigned long)
 		     mv_chan);
 
@@ -1138,7 +1094,7 @@
 mv_xor_conf_mbus_windows(struct mv_xor_device *xordev,
 			 const struct mbus_dram_target_info *dram)
 {
-	void __iomem *base = xordev->xor_base;
+	void __iomem *base = xordev->xor_high_base;
 	u32 win_enable = 0;
 	int i;
 
diff --git a/drivers/dma/mv_xor.h b/drivers/dma/mv_xor.h
index 06b067f..d074922 100644
--- a/drivers/dma/mv_xor.h
+++ b/drivers/dma/mv_xor.h
@@ -34,13 +34,13 @@
 #define XOR_OPERATION_MODE_MEMCPY	2
 #define XOR_DESCRIPTOR_SWAP		BIT(14)
 
-#define XOR_CURR_DESC(chan)	(chan->mmr_base + 0x210 + (chan->idx * 4))
-#define XOR_NEXT_DESC(chan)	(chan->mmr_base + 0x200 + (chan->idx * 4))
-#define XOR_BYTE_COUNT(chan)	(chan->mmr_base + 0x220 + (chan->idx * 4))
-#define XOR_DEST_POINTER(chan)	(chan->mmr_base + 0x2B0 + (chan->idx * 4))
-#define XOR_BLOCK_SIZE(chan)	(chan->mmr_base + 0x2C0 + (chan->idx * 4))
-#define XOR_INIT_VALUE_LOW(chan)	(chan->mmr_base + 0x2E0)
-#define XOR_INIT_VALUE_HIGH(chan)	(chan->mmr_base + 0x2E4)
+#define XOR_CURR_DESC(chan)	(chan->mmr_high_base + 0x10 + (chan->idx * 4))
+#define XOR_NEXT_DESC(chan)	(chan->mmr_high_base + 0x00 + (chan->idx * 4))
+#define XOR_BYTE_COUNT(chan)	(chan->mmr_high_base + 0x20 + (chan->idx * 4))
+#define XOR_DEST_POINTER(chan)	(chan->mmr_high_base + 0xB0 + (chan->idx * 4))
+#define XOR_BLOCK_SIZE(chan)	(chan->mmr_high_base + 0xC0 + (chan->idx * 4))
+#define XOR_INIT_VALUE_LOW(chan)	(chan->mmr_high_base + 0xE0)
+#define XOR_INIT_VALUE_HIGH(chan)	(chan->mmr_high_base + 0xE4)
 
 #define XOR_CONFIG(chan)	(chan->mmr_base + 0x10 + (chan->idx * 4))
 #define XOR_ACTIVATION(chan)	(chan->mmr_base + 0x20 + (chan->idx * 4))
@@ -50,11 +50,11 @@
 #define XOR_ERROR_ADDR(chan)	(chan->mmr_base + 0x60)
 #define XOR_INTR_MASK_VALUE	0x3F5
 
-#define WINDOW_BASE(w)		(0x250 + ((w) << 2))
-#define WINDOW_SIZE(w)		(0x270 + ((w) << 2))
-#define WINDOW_REMAP_HIGH(w)	(0x290 + ((w) << 2))
-#define WINDOW_BAR_ENABLE(chan)	(0x240 + ((chan) << 2))
-#define WINDOW_OVERRIDE_CTRL(chan)	(0x2A0 + ((chan) << 2))
+#define WINDOW_BASE(w)		(0x50 + ((w) << 2))
+#define WINDOW_SIZE(w)		(0x70 + ((w) << 2))
+#define WINDOW_REMAP_HIGH(w)	(0x90 + ((w) << 2))
+#define WINDOW_BAR_ENABLE(chan)	(0x40 + ((chan) << 2))
+#define WINDOW_OVERRIDE_CTRL(chan)	(0xA0 + ((chan) << 2))
 
 struct mv_xor_device {
 	void __iomem	     *xor_base;
@@ -82,6 +82,7 @@
 	int			pending;
 	spinlock_t		lock; /* protects the descriptor slot pool */
 	void __iomem		*mmr_base;
+	void __iomem		*mmr_high_base;
 	unsigned int		idx;
 	int                     irq;
 	enum dma_transaction_type	current_type;
diff --git a/drivers/dma/mxs-dma.c b/drivers/dma/mxs-dma.c
index ccd13df..ead4913 100644
--- a/drivers/dma/mxs-dma.c
+++ b/drivers/dma/mxs-dma.c
@@ -27,6 +27,7 @@
 #include <linux/of.h>
 #include <linux/of_device.h>
 #include <linux/of_dma.h>
+#include <linux/list.h>
 
 #include <asm/irq.h>
 
@@ -57,6 +58,9 @@
 	(((dma_is_apbh(d) && apbh_is_old(d)) ? 0x050 : 0x110) + (n) * 0x70)
 #define HW_APBHX_CHn_SEMA(d, n) \
 	(((dma_is_apbh(d) && apbh_is_old(d)) ? 0x080 : 0x140) + (n) * 0x70)
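+/* BAR holds the channel's current buffer address; DEBUG1 low bits hold its state */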
+#define HW_APBHX_CHn_BAR(d, n) \
+	(((dma_is_apbh(d) && apbh_is_old(d)) ? 0x070 : 0x130) + (n) * 0x70)
+#define HW_APBX_CHn_DEBUG1(d, n) (0x150 + (n) * 0x70)
 
 /*
  * ccw bits definitions
@@ -115,7 +119,9 @@
 	int				desc_count;
 	enum dma_status			status;
 	unsigned int			flags;
+	bool				reset;
 #define MXS_DMA_SG_LOOP			(1 << 0)
+#define MXS_DMA_USE_SEMAPHORE		(1 << 1)
 };
 
 #define MXS_DMA_CHANNELS		16
@@ -201,12 +207,47 @@
 	struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
 	int chan_id = mxs_chan->chan.chan_id;
 
-	if (dma_is_apbh(mxs_dma) && apbh_is_old(mxs_dma))
+	/*
+	 * mxs dma channel resets can cause a channel stall. To recover from a
+	 * channel stall, we have to reset the whole DMA engine. To avoid this,
+	 * we use cyclic DMA with semaphores that are incremented in
+	 * mxs_dma_int_handler. To reset the channel, we simply stop writing
+	 * into the semaphore counter.
+	 */
+	if (mxs_chan->flags & MXS_DMA_USE_SEMAPHORE &&
+			mxs_chan->flags & MXS_DMA_SG_LOOP) {
+		mxs_chan->reset = true;
+	} else if (dma_is_apbh(mxs_dma) && apbh_is_old(mxs_dma)) {
 		writel(1 << (chan_id + BP_APBH_CTRL0_RESET_CHANNEL),
 			mxs_dma->base + HW_APBHX_CTRL0 + STMP_OFFSET_REG_SET);
-	else
+	} else {
+		unsigned long elapsed = 0;
+		const unsigned long max_wait = 50000; /* 50ms */
+		void __iomem *reg_dbg1 = mxs_dma->base +
+				HW_APBX_CHn_DEBUG1(mxs_dma, chan_id);
+
+		/*
+		 * On i.MX28 APBX, the DMA channel can stop working if we reset
+		 * the channel while it is in READ_FLUSH (0x08) state.
+		 * We wait here until the channel leaves that state, then trigger
+		 * the reset. The wait is bounded to 50ms, so this busy-wait
+		 * cannot stall the kernel for long.
+		 */
+		while ((readl(reg_dbg1) & 0xf) == 0x8 && elapsed < max_wait) {
+			udelay(100);
+			elapsed += 100;
+		}
+
+		if (elapsed >= max_wait)
+			dev_err(&mxs_chan->mxs_dma->pdev->dev,
+					"Timed out waiting for DMA channel %d to leave READ_FLUSH state, resetting it anyway\n",
+					chan_id);
+
 		writel(1 << (chan_id + BP_APBHX_CHANNEL_CTRL_RESET_CHANNEL),
 			mxs_dma->base + HW_APBHX_CHANNEL_CTRL + STMP_OFFSET_REG_SET);
+	}
+
+	mxs_chan->status = DMA_COMPLETE;
 }
 
 static void mxs_dma_enable_chan(struct mxs_dma_chan *mxs_chan)
@@ -219,12 +260,21 @@
 		mxs_dma->base + HW_APBHX_CHn_NXTCMDAR(mxs_dma, chan_id));
 
 	/* write 1 to SEMA to kick off the channel */
-	writel(1, mxs_dma->base + HW_APBHX_CHn_SEMA(mxs_dma, chan_id));
+	if (mxs_chan->flags & MXS_DMA_USE_SEMAPHORE &&
+			mxs_chan->flags & MXS_DMA_SG_LOOP) {
+		/*
+		 * A cyclic DMA consists of at least 2 segments, so initialize
+		 * the semaphore to 2; that gives the interrupt handler enough
+		 * time to add 1 to it before the hardware runs out of segments.
+		 */
+		writel(2, mxs_dma->base + HW_APBHX_CHn_SEMA(mxs_dma, chan_id));
+	} else {
+		writel(1, mxs_dma->base + HW_APBHX_CHn_SEMA(mxs_dma, chan_id));
+	}
+	mxs_chan->reset = false;
 }
 
 static void mxs_dma_disable_chan(struct mxs_dma_chan *mxs_chan)
 {
-	mxs_chan->status = DMA_SUCCESS;
+	mxs_chan->status = DMA_COMPLETE;
 }
 
 static void mxs_dma_pause_chan(struct mxs_dma_chan *mxs_chan)
@@ -272,58 +322,88 @@
 		mxs_chan->desc.callback(mxs_chan->desc.callback_param);
 }
 
+static int mxs_dma_irq_to_chan(struct mxs_dma_engine *mxs_dma, int irq)
+{
+	int i;
+
+	for (i = 0; i != mxs_dma->nr_channels; ++i)
+		if (mxs_dma->mxs_chans[i].chan_irq == irq)
+			return i;
+
+	return -EINVAL;
+}
+
 static irqreturn_t mxs_dma_int_handler(int irq, void *dev_id)
 {
 	struct mxs_dma_engine *mxs_dma = dev_id;
-	u32 stat1, stat2;
+	struct mxs_dma_chan *mxs_chan;
+	u32 completed;
+	u32 err;
+	int chan = mxs_dma_irq_to_chan(mxs_dma, irq);
+
+	if (chan < 0)
+		return IRQ_NONE;
 
 	/* completion status */
-	stat1 = readl(mxs_dma->base + HW_APBHX_CTRL1);
-	stat1 &= MXS_DMA_CHANNELS_MASK;
-	writel(stat1, mxs_dma->base + HW_APBHX_CTRL1 + STMP_OFFSET_REG_CLR);
+	completed = readl(mxs_dma->base + HW_APBHX_CTRL1);
+	completed = (completed >> chan) & 0x1;
+
+	/* Clear interrupt */
+	writel((1 << chan),
+			mxs_dma->base + HW_APBHX_CTRL1 + STMP_OFFSET_REG_CLR);
 
 	/* error status */
-	stat2 = readl(mxs_dma->base + HW_APBHX_CTRL2);
-	writel(stat2, mxs_dma->base + HW_APBHX_CTRL2 + STMP_OFFSET_REG_CLR);
+	err = readl(mxs_dma->base + HW_APBHX_CTRL2);
+	err &= (1 << (MXS_DMA_CHANNELS + chan)) | (1 << chan);
+
+	/*
+	 * error status bit is in the upper 16 bits, error irq bit in the lower
+	 * 16 bits. We transform it into a simpler error code:
+	 * err: 0x00 = no error, 0x01 = TERMINATION, 0x02 = BUS_ERROR
+	 */
+	err = (err >> (MXS_DMA_CHANNELS + chan)) + (err >> chan);
+
+	/* Clear error irq */
+	writel((1 << chan),
+			mxs_dma->base + HW_APBHX_CTRL2 + STMP_OFFSET_REG_CLR);
 
 	/*
 	 * When both completion and error of termination bits set at the
 	 * same time, we do not take it as an error.  IOW, it only becomes
-	 * an error we need to handle here in case of either it's (1) a bus
-	 * error or (2) a termination error with no completion.
+	 * an error we need to handle here when it is either a bus error or
+	 * a termination error with no completion. 0x01 is the termination
+	 * error, so subtracting (err & completed) clears it when the channel
+	 * also completed, leaving only the real error cases.
 	 */
-	stat2 = ((stat2 >> MXS_DMA_CHANNELS) & stat2) | /* (1) */
-		(~(stat2 >> MXS_DMA_CHANNELS) & stat2 & ~stat1); /* (2) */
+	err -= err & completed;
 
-	/* combine error and completion status for checking */
-	stat1 = (stat2 << MXS_DMA_CHANNELS) | stat1;
-	while (stat1) {
-		int channel = fls(stat1) - 1;
-		struct mxs_dma_chan *mxs_chan =
-			&mxs_dma->mxs_chans[channel % MXS_DMA_CHANNELS];
+	mxs_chan = &mxs_dma->mxs_chans[chan];
 
-		if (channel >= MXS_DMA_CHANNELS) {
-			dev_dbg(mxs_dma->dma_device.dev,
-				"%s: error in channel %d\n", __func__,
-				channel - MXS_DMA_CHANNELS);
-			mxs_chan->status = DMA_ERROR;
-			mxs_dma_reset_chan(mxs_chan);
+	if (err) {
+		dev_dbg(mxs_dma->dma_device.dev,
+			"%s: error in channel %d\n", __func__,
+			chan);
+		mxs_chan->status = DMA_ERROR;
+		mxs_dma_reset_chan(mxs_chan);
+	} else if (mxs_chan->status != DMA_COMPLETE) {
+		if (mxs_chan->flags & MXS_DMA_SG_LOOP) {
+			mxs_chan->status = DMA_IN_PROGRESS;
+			if (mxs_chan->flags & MXS_DMA_USE_SEMAPHORE)
+				writel(1, mxs_dma->base +
+					HW_APBHX_CHn_SEMA(mxs_dma, chan));
 		} else {
-			if (mxs_chan->flags & MXS_DMA_SG_LOOP)
-				mxs_chan->status = DMA_IN_PROGRESS;
-			else
-				mxs_chan->status = DMA_SUCCESS;
+			mxs_chan->status = DMA_COMPLETE;
 		}
-
-		stat1 &= ~(1 << channel);
-
-		if (mxs_chan->status == DMA_SUCCESS)
-			dma_cookie_complete(&mxs_chan->desc);
-
-		/* schedule tasklet on this channel */
-		tasklet_schedule(&mxs_chan->tasklet);
 	}
 
+	if (mxs_chan->status == DMA_COMPLETE) {
+		if (mxs_chan->reset)
+			return IRQ_HANDLED;
+		dma_cookie_complete(&mxs_chan->desc);
+	}
+
+	/* schedule tasklet on this channel */
+	tasklet_schedule(&mxs_chan->tasklet);
+
 	return IRQ_HANDLED;
 }
 
@@ -523,6 +603,7 @@
 
 	mxs_chan->status = DMA_IN_PROGRESS;
 	mxs_chan->flags |= MXS_DMA_SG_LOOP;
+	mxs_chan->flags |= MXS_DMA_USE_SEMAPHORE;
 
 	if (num_periods > NUM_CCW) {
 		dev_err(mxs_dma->dma_device.dev,
@@ -554,6 +635,7 @@
 		ccw->bits |= CCW_IRQ;
 		ccw->bits |= CCW_HALT_ON_TERM;
 		ccw->bits |= CCW_TERM_FLUSH;
+		ccw->bits |= CCW_DEC_SEM;
 		ccw->bits |= BF_CCW(direction == DMA_DEV_TO_MEM ?
 				MXS_DMA_CMD_WRITE : MXS_DMA_CMD_READ, COMMAND);
 
@@ -599,8 +681,24 @@
 			dma_cookie_t cookie, struct dma_tx_state *txstate)
 {
 	struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
+	struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
+	u32 residue = 0;
 
-	dma_set_tx_state(txstate, chan->completed_cookie, chan->cookie, 0);
+	if (mxs_chan->status == DMA_IN_PROGRESS &&
+			mxs_chan->flags & MXS_DMA_SG_LOOP) {
+		struct mxs_dma_ccw *last_ccw;
+		u32 bar;
+
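+		/*
+		 * residue = end address of the cyclic buffer minus the
+		 * channel's current buffer address (BAR)
+		 */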
+		last_ccw = &mxs_chan->ccw[mxs_chan->desc_count - 1];
+		residue = last_ccw->xfer_bytes + last_ccw->bufaddr;
+
+		bar = readl(mxs_dma->base +
+				HW_APBHX_CHn_BAR(mxs_dma, chan->chan_id));
+		residue -= bar;
+	}
+
+	dma_set_tx_state(txstate, chan->completed_cookie, chan->cookie,
+			residue);
 
 	return mxs_chan->status;
 }
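
A minimal client-side sketch (not part of the patch above), showing how a
cyclic-DMA user could consume the residue that mxs_dma_tx_status now reports;
chan, cookie, buf_len and pos are hypothetical placeholders:

	struct dma_tx_state state;
	enum dma_status status;
	size_t pos;

	status = dmaengine_tx_status(chan, cookie, &state);
	if (status == DMA_IN_PROGRESS)
		pos = buf_len - state.residue;	/* bytes consumed so far */
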
diff --git a/drivers/dma/omap-dma.c b/drivers/dma/omap-dma.c
index ec3fc4f..2f66cf4 100644
--- a/drivers/dma/omap-dma.c
+++ b/drivers/dma/omap-dma.c
@@ -248,7 +248,7 @@
 	unsigned long flags;
 
 	ret = dma_cookie_status(chan, cookie, txstate);
-	if (ret == DMA_SUCCESS || !txstate)
+	if (ret == DMA_COMPLETE || !txstate)
 		return ret;
 
 	spin_lock_irqsave(&c->vc.lock, flags);
diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index df8b10f..cdf0483 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -2268,6 +2268,8 @@
 			list_move_tail(&desc->node, &pch->dmac->desc_pool);
 		}
 
+		dma_descriptor_unmap(&desc->txd);
+
 		if (callback) {
 			spin_unlock_irqrestore(&pch->lock, flags);
 			callback(callback_param);
@@ -2314,7 +2316,7 @@
 		return false;
 
 	peri_id = chan->private;
-	return *peri_id == (unsigned)param;
+	return *peri_id == (unsigned long)param;
 }
 EXPORT_SYMBOL(pl330_filter);
 
@@ -2926,16 +2928,23 @@
 
 	amba_set_drvdata(adev, pdmac);
 
-	irq = adev->irq[0];
-	ret = request_irq(irq, pl330_irq_handler, 0,
-			dev_name(&adev->dev), pi);
-	if (ret)
-		return ret;
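+	/* request every IRQ line the AMBA device provides */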
+	for (i = 0; i < AMBA_NR_IRQS; i++) {
+		irq = adev->irq[i];
+		if (irq) {
+			ret = devm_request_irq(&adev->dev, irq,
+					       pl330_irq_handler, 0,
+					       dev_name(&adev->dev), pi);
+			if (ret)
+				return ret;
+		} else {
+			break;
+		}
+	}
 
 	pi->pcfg.periph_id = adev->periphid;
 	ret = pl330_add(pi);
 	if (ret)
-		goto probe_err1;
+		return ret;
 
 	INIT_LIST_HEAD(&pdmac->desc_pool);
 	spin_lock_init(&pdmac->pool_lock);
@@ -3033,8 +3042,6 @@
 
 	return 0;
 probe_err3:
-	amba_set_drvdata(adev, NULL);
-
 	/* Idle the DMAC */
 	list_for_each_entry_safe(pch, _p, &pdmac->ddma.channels,
 			chan.device_node) {
@@ -3048,8 +3055,6 @@
 	}
 probe_err2:
 	pl330_del(pi);
-probe_err1:
-	free_irq(irq, pi);
 
 	return ret;
 }
@@ -3059,7 +3064,6 @@
 	struct dma_pl330_dmac *pdmac = amba_get_drvdata(adev);
 	struct dma_pl330_chan *pch, *_p;
 	struct pl330_info *pi;
-	int irq;
 
 	if (!pdmac)
 		return 0;
@@ -3068,7 +3072,6 @@
 		of_dma_controller_free(adev->dev.of_node);
 
 	dma_async_device_unregister(&pdmac->ddma);
-	amba_set_drvdata(adev, NULL);
 
 	/* Idle the DMAC */
 	list_for_each_entry_safe(pch, _p, &pdmac->ddma.channels,
@@ -3086,9 +3089,6 @@
 
 	pl330_del(pi);
 
-	irq = adev->irq[0];
-	free_irq(irq, pi);
-
 	return 0;
 }
 
diff --git a/drivers/dma/ppc4xx/adma.c b/drivers/dma/ppc4xx/adma.c
index e24b5ef..8da48c6 100644
--- a/drivers/dma/ppc4xx/adma.c
+++ b/drivers/dma/ppc4xx/adma.c
@@ -804,218 +804,6 @@
 }
 
 /**
- * ppc440spe_desc_get_src_addr - extract the source address from the descriptor
- */
-static u32 ppc440spe_desc_get_src_addr(struct ppc440spe_adma_desc_slot *desc,
-				struct ppc440spe_adma_chan *chan, int src_idx)
-{
-	struct dma_cdb *dma_hw_desc;
-	struct xor_cb *xor_hw_desc;
-
-	switch (chan->device->id) {
-	case PPC440SPE_DMA0_ID:
-	case PPC440SPE_DMA1_ID:
-		dma_hw_desc = desc->hw_desc;
-		/* May have 0, 1, 2, or 3 sources */
-		switch (dma_hw_desc->opc) {
-		case DMA_CDB_OPC_NO_OP:
-		case DMA_CDB_OPC_DFILL128:
-			return 0;
-		case DMA_CDB_OPC_DCHECK128:
-			if (unlikely(src_idx)) {
-				printk(KERN_ERR "%s: try to get %d source for"
-				    " DCHECK128\n", __func__, src_idx);
-				BUG();
-			}
-			return le32_to_cpu(dma_hw_desc->sg1l);
-		case DMA_CDB_OPC_MULTICAST:
-		case DMA_CDB_OPC_MV_SG1_SG2:
-			if (unlikely(src_idx > 2)) {
-				printk(KERN_ERR "%s: try to get %d source from"
-				    " DMA descr\n", __func__, src_idx);
-				BUG();
-			}
-			if (src_idx) {
-				if (le32_to_cpu(dma_hw_desc->sg1u) &
-				    DMA_CUED_XOR_WIN_MSK) {
-					u8 region;
-
-					if (src_idx == 1)
-						return le32_to_cpu(
-						    dma_hw_desc->sg1l) +
-							desc->unmap_len;
-
-					region = (le32_to_cpu(
-					    dma_hw_desc->sg1u)) >>
-						DMA_CUED_REGION_OFF;
-
-					region &= DMA_CUED_REGION_MSK;
-					switch (region) {
-					case DMA_RXOR123:
-						return le32_to_cpu(
-						    dma_hw_desc->sg1l) +
-							(desc->unmap_len << 1);
-					case DMA_RXOR124:
-						return le32_to_cpu(
-						    dma_hw_desc->sg1l) +
-							(desc->unmap_len * 3);
-					case DMA_RXOR125:
-						return le32_to_cpu(
-						    dma_hw_desc->sg1l) +
-							(desc->unmap_len << 2);
-					default:
-						printk(KERN_ERR
-						    "%s: try to"
-						    " get src3 for region %02x"
-						    "PPC440SPE_DESC_RXOR12?\n",
-						    __func__, region);
-						BUG();
-					}
-				} else {
-					printk(KERN_ERR
-						"%s: try to get %d"
-						" source for non-cued descr\n",
-						__func__, src_idx);
-					BUG();
-				}
-			}
-			return le32_to_cpu(dma_hw_desc->sg1l);
-		default:
-			printk(KERN_ERR "%s: unknown OPC 0x%02x\n",
-				__func__, dma_hw_desc->opc);
-			BUG();
-		}
-		return le32_to_cpu(dma_hw_desc->sg1l);
-	case PPC440SPE_XOR_ID:
-		/* May have up to 16 sources */
-		xor_hw_desc = desc->hw_desc;
-		return xor_hw_desc->ops[src_idx].l;
-	}
-	return 0;
-}
-
-/**
- * ppc440spe_desc_get_dest_addr - extract the destination address from the
- * descriptor
- */
-static u32 ppc440spe_desc_get_dest_addr(struct ppc440spe_adma_desc_slot *desc,
-				struct ppc440spe_adma_chan *chan, int idx)
-{
-	struct dma_cdb *dma_hw_desc;
-	struct xor_cb *xor_hw_desc;
-
-	switch (chan->device->id) {
-	case PPC440SPE_DMA0_ID:
-	case PPC440SPE_DMA1_ID:
-		dma_hw_desc = desc->hw_desc;
-
-		if (likely(!idx))
-			return le32_to_cpu(dma_hw_desc->sg2l);
-		return le32_to_cpu(dma_hw_desc->sg3l);
-	case PPC440SPE_XOR_ID:
-		xor_hw_desc = desc->hw_desc;
-		return xor_hw_desc->cbtal;
-	}
-	return 0;
-}
-
-/**
- * ppc440spe_desc_get_src_num - extract the number of source addresses from
- * the descriptor
- */
-static u32 ppc440spe_desc_get_src_num(struct ppc440spe_adma_desc_slot *desc,
-				struct ppc440spe_adma_chan *chan)
-{
-	struct dma_cdb *dma_hw_desc;
-	struct xor_cb *xor_hw_desc;
-
-	switch (chan->device->id) {
-	case PPC440SPE_DMA0_ID:
-	case PPC440SPE_DMA1_ID:
-		dma_hw_desc = desc->hw_desc;
-
-		switch (dma_hw_desc->opc) {
-		case DMA_CDB_OPC_NO_OP:
-		case DMA_CDB_OPC_DFILL128:
-			return 0;
-		case DMA_CDB_OPC_DCHECK128:
-			return 1;
-		case DMA_CDB_OPC_MV_SG1_SG2:
-		case DMA_CDB_OPC_MULTICAST:
-			/*
-			 * Only for RXOR operations we have more than
-			 * one source
-			 */
-			if (le32_to_cpu(dma_hw_desc->sg1u) &
-			    DMA_CUED_XOR_WIN_MSK) {
-				/* RXOR op, there are 2 or 3 sources */
-				if (((le32_to_cpu(dma_hw_desc->sg1u) >>
-				    DMA_CUED_REGION_OFF) &
-				      DMA_CUED_REGION_MSK) == DMA_RXOR12) {
-					/* RXOR 1-2 */
-					return 2;
-				} else {
-					/* RXOR 1-2-3/1-2-4/1-2-5 */
-					return 3;
-				}
-			}
-			return 1;
-		default:
-			printk(KERN_ERR "%s: unknown OPC 0x%02x\n",
-				__func__, dma_hw_desc->opc);
-			BUG();
-		}
-	case PPC440SPE_XOR_ID:
-		/* up to 16 sources */
-		xor_hw_desc = desc->hw_desc;
-		return xor_hw_desc->cbc & XOR_CDCR_OAC_MSK;
-	default:
-		BUG();
-	}
-	return 0;
-}
-
-/**
- * ppc440spe_desc_get_dst_num - get the number of destination addresses in
- * this descriptor
- */
-static u32 ppc440spe_desc_get_dst_num(struct ppc440spe_adma_desc_slot *desc,
-				struct ppc440spe_adma_chan *chan)
-{
-	struct dma_cdb *dma_hw_desc;
-
-	switch (chan->device->id) {
-	case PPC440SPE_DMA0_ID:
-	case PPC440SPE_DMA1_ID:
-		/* May be 1 or 2 destinations */
-		dma_hw_desc = desc->hw_desc;
-		switch (dma_hw_desc->opc) {
-		case DMA_CDB_OPC_NO_OP:
-		case DMA_CDB_OPC_DCHECK128:
-			return 0;
-		case DMA_CDB_OPC_MV_SG1_SG2:
-		case DMA_CDB_OPC_DFILL128:
-			return 1;
-		case DMA_CDB_OPC_MULTICAST:
-			if (desc->dst_cnt == 2)
-				return 2;
-			else
-				return 1;
-		default:
-			printk(KERN_ERR "%s: unknown OPC 0x%02x\n",
-				__func__, dma_hw_desc->opc);
-			BUG();
-		}
-	case PPC440SPE_XOR_ID:
-		/* Always only 1 destination */
-		return 1;
-	default:
-		BUG();
-	}
-	return 0;
-}
-
-/**
  * ppc440spe_desc_get_link - get the address of the descriptor that
  * follows this one
  */
@@ -1707,43 +1495,6 @@
 	}
 }
 
-static void ppc440spe_adma_unmap(struct ppc440spe_adma_chan *chan,
-				 struct ppc440spe_adma_desc_slot *desc)
-{
-	u32 src_cnt, dst_cnt;
-	dma_addr_t addr;
-
-	/*
-	 * get the number of sources & destination
-	 * included in this descriptor and unmap
-	 * them all
-	 */
-	src_cnt = ppc440spe_desc_get_src_num(desc, chan);
-	dst_cnt = ppc440spe_desc_get_dst_num(desc, chan);
-
-	/* unmap destinations */
-	if (!(desc->async_tx.flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
-		while (dst_cnt--) {
-			addr = ppc440spe_desc_get_dest_addr(
-				desc, chan, dst_cnt);
-			dma_unmap_page(chan->device->dev,
-					addr, desc->unmap_len,
-					DMA_FROM_DEVICE);
-		}
-	}
-
-	/* unmap sources */
-	if (!(desc->async_tx.flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-		while (src_cnt--) {
-			addr = ppc440spe_desc_get_src_addr(
-				desc, chan, src_cnt);
-			dma_unmap_page(chan->device->dev,
-					addr, desc->unmap_len,
-					DMA_TO_DEVICE);
-		}
-	}
-}
-
 /**
  * ppc440spe_adma_run_tx_complete_actions - call functions to be called
  * upon completion
@@ -1767,26 +1518,7 @@
 			desc->async_tx.callback(
 				desc->async_tx.callback_param);
 
-		/* unmap dma addresses
-		 * (unmap_single vs unmap_page?)
-		 *
-		 * actually, ppc's dma_unmap_page() functions are empty, so
-		 * the following code is just for the sake of completeness
-		 */
-		if (chan && chan->needs_unmap && desc->group_head &&
-		     desc->unmap_len) {
-			struct ppc440spe_adma_desc_slot *unmap =
-							desc->group_head;
-			/* assume 1 slot per op always */
-			u32 slot_count = unmap->slot_cnt;
-
-			/* Run through the group list and unmap addresses */
-			for (i = 0; i < slot_count; i++) {
-				BUG_ON(!unmap);
-				ppc440spe_adma_unmap(chan, unmap);
-				unmap = unmap->hw_next;
-			}
-		}
+		dma_descriptor_unmap(&desc->async_tx);
 	}
 
 	/* run dependent operations */
@@ -3893,7 +3625,7 @@
 
 	ppc440spe_chan = to_ppc440spe_adma_chan(chan);
 	ret = dma_cookie_status(chan, cookie, txstate);
-	if (ret == DMA_SUCCESS)
+	if (ret == DMA_COMPLETE)
 		return ret;
 
 	ppc440spe_adma_slot_cleanup(ppc440spe_chan);
diff --git a/drivers/dma/sa11x0-dma.c b/drivers/dma/sa11x0-dma.c
index 461a91a..ab26d46 100644
--- a/drivers/dma/sa11x0-dma.c
+++ b/drivers/dma/sa11x0-dma.c
@@ -436,7 +436,7 @@
 	enum dma_status ret;
 
 	ret = dma_cookie_status(&c->vc.chan, cookie, state);
-	if (ret == DMA_SUCCESS)
+	if (ret == DMA_COMPLETE)
 		return ret;
 
 	if (!state)
diff --git a/drivers/dma/sh/shdma-base.c b/drivers/dma/sh/shdma-base.c
index d94ab59..2e7b394 100644
--- a/drivers/dma/sh/shdma-base.c
+++ b/drivers/dma/sh/shdma-base.c
@@ -724,7 +724,7 @@
 	 * If we don't find cookie on the queue, it has been aborted and we have
 	 * to report error
 	 */
-	if (status != DMA_SUCCESS) {
+	if (status != DMA_COMPLETE) {
 		struct shdma_desc *sdesc;
 		status = DMA_ERROR;
 		list_for_each_entry(sdesc, &schan->ld_queue, node)
diff --git a/drivers/dma/sh/shdmac.c b/drivers/dma/sh/shdmac.c
index 1069e88..0d765c0 100644
--- a/drivers/dma/sh/shdmac.c
+++ b/drivers/dma/sh/shdmac.c
@@ -685,7 +685,7 @@
 static int sh_dmae_probe(struct platform_device *pdev)
 {
 	const struct sh_dmae_pdata *pdata;
-	unsigned long irqflags = IRQF_DISABLED,
+	unsigned long irqflags = 0,
 		chan_flag[SH_DMAE_MAX_CHANNELS] = {};
 	int errirq, chan_irq[SH_DMAE_MAX_CHANNELS];
 	int err, i, irq_cnt = 0, irqres = 0, irq_cap = 0;
@@ -838,7 +838,7 @@
 				    IORESOURCE_IRQ_SHAREABLE)
 					chan_flag[irq_cnt] = IRQF_SHARED;
 				else
-					chan_flag[irq_cnt] = IRQF_DISABLED;
+					chan_flag[irq_cnt] = 0;
 				dev_dbg(&pdev->dev,
 					"Found IRQ %d for channel %d\n",
 					i, irq_cnt);
diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c
index 82d2b97..b8c031b 100644
--- a/drivers/dma/ste_dma40.c
+++ b/drivers/dma/ste_dma40.c
@@ -14,6 +14,7 @@
 #include <linux/platform_device.h>
 #include <linux/clk.h>
 #include <linux/delay.h>
+#include <linux/log2.h>
 #include <linux/pm.h>
 #include <linux/pm_runtime.h>
 #include <linux/err.h>
@@ -2626,7 +2627,7 @@
 	}
 
 	ret = dma_cookie_status(chan, cookie, txstate);
-	if (ret != DMA_SUCCESS)
+	if (ret != DMA_COMPLETE)
 		dma_set_residue(txstate, stedma40_residue(chan));
 
 	if (d40_is_paused(d40c))
@@ -2796,8 +2797,8 @@
 	    src_addr_width >  DMA_SLAVE_BUSWIDTH_8_BYTES   ||
 	    dst_addr_width <= DMA_SLAVE_BUSWIDTH_UNDEFINED ||
 	    dst_addr_width >  DMA_SLAVE_BUSWIDTH_8_BYTES   ||
-	    ((src_addr_width > 1) && (src_addr_width & 1)) ||
-	    ((dst_addr_width > 1) && (dst_addr_width & 1)))
+	    !is_power_of_2(src_addr_width) ||
+	    !is_power_of_2(dst_addr_width))
 		return -EINVAL;
 
 	cfg->src_info.data_width = src_addr_width;
diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c
index 5d4986e..73654e3 100644
--- a/drivers/dma/tegra20-apb-dma.c
+++ b/drivers/dma/tegra20-apb-dma.c
@@ -570,7 +570,7 @@
 
 	list_del(&sgreq->node);
 	if (sgreq->last_sg) {
-		dma_desc->dma_status = DMA_SUCCESS;
+		dma_desc->dma_status = DMA_COMPLETE;
 		dma_cookie_complete(&dma_desc->txd);
 		if (!dma_desc->cb_count)
 			list_add_tail(&dma_desc->cb_node, &tdc->cb_desc);
@@ -768,7 +768,7 @@
 	unsigned int residual;
 
 	ret = dma_cookie_status(dc, cookie, txstate);
-	if (ret == DMA_SUCCESS)
+	if (ret == DMA_COMPLETE)
 		return ret;
 
 	spin_lock_irqsave(&tdc->lock, flags);
@@ -1018,7 +1018,7 @@
 	return &dma_desc->txd;
 }
 
-struct dma_async_tx_descriptor *tegra_dma_prep_dma_cyclic(
+static struct dma_async_tx_descriptor *tegra_dma_prep_dma_cyclic(
 	struct dma_chan *dc, dma_addr_t buf_addr, size_t buf_len,
 	size_t period_len, enum dma_transfer_direction direction,
 	unsigned long flags, void *context)
diff --git a/drivers/dma/timb_dma.c b/drivers/dma/timb_dma.c
index 28af214..4506a7b 100644
--- a/drivers/dma/timb_dma.c
+++ b/drivers/dma/timb_dma.c
@@ -154,38 +154,6 @@
 	return done;
 }
 
-static void __td_unmap_desc(struct timb_dma_chan *td_chan, const u8 *dma_desc,
-	bool single)
-{
-	dma_addr_t addr;
-	int len;
-
-	addr = (dma_desc[7] << 24) | (dma_desc[6] << 16) | (dma_desc[5] << 8) |
-		dma_desc[4];
-
-	len = (dma_desc[3] << 8) | dma_desc[2];
-
-	if (single)
-		dma_unmap_single(chan2dev(&td_chan->chan), addr, len,
-			DMA_TO_DEVICE);
-	else
-		dma_unmap_page(chan2dev(&td_chan->chan), addr, len,
-			DMA_TO_DEVICE);
-}
-
-static void __td_unmap_descs(struct timb_dma_desc *td_desc, bool single)
-{
-	struct timb_dma_chan *td_chan = container_of(td_desc->txd.chan,
-		struct timb_dma_chan, chan);
-	u8 *descs;
-
-	for (descs = td_desc->desc_list; ; descs += TIMB_DMA_DESC_SIZE) {
-		__td_unmap_desc(td_chan, descs, single);
-		if (descs[0] & 0x02)
-			break;
-	}
-}
-
 static int td_fill_desc(struct timb_dma_chan *td_chan, u8 *dma_desc,
 	struct scatterlist *sg, bool last)
 {
@@ -293,10 +261,7 @@
 
 	list_move(&td_desc->desc_node, &td_chan->free_list);
 
-	if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP))
-		__td_unmap_descs(td_desc,
-			txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE);
-
+	dma_descriptor_unmap(txd);
 	/*
 	 * The API requires that no submissions are done from a
 	 * callback, so we don't need to drop the lock here
diff --git a/drivers/dma/txx9dmac.c b/drivers/dma/txx9dmac.c
index 71e8e77..bae6c29 100644
--- a/drivers/dma/txx9dmac.c
+++ b/drivers/dma/txx9dmac.c
@@ -419,30 +419,7 @@
 	list_splice_init(&desc->tx_list, &dc->free_list);
 	list_move(&desc->desc_node, &dc->free_list);
 
-	if (!ds) {
-		dma_addr_t dmaaddr;
-		if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
-			dmaaddr = is_dmac64(dc) ?
-				desc->hwdesc.DAR : desc->hwdesc32.DAR;
-			if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
-				dma_unmap_single(chan2parent(&dc->chan),
-					dmaaddr, desc->len, DMA_FROM_DEVICE);
-			else
-				dma_unmap_page(chan2parent(&dc->chan),
-					dmaaddr, desc->len, DMA_FROM_DEVICE);
-		}
-		if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
-			dmaaddr = is_dmac64(dc) ?
-				desc->hwdesc.SAR : desc->hwdesc32.SAR;
-			if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
-				dma_unmap_single(chan2parent(&dc->chan),
-					dmaaddr, desc->len, DMA_TO_DEVICE);
-			else
-				dma_unmap_page(chan2parent(&dc->chan),
-					dmaaddr, desc->len, DMA_TO_DEVICE);
-		}
-	}
-
+	dma_descriptor_unmap(txd);
 	/*
 	 * The API requires that no submissions are done from a
 	 * callback, so we don't need to drop the lock here
@@ -962,8 +939,8 @@
 	enum dma_status ret;
 
 	ret = dma_cookie_status(chan, cookie, txstate);
-	if (ret == DMA_SUCCESS)
-		return DMA_SUCCESS;
+	if (ret == DMA_COMPLETE)
+		return DMA_COMPLETE;
 
 	spin_lock_bh(&dc->lock);
 	txx9dmac_scan_descriptors(dc);
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 7dd4461..4e10b10 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -13,6 +13,7 @@
 #include <linux/acpi_gpio.h>
 #include <linux/idr.h>
 #include <linux/slab.h>
+#include <linux/acpi.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/gpio.h>
diff --git a/drivers/gpu/drm/i915/intel_acpi.c b/drivers/gpu/drm/i915/intel_acpi.c
index 43959ed..dfff090 100644
--- a/drivers/gpu/drm/i915/intel_acpi.c
+++ b/drivers/gpu/drm/i915/intel_acpi.c
@@ -196,7 +196,7 @@
 	acpi_handle dhandle;
 	int ret;
 
-	dhandle = DEVICE_ACPI_HANDLE(&pdev->dev);
+	dhandle = ACPI_HANDLE(&pdev->dev);
 	if (!dhandle)
 		return false;
 
diff --git a/drivers/gpu/drm/i915/intel_opregion.c b/drivers/gpu/drm/i915/intel_opregion.c
index 1b2f41c..6d69a9b 100644
--- a/drivers/gpu/drm/i915/intel_opregion.c
+++ b/drivers/gpu/drm/i915/intel_opregion.c
@@ -638,7 +638,7 @@
 	u32 temp;
 	int i = 0;
 
-	handle = DEVICE_ACPI_HANDLE(&dev->pdev->dev);
+	handle = ACPI_HANDLE(&dev->pdev->dev);
 	if (!handle || acpi_bus_get_device(handle, &acpi_dev))
 		return;
 
diff --git a/drivers/gpu/drm/nouveau/core/subdev/mxm/base.c b/drivers/gpu/drm/nouveau/core/subdev/mxm/base.c
index e286e13..1291204 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/mxm/base.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/mxm/base.c
@@ -116,7 +116,7 @@
 	acpi_handle handle;
 	int ret;
 
-	handle = DEVICE_ACPI_HANDLE(&device->pdev->dev);
+	handle = ACPI_HANDLE(&device->pdev->dev);
 	if (!handle)
 		return false;
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.c b/drivers/gpu/drm/nouveau/nouveau_acpi.c
index 07273a2..95c7404 100644
--- a/drivers/gpu/drm/nouveau/nouveau_acpi.c
+++ b/drivers/gpu/drm/nouveau/nouveau_acpi.c
@@ -256,7 +256,7 @@
 	acpi_handle dhandle;
 	int retval = 0;
 
-	dhandle = DEVICE_ACPI_HANDLE(&pdev->dev);
+	dhandle = ACPI_HANDLE(&pdev->dev);
 	if (!dhandle)
 		return false;
 
@@ -414,7 +414,7 @@
 	if (!nouveau_dsm_priv.dsm_detected && !nouveau_dsm_priv.optimus_detected)
 		return false;
 
-	dhandle = DEVICE_ACPI_HANDLE(&pdev->dev);
+	dhandle = ACPI_HANDLE(&pdev->dev);
 	if (!dhandle)
 		return false;
 
@@ -448,7 +448,7 @@
 		return NULL;
 	}
 
-	handle = DEVICE_ACPI_HANDLE(&dev->pdev->dev);
+	handle = ACPI_HANDLE(&dev->pdev->dev);
 	if (!handle)
 		return NULL;
 
diff --git a/drivers/gpu/drm/radeon/radeon_acpi.c b/drivers/gpu/drm/radeon/radeon_acpi.c
index 10f98c7..98a9074 100644
--- a/drivers/gpu/drm/radeon/radeon_acpi.c
+++ b/drivers/gpu/drm/radeon/radeon_acpi.c
@@ -369,7 +369,7 @@
 		return NOTIFY_DONE;
 
 	/* Check pending SBIOS requests */
-	handle = DEVICE_ACPI_HANDLE(&rdev->pdev->dev);
+	handle = ACPI_HANDLE(&rdev->pdev->dev);
 	count = radeon_atif_get_sbios_requests(handle, &req);
 
 	if (count <= 0)
@@ -556,7 +556,7 @@
 	struct radeon_atcs *atcs = &rdev->atcs;
 
 	/* Get the device handle */
-	handle = DEVICE_ACPI_HANDLE(&rdev->pdev->dev);
+	handle = ACPI_HANDLE(&rdev->pdev->dev);
 	if (!handle)
 		return -EINVAL;
 
@@ -596,7 +596,7 @@
 	u32 retry = 3;
 
 	/* Get the device handle */
-	handle = DEVICE_ACPI_HANDLE(&rdev->pdev->dev);
+	handle = ACPI_HANDLE(&rdev->pdev->dev);
 	if (!handle)
 		return -EINVAL;
 
@@ -699,7 +699,7 @@
 	int ret;
 
 	/* Get the device handle */
-	handle = DEVICE_ACPI_HANDLE(&rdev->pdev->dev);
+	handle = ACPI_HANDLE(&rdev->pdev->dev);
 
 	/* No need to proceed if we're sure that ATIF is not supported */
 	if (!ASIC_IS_AVIVO(rdev) || !rdev->bios || !handle)
diff --git a/drivers/gpu/drm/radeon/radeon_atpx_handler.c b/drivers/gpu/drm/radeon/radeon_atpx_handler.c
index 6153ec1..9d302ea 100644
--- a/drivers/gpu/drm/radeon/radeon_atpx_handler.c
+++ b/drivers/gpu/drm/radeon/radeon_atpx_handler.c
@@ -8,8 +8,7 @@
  */
 #include <linux/vga_switcheroo.h>
 #include <linux/slab.h>
-#include <acpi/acpi.h>
-#include <acpi/acpi_bus.h>
+#include <linux/acpi.h>
 #include <linux/pci.h>
 
 #include "radeon_acpi.h"
@@ -447,7 +446,7 @@
 	acpi_handle dhandle, atpx_handle;
 	acpi_status status;
 
-	dhandle = DEVICE_ACPI_HANDLE(&pdev->dev);
+	dhandle = ACPI_HANDLE(&pdev->dev);
 	if (!dhandle)
 		return false;
 
@@ -493,7 +492,7 @@
  */
 static int radeon_atpx_get_client_id(struct pci_dev *pdev)
 {
-	if (radeon_atpx_priv.dhandle == DEVICE_ACPI_HANDLE(&pdev->dev))
+	if (radeon_atpx_priv.dhandle == ACPI_HANDLE(&pdev->dev))
 		return VGA_SWITCHEROO_IGD;
 	else
 		return VGA_SWITCHEROO_DIS;
diff --git a/drivers/gpu/drm/radeon/radeon_bios.c b/drivers/gpu/drm/radeon/radeon_bios.c
index c155d6f..b3633d9 100644
--- a/drivers/gpu/drm/radeon/radeon_bios.c
+++ b/drivers/gpu/drm/radeon/radeon_bios.c
@@ -185,7 +185,7 @@
 		return false;
 
 	while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev)) != NULL) {
-		dhandle = DEVICE_ACPI_HANDLE(&pdev->dev);
+		dhandle = ACPI_HANDLE(&pdev->dev);
 		if (!dhandle)
 			continue;
 
diff --git a/drivers/hid/i2c-hid/i2c-hid.c b/drivers/hid/i2c-hid/i2c-hid.c
index ae48d18..5f7e55f 100644
--- a/drivers/hid/i2c-hid/i2c-hid.c
+++ b/drivers/hid/i2c-hid/i2c-hid.c
@@ -1008,7 +1008,7 @@
 	hid->hid_get_raw_report = i2c_hid_get_raw_report;
 	hid->hid_output_raw_report = i2c_hid_output_raw_report;
 	hid->dev.parent = &client->dev;
-	ACPI_HANDLE_SET(&hid->dev, ACPI_HANDLE(&client->dev));
+	ACPI_COMPANION_SET(&hid->dev, ACPI_COMPANION(&client->dev));
 	hid->bus = BUS_I2C;
 	hid->version = le16_to_cpu(ihid->hdesc.bcdVersion);
 	hid->vendor = le16_to_cpu(ihid->hdesc.wVendorID);
diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index 5923cfa..d74c0b3 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -615,6 +615,22 @@
 }
 EXPORT_SYMBOL_GPL(i2c_unlock_adapter);
 
+static void i2c_dev_set_name(struct i2c_adapter *adap,
+			     struct i2c_client *client)
+{
+	struct acpi_device *adev = ACPI_COMPANION(&client->dev);
+
+	if (adev) {
+		dev_set_name(&client->dev, "i2c-%s", acpi_dev_name(adev));
+		return;
+	}
+
+	/* For 10-bit clients, add an arbitrary offset to avoid collisions */
+	dev_set_name(&client->dev, "%d-%04x", i2c_adapter_id(adap),
+		     client->addr | ((client->flags & I2C_CLIENT_TEN)
+				     ? 0xa000 : 0));
+}
+
 /**
  * i2c_new_device - instantiate an i2c device
  * @adap: the adapter managing the device
@@ -671,12 +687,9 @@
 	client->dev.bus = &i2c_bus_type;
 	client->dev.type = &i2c_client_type;
 	client->dev.of_node = info->of_node;
-	ACPI_HANDLE_SET(&client->dev, info->acpi_node.handle);
+	ACPI_COMPANION_SET(&client->dev, info->acpi_node.companion);
 
-	/* For 10-bit clients, add an arbitrary offset to avoid collisions */
-	dev_set_name(&client->dev, "%d-%04x", i2c_adapter_id(adap),
-		     client->addr | ((client->flags & I2C_CLIENT_TEN)
-				     ? 0xa000 : 0));
+	i2c_dev_set_name(adap, client);
 	status = device_register(&client->dev);
 	if (status)
 		goto out_err;
@@ -1100,7 +1113,7 @@
 		return AE_OK;
 
 	memset(&info, 0, sizeof(info));
-	info.acpi_node.handle = handle;
+	info.acpi_node.companion = adev;
 	info.irq = -1;
 
 	INIT_LIST_HEAD(&resource_list);
diff --git a/drivers/ide/ide-acpi.c b/drivers/ide/ide-acpi.c
index 140c8ef5..d9e1f7c 100644
--- a/drivers/ide/ide-acpi.c
+++ b/drivers/ide/ide-acpi.c
@@ -7,6 +7,7 @@
  * Copyright (C) 2006 Hannes Reinecke
  */
 
+#include <linux/acpi.h>
 #include <linux/ata.h>
 #include <linux/delay.h>
 #include <linux/device.h>
@@ -19,8 +20,6 @@
 #include <linux/dmi.h>
 #include <linux/module.h>
 
-#include <acpi/acpi_bus.h>
-
 #define REGS_PER_GTF		7
 
 struct GTM_buffer {
@@ -128,7 +127,7 @@
 
 	DEBPRINT("ENTER: pci %02x:%02x.%01x\n", bus, devnum, func);
 
-	dev_handle = DEVICE_ACPI_HANDLE(dev);
+	dev_handle = ACPI_HANDLE(dev);
 	if (!dev_handle) {
 		DEBPRINT("no acpi handle for device\n");
 		goto err;
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index 3226ce9..cbd4e9a 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -1,7 +1,7 @@
 /*
  * intel_idle.c - native hardware idle loop for modern Intel processors
  *
- * Copyright (c) 2010, Intel Corporation.
+ * Copyright (c) 2013, Intel Corporation.
  * Len Brown <len.brown@intel.com>
  *
  * This program is free software; you can redistribute it and/or modify it
@@ -329,6 +329,22 @@
 	{
 		.enter = NULL }
 };
+static struct cpuidle_state avn_cstates[CPUIDLE_STATE_MAX] = {
+	{
+		.name = "C1-AVN",
+		.desc = "MWAIT 0x00",
+		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID,
+		.exit_latency = 2,
+		.target_residency = 2,
+		.enter = &intel_idle },
+	{
+		.name = "C6-AVN",
+		.desc = "MWAIT 0x51",
+		.flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+		.exit_latency = 15,
+		.target_residency = 45,
+		.enter = &intel_idle },
+};
 
 /**
  * intel_idle
@@ -462,6 +478,11 @@
 	.disable_promotion_to_c1e = true,
 };
 
+static const struct idle_cpu idle_cpu_avn = {
+	.state_table = avn_cstates,
+	.disable_promotion_to_c1e = true,
+};
+
 #define ICPU(model, cpu) \
 	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_MWAIT, (unsigned long)&cpu }
 
@@ -483,6 +504,7 @@
 	ICPU(0x3f, idle_cpu_hsw),
 	ICPU(0x45, idle_cpu_hsw),
 	ICPU(0x46, idle_cpu_hsw),
+	ICPU(0x4D, idle_cpu_avn),
 	{}
 };
 MODULE_DEVICE_TABLE(x86cpu, intel_idle_ids);
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 8766eab..b6b7a28 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -112,7 +112,7 @@
 
 static struct ctl_table_header *raid_table_header;
 
-static ctl_table raid_table[] = {
+static struct ctl_table raid_table[] = {
 	{
 		.procname	= "speed_limit_min",
 		.data		= &sysctl_speed_limit_min,
@@ -130,7 +130,7 @@
 	{ }
 };
 
-static ctl_table raid_dir_table[] = {
+static struct ctl_table raid_dir_table[] = {
 	{
 		.procname	= "raid",
 		.maxlen		= 0,
@@ -140,7 +140,7 @@
 	{ }
 };
 
-static ctl_table raid_root_table[] = {
+static struct ctl_table raid_root_table[] = {
 	{
 		.procname	= "dev",
 		.maxlen		= 0,
@@ -562,11 +562,19 @@
 	goto retry;
 }
 
-static inline int mddev_lock(struct mddev * mddev)
+static inline int __must_check mddev_lock(struct mddev * mddev)
 {
 	return mutex_lock_interruptible(&mddev->reconfig_mutex);
 }
 
+/* Sometimes we need to take the lock in a situation where
+ * failure due to interrupts is not acceptable.
+ */
+static inline void mddev_lock_nointr(struct mddev * mddev)
+{
+	mutex_lock(&mddev->reconfig_mutex);
+}
+
 static inline int mddev_is_locked(struct mddev *mddev)
 {
 	return mutex_is_locked(&mddev->reconfig_mutex);
@@ -2978,7 +2986,7 @@
 		for_each_mddev(mddev, tmp) {
 			struct md_rdev *rdev2;
 
-			mddev_lock(mddev);
+			mddev_lock_nointr(mddev);
 			rdev_for_each(rdev2, mddev)
 				if (rdev->bdev == rdev2->bdev &&
 				    rdev != rdev2 &&
@@ -2994,7 +3002,7 @@
 				break;
 			}
 		}
-		mddev_lock(my_mddev);
+		mddev_lock_nointr(my_mddev);
 		if (overlap) {
 			/* Someone else could have slipped in a size
 			 * change here, but doing so is just silly.
@@ -3580,6 +3588,7 @@
 		mddev->in_sync = 1;
 		del_timer_sync(&mddev->safemode_timer);
 	}
+	blk_set_stacking_limits(&mddev->queue->limits);
 	pers->run(mddev);
 	set_bit(MD_CHANGE_DEVS, &mddev->flags);
 	mddev_resume(mddev);
@@ -5258,7 +5267,7 @@
 
 void md_stop_writes(struct mddev *mddev)
 {
-	mddev_lock(mddev);
+	mddev_lock_nointr(mddev);
 	__md_stop_writes(mddev);
 	mddev_unlock(mddev);
 }
@@ -5291,20 +5300,35 @@
 static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
 {
 	int err = 0;
+	int did_freeze = 0;
+
+	if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
+		did_freeze = 1;
+		set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+		md_wakeup_thread(mddev->thread);
+	}
+	if (mddev->sync_thread) {
+		set_bit(MD_RECOVERY_INTR, &mddev->recovery);
+		/* Thread might be blocked waiting for metadata update
+		 * which will now never happen */
+		wake_up_process(mddev->sync_thread->tsk);
+	}
+	mddev_unlock(mddev);
+	wait_event(resync_wait, mddev->sync_thread == NULL);
+	mddev_lock_nointr(mddev);
+
 	mutex_lock(&mddev->open_mutex);
-	if (atomic_read(&mddev->openers) > !!bdev) {
+	if (atomic_read(&mddev->openers) > !!bdev ||
+	    mddev->sync_thread ||
+	    (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags))) {
 		printk("md: %s still in use.\n",mdname(mddev));
+		if (did_freeze) {
+			clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+			md_wakeup_thread(mddev->thread);
+		}
 		err = -EBUSY;
 		goto out;
 	}
-	if (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags)) {
-		/* Someone opened the device since we flushed it
-		 * so page cache could be dirty and it is too late
-		 * to flush.  So abort
-		 */
-		mutex_unlock(&mddev->open_mutex);
-		return -EBUSY;
-	}
 	if (mddev->pers) {
 		__md_stop_writes(mddev);
 
@@ -5315,7 +5339,7 @@
 		set_disk_ro(mddev->gendisk, 1);
 		clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
 		sysfs_notify_dirent_safe(mddev->sysfs_state);
-		err = 0;	
+		err = 0;
 	}
 out:
 	mutex_unlock(&mddev->open_mutex);
@@ -5331,20 +5355,34 @@
 {
 	struct gendisk *disk = mddev->gendisk;
 	struct md_rdev *rdev;
+	int did_freeze = 0;
+
+	if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
+		did_freeze = 1;
+		set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+		md_wakeup_thread(mddev->thread);
+	}
+	if (mddev->sync_thread) {
+		set_bit(MD_RECOVERY_INTR, &mddev->recovery);
+		/* Thread might be blocked waiting for metadata update
+		 * which will now never happen */
+		wake_up_process(mddev->sync_thread->tsk);
+	}
+	mddev_unlock(mddev);
+	wait_event(resync_wait, mddev->sync_thread == NULL);
+	mddev_lock_nointr(mddev);
 
 	mutex_lock(&mddev->open_mutex);
 	if (atomic_read(&mddev->openers) > !!bdev ||
-	    mddev->sysfs_active) {
+	    mddev->sysfs_active ||
+	    mddev->sync_thread ||
+	    (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags))) {
 		printk("md: %s still in use.\n",mdname(mddev));
 		mutex_unlock(&mddev->open_mutex);
-		return -EBUSY;
-	}
-	if (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags)) {
-		/* Someone opened the device since we flushed it
-		 * so page cache could be dirty and it is too late
-		 * to flush.  So abort
-		 */
-		mutex_unlock(&mddev->open_mutex);
+		if (did_freeze) {
+			clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+			md_wakeup_thread(mddev->thread);
+		}
 		return -EBUSY;
 	}
 	if (mddev->pers) {
@@ -6551,7 +6589,7 @@
 				wait_event(mddev->sb_wait,
 					   !test_bit(MD_CHANGE_DEVS, &mddev->flags) &&
 					   !test_bit(MD_CHANGE_PENDING, &mddev->flags));
-				mddev_lock(mddev);
+				mddev_lock_nointr(mddev);
 			}
 		} else {
 			err = -EROFS;
@@ -7361,9 +7399,6 @@
 		mddev->curr_resync = 2;
 
 	try_again:
-		if (kthread_should_stop())
-			set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-
 		if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
 			goto skip;
 		for_each_mddev(mddev2, tmp) {
@@ -7388,7 +7423,7 @@
 				 * be caught by 'softlockup'
 				 */
 				prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE);
-				if (!kthread_should_stop() &&
+				if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
 				    mddev2->curr_resync >= mddev->curr_resync) {
 					printk(KERN_INFO "md: delaying %s of %s"
 					       " until %s has finished (they"
@@ -7464,7 +7499,7 @@
 	last_check = 0;
 
 	if (j>2) {
-		printk(KERN_INFO 
+		printk(KERN_INFO
 		       "md: resuming %s of %s from checkpoint.\n",
 		       desc, mdname(mddev));
 		mddev->curr_resync = j;
@@ -7501,7 +7536,8 @@
 			sysfs_notify(&mddev->kobj, NULL, "sync_completed");
 		}
 
-		while (j >= mddev->resync_max && !kthread_should_stop()) {
+		while (j >= mddev->resync_max &&
+		       !test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
 			/* As this condition is controlled by user-space,
 			 * we can block indefinitely, so use '_interruptible'
 			 * to avoid triggering warnings.
@@ -7509,17 +7545,18 @@
 			flush_signals(current); /* just in case */
 			wait_event_interruptible(mddev->recovery_wait,
 						 mddev->resync_max > j
-						 || kthread_should_stop());
+						 || test_bit(MD_RECOVERY_INTR,
+							     &mddev->recovery));
 		}
 
-		if (kthread_should_stop())
-			goto interrupted;
+		if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
+			break;
 
 		sectors = mddev->pers->sync_request(mddev, j, &skipped,
 						  currspeed < speed_min(mddev));
 		if (sectors == 0) {
 			set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-			goto out;
+			break;
 		}
 
 		if (!skipped) { /* actual IO requested */
@@ -7556,10 +7593,8 @@
 			last_mark = next;
 		}
 
-
-		if (kthread_should_stop())
-			goto interrupted;
-
+		if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
+			break;
 
 		/*
 		 * this loop exits only if either when we are slower than
@@ -7582,11 +7617,12 @@
 			}
 		}
 	}
-	printk(KERN_INFO "md: %s: %s done.\n",mdname(mddev), desc);
+	printk(KERN_INFO "md: %s: %s %s.\n",mdname(mddev), desc,
+	       test_bit(MD_RECOVERY_INTR, &mddev->recovery)
+	       ? "interrupted" : "done");
 	/*
 	 * this also signals 'finished resyncing' to md_stop
 	 */
- out:
 	blk_finish_plug(&plug);
 	wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
 
@@ -7640,16 +7676,6 @@
 	set_bit(MD_RECOVERY_DONE, &mddev->recovery);
 	md_wakeup_thread(mddev->thread);
 	return;
-
- interrupted:
-	/*
-	 * got a signal, exit.
-	 */
-	printk(KERN_INFO
-	       "md: md_do_sync() got signal ... exiting\n");
-	set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-	goto out;
-
 }
 EXPORT_SYMBOL_GPL(md_do_sync);
 
@@ -7894,6 +7920,7 @@
 
 	/* resync has finished, collect result */
 	md_unregister_thread(&mddev->sync_thread);
+	wake_up(&resync_wait);
 	if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
 	    !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
 		/* success...*/
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index af6681b1..1e5a540 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -66,7 +66,8 @@
  */
 static int max_queued_requests = 1024;
 
-static void allow_barrier(struct r1conf *conf);
+static void allow_barrier(struct r1conf *conf, sector_t start_next_window,
+			  sector_t bi_sector);
 static void lower_barrier(struct r1conf *conf);
 
 static void * r1bio_pool_alloc(gfp_t gfp_flags, void *data)
@@ -84,10 +85,12 @@
 }
 
 #define RESYNC_BLOCK_SIZE (64*1024)
-//#define RESYNC_BLOCK_SIZE PAGE_SIZE
+#define RESYNC_DEPTH 32
 #define RESYNC_SECTORS (RESYNC_BLOCK_SIZE >> 9)
 #define RESYNC_PAGES ((RESYNC_BLOCK_SIZE + PAGE_SIZE-1) / PAGE_SIZE)
-#define RESYNC_WINDOW (2048*1024)
+#define RESYNC_WINDOW (RESYNC_BLOCK_SIZE * RESYNC_DEPTH)
+#define RESYNC_WINDOW_SECTORS (RESYNC_WINDOW >> 9)
+#define NEXT_NORMALIO_DISTANCE (3 * RESYNC_WINDOW_SECTORS)
 
 static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
 {
@@ -225,6 +228,8 @@
 	struct bio *bio = r1_bio->master_bio;
 	int done;
 	struct r1conf *conf = r1_bio->mddev->private;
+	sector_t start_next_window = r1_bio->start_next_window;
+	sector_t bi_sector = bio->bi_sector;
 
 	if (bio->bi_phys_segments) {
 		unsigned long flags;
@@ -232,6 +237,11 @@
 		bio->bi_phys_segments--;
 		done = (bio->bi_phys_segments == 0);
 		spin_unlock_irqrestore(&conf->device_lock, flags);
+		/*
+		 * make_request() might be waiting for
+		 * bi_phys_segments to decrease
+		 */
+		wake_up(&conf->wait_barrier);
 	} else
 		done = 1;
 
@@ -243,7 +253,7 @@
 		 * Wake up any possible resync thread that waits for the device
 		 * to go idle.
 		 */
-		allow_barrier(conf);
+		allow_barrier(conf, start_next_window, bi_sector);
 	}
 }
 
@@ -814,8 +824,6 @@
  *    there is no normal IO happening.  It must arrange to call
  *    lower_barrier when the particular background IO completes.
  */
-#define RESYNC_DEPTH 32
-
 static void raise_barrier(struct r1conf *conf)
 {
 	spin_lock_irq(&conf->resync_lock);
@@ -827,9 +835,19 @@
 	/* block any new IO from starting */
 	conf->barrier++;
 
-	/* Now wait for all pending IO to complete */
+	/* We must wait while any of these conditions holds:
+	 * A: the array is in the frozen state
+	 * B: barrier >= RESYNC_DEPTH, meaning resync has reached
+	 *    the maximum count allowed.
+	 * C: next_resync + RESYNC_SECTORS > start_next_window, meaning the
+	 *    next resync will reach the window that normal bios are
+	 *    handling.
+	 */
 	wait_event_lock_irq(conf->wait_barrier,
-			    !conf->nr_pending && conf->barrier < RESYNC_DEPTH,
+			    !conf->array_frozen &&
+			    conf->barrier < RESYNC_DEPTH &&
+			    (conf->start_next_window >=
+			     conf->next_resync + RESYNC_SECTORS),
 			    conf->resync_lock);
 
 	spin_unlock_irq(&conf->resync_lock);
@@ -845,10 +863,33 @@
 	wake_up(&conf->wait_barrier);
 }
 
-static void wait_barrier(struct r1conf *conf)
+static bool need_to_wait_for_sync(struct r1conf *conf, struct bio *bio)
 {
+	bool wait = false;
+
+	if (conf->array_frozen || !bio)
+		wait = true;
+	else if (conf->barrier && bio_data_dir(bio) == WRITE) {
+		if (conf->next_resync < RESYNC_WINDOW_SECTORS)
+			wait = true;
+		else if ((conf->next_resync - RESYNC_WINDOW_SECTORS
+				>= bio_end_sector(bio)) ||
+			 (conf->next_resync + NEXT_NORMALIO_DISTANCE
+				<= bio->bi_sector))
+			wait = false;
+		else
+			wait = true;
+	}
+
+	return wait;
+}
+
+static sector_t wait_barrier(struct r1conf *conf, struct bio *bio)
+{
+	sector_t sector = 0;
+
 	spin_lock_irq(&conf->resync_lock);
-	if (conf->barrier) {
+	if (need_to_wait_for_sync(conf, bio)) {
 		conf->nr_waiting++;
 		/* Wait for the barrier to drop.
 		 * However if there are already pending
@@ -860,22 +901,67 @@
 		 * count down.
 		 */
 		wait_event_lock_irq(conf->wait_barrier,
-				    !conf->barrier ||
-				    (conf->nr_pending &&
+				    !conf->array_frozen &&
+				    (!conf->barrier ||
+				    ((conf->start_next_window <
+				      conf->next_resync + RESYNC_SECTORS) &&
 				     current->bio_list &&
-				     !bio_list_empty(current->bio_list)),
+				     !bio_list_empty(current->bio_list))),
 				    conf->resync_lock);
 		conf->nr_waiting--;
 	}
+
+	if (bio && bio_data_dir(bio) == WRITE) {
+		if (conf->next_resync + NEXT_NORMALIO_DISTANCE
+		    <= bio->bi_sector) {
+			if (conf->start_next_window == MaxSector)
+				conf->start_next_window =
+					conf->next_resync +
+					NEXT_NORMALIO_DISTANCE;
+
+			if ((conf->start_next_window + NEXT_NORMALIO_DISTANCE)
+			    <= bio->bi_sector)
+				conf->next_window_requests++;
+			else
+				conf->current_window_requests++;
+		}
+		if (bio->bi_sector >= conf->start_next_window)
+			sector = conf->start_next_window;
+	}
+
 	conf->nr_pending++;
 	spin_unlock_irq(&conf->resync_lock);
+	return sector;
 }
 
-static void allow_barrier(struct r1conf *conf)
+static void allow_barrier(struct r1conf *conf, sector_t start_next_window,
+			  sector_t bi_sector)
 {
 	unsigned long flags;
+
 	spin_lock_irqsave(&conf->resync_lock, flags);
 	conf->nr_pending--;
+	if (start_next_window) {
+		if (start_next_window == conf->start_next_window) {
+			if (conf->start_next_window + NEXT_NORMALIO_DISTANCE
+			    <= bi_sector)
+				conf->next_window_requests--;
+			else
+				conf->current_window_requests--;
+		} else
+			conf->current_window_requests--;
+
+		if (!conf->current_window_requests) {
+			if (conf->next_window_requests) {
+				conf->current_window_requests =
+					conf->next_window_requests;
+				conf->next_window_requests = 0;
+				conf->start_next_window +=
+					NEXT_NORMALIO_DISTANCE;
+			} else
+				conf->start_next_window = MaxSector;
+		}
+	}
 	spin_unlock_irqrestore(&conf->resync_lock, flags);
 	wake_up(&conf->wait_barrier);
 }
@@ -884,8 +970,7 @@
 {
 	/* stop syncio and normal IO and wait for everything to
 	 * go quiet.
-	 * We increment barrier and nr_waiting, and then
-	 * wait until nr_pending match nr_queued+extra
+	 * We wait until nr_pending matches nr_queued+extra
 	 * This is called in the context of one normal IO request
 	 * that has failed. Thus any sync request that might be pending
 	 * will be blocked by nr_pending, and we need to wait for
@@ -895,8 +980,7 @@
 	 * we continue.
 	 */
 	spin_lock_irq(&conf->resync_lock);
-	conf->barrier++;
-	conf->nr_waiting++;
+	conf->array_frozen = 1;
 	wait_event_lock_irq_cmd(conf->wait_barrier,
 				conf->nr_pending == conf->nr_queued+extra,
 				conf->resync_lock,
@@ -907,8 +991,7 @@
 {
 	/* reverse the effect of the freeze */
 	spin_lock_irq(&conf->resync_lock);
-	conf->barrier--;
-	conf->nr_waiting--;
+	conf->array_frozen = 0;
 	wake_up(&conf->wait_barrier);
 	spin_unlock_irq(&conf->resync_lock);
 }
@@ -1013,6 +1096,7 @@
 	int first_clone;
 	int sectors_handled;
 	int max_sectors;
+	sector_t start_next_window;
 
 	/*
 	 * Register the new request and wait if the reconstruction
@@ -1042,7 +1126,7 @@
 		finish_wait(&conf->wait_barrier, &w);
 	}
 
-	wait_barrier(conf);
+	start_next_window = wait_barrier(conf, bio);
 
 	bitmap = mddev->bitmap;
 
@@ -1163,6 +1247,7 @@
 
 	disks = conf->raid_disks * 2;
  retry_write:
+	r1_bio->start_next_window = start_next_window;
 	blocked_rdev = NULL;
 	rcu_read_lock();
 	max_sectors = r1_bio->sectors;
@@ -1231,14 +1316,24 @@
 	if (unlikely(blocked_rdev)) {
 		/* Wait for this device to become unblocked */
 		int j;
+		sector_t old = start_next_window;
 
 		for (j = 0; j < i; j++)
 			if (r1_bio->bios[j])
 				rdev_dec_pending(conf->mirrors[j].rdev, mddev);
 		r1_bio->state = 0;
-		allow_barrier(conf);
+		allow_barrier(conf, start_next_window, bio->bi_sector);
 		md_wait_for_blocked_rdev(blocked_rdev, mddev);
-		wait_barrier(conf);
+		start_next_window = wait_barrier(conf, bio);
+		/*
+		 * We must make sure the multiple r1bios derived from this bio
+		 * all see the same value of bi_phys_segments
+		 */
+		if (bio->bi_phys_segments && old &&
+		    old != start_next_window)
+			/* Wait for the former r1bio(s) to complete */
+			wait_event(conf->wait_barrier,
+				   bio->bi_phys_segments == 1);
 		goto retry_write;
 	}
 
@@ -1438,11 +1533,14 @@
 
 static void close_sync(struct r1conf *conf)
 {
-	wait_barrier(conf);
-	allow_barrier(conf);
+	wait_barrier(conf, NULL);
+	allow_barrier(conf, 0, 0);
 
 	mempool_destroy(conf->r1buf_pool);
 	conf->r1buf_pool = NULL;
+
+	conf->next_resync = 0;
+	conf->start_next_window = MaxSector;
 }
 
 static int raid1_spare_active(struct mddev *mddev)
@@ -2714,6 +2812,9 @@
 	conf->pending_count = 0;
 	conf->recovery_disabled = mddev->recovery_disabled - 1;
 
+	conf->start_next_window = MaxSector;
+	conf->current_window_requests = conf->next_window_requests = 0;
+
 	err = -EIO;
 	for (i = 0; i < conf->raid_disks * 2; i++) {
 
@@ -2871,8 +2972,8 @@
 			   atomic_read(&bitmap->behind_writes) == 0);
 	}
 
-	raise_barrier(conf);
-	lower_barrier(conf);
+	freeze_array(conf, 0);
+	unfreeze_array(conf);
 
 	md_unregister_thread(&mddev->thread);
 	if (conf->r1bio_pool)
@@ -3031,10 +3132,10 @@
 		wake_up(&conf->wait_barrier);
 		break;
 	case 1:
-		raise_barrier(conf);
+		freeze_array(conf, 0);
 		break;
 	case 0:
-		lower_barrier(conf);
+		unfreeze_array(conf);
 		break;
 	}
 }
@@ -3051,7 +3152,8 @@
 		mddev->new_chunk_sectors = 0;
 		conf = setup_conf(mddev);
 		if (!IS_ERR(conf))
-			conf->barrier = 1;
+			/* Array must appear to be quiesced */
+			conf->array_frozen = 1;
 		return conf;
 	}
 	return ERR_PTR(-EINVAL);
diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h
index 0ff3715..9bebca7 100644
--- a/drivers/md/raid1.h
+++ b/drivers/md/raid1.h
@@ -41,6 +41,19 @@
 	 */
 	sector_t		next_resync;
 
+	/* When raid1 starts a resync, we divide the array into four partitions:
+	 * |---------|--------------|---------------------|-------------|
+	 *        next_resync   start_next_window       end_window
+	 * start_next_window = next_resync + NEXT_NORMALIO_DISTANCE
+	 * end_window = start_next_window + NEXT_NORMALIO_DISTANCE
+	 * current_window_requests is the count of normal IO between
+	 *   start_next_window and end_window.
+	 * next_window_requests is the count of normal IO after end_window.
+	 */
+	sector_t		start_next_window;
+	int			current_window_requests;
+	int			next_window_requests;
+
 	spinlock_t		device_lock;
 
 	/* list of 'struct r1bio' that need to be processed by raid1d,
@@ -65,6 +78,7 @@
 	int			nr_waiting;
 	int			nr_queued;
 	int			barrier;
+	int			array_frozen;
 
 	/* Set to 1 if a full sync is needed, (fresh device added).
 	 * Cleared when a sync completes.
@@ -111,6 +125,7 @@
 						 * in this BehindIO request
 						 */
 	sector_t		sector;
+	sector_t		start_next_window;
 	int			sectors;
 	unsigned long		state;
 	struct mddev		*mddev;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 7c3508a..c504e83 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -4384,7 +4384,11 @@
 		set_bit(MD_CHANGE_DEVS, &mddev->flags);
 		md_wakeup_thread(mddev->thread);
 		wait_event(mddev->sb_wait, mddev->flags == 0 ||
-			   kthread_should_stop());
+			   test_bit(MD_RECOVERY_INTR, &mddev->recovery));
+		if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
+			allow_barrier(conf);
+			return sectors_done;
+		}
 		conf->reshape_safe = mddev->reshape_position;
 		allow_barrier(conf);
 	}
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 7f0e17a..47da0af 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -85,6 +85,42 @@
 	return &conf->stripe_hashtbl[hash];
 }
 
+static inline int stripe_hash_locks_hash(sector_t sect)
+{
+	return (sect >> STRIPE_SHIFT) & STRIPE_HASH_LOCKS_MASK;
+}
+
+static inline void lock_device_hash_lock(struct r5conf *conf, int hash)
+{
+	spin_lock_irq(conf->hash_locks + hash);
+	spin_lock(&conf->device_lock);
+}
+
+static inline void unlock_device_hash_lock(struct r5conf *conf, int hash)
+{
+	spin_unlock(&conf->device_lock);
+	spin_unlock_irq(conf->hash_locks + hash);
+}
+
+static inline void lock_all_device_hash_locks_irq(struct r5conf *conf)
+{
+	int i;
+	local_irq_disable();
+	spin_lock(conf->hash_locks);
+	for (i = 1; i < NR_STRIPE_HASH_LOCKS; i++)
+		spin_lock_nest_lock(conf->hash_locks + i, conf->hash_locks);
+	spin_lock(&conf->device_lock);
+}
+
+static inline void unlock_all_device_hash_locks_irq(struct r5conf *conf)
+{
+	int i;
+	spin_unlock(&conf->device_lock);
+	for (i = NR_STRIPE_HASH_LOCKS; i; i--)
+		spin_unlock(conf->hash_locks + i - 1);
+	local_irq_enable();
+}
+
 /* bio's attached to a stripe+device for I/O are linked together in bi_sector
  * order without overlap.  There may be several bio's per stripe+device, and
  * a bio could span several devices.
@@ -249,7 +285,8 @@
 	}
 }
 
-static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh)
+static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh,
+			      struct list_head *temp_inactive_list)
 {
 	BUG_ON(!list_empty(&sh->lru));
 	BUG_ON(atomic_read(&conf->active_stripes)==0);
@@ -278,23 +315,68 @@
 			    < IO_THRESHOLD)
 				md_wakeup_thread(conf->mddev->thread);
 		atomic_dec(&conf->active_stripes);
-		if (!test_bit(STRIPE_EXPANDING, &sh->state)) {
-			list_add_tail(&sh->lru, &conf->inactive_list);
-			wake_up(&conf->wait_for_stripe);
-			if (conf->retry_read_aligned)
-				md_wakeup_thread(conf->mddev->thread);
-		}
+		if (!test_bit(STRIPE_EXPANDING, &sh->state))
+			list_add_tail(&sh->lru, temp_inactive_list);
 	}
 }
 
-static void __release_stripe(struct r5conf *conf, struct stripe_head *sh)
+static void __release_stripe(struct r5conf *conf, struct stripe_head *sh,
+			     struct list_head *temp_inactive_list)
 {
 	if (atomic_dec_and_test(&sh->count))
-		do_release_stripe(conf, sh);
+		do_release_stripe(conf, sh, temp_inactive_list);
+}
+
+/*
+ * @hash could be NR_STRIPE_HASH_LOCKS, in which case temp_inactive_list covers all hashes
+ *
+ * Be careful: only one task can add/delete stripes from temp_inactive_list at
+ * a given time. Adding stripes only takes the device lock, while deleting
+ * stripes only takes the hash lock.
+ */
+static void release_inactive_stripe_list(struct r5conf *conf,
+					 struct list_head *temp_inactive_list,
+					 int hash)
+{
+	int size;
+	bool do_wakeup = false;
+	unsigned long flags;
+
+	if (hash == NR_STRIPE_HASH_LOCKS) {
+		size = NR_STRIPE_HASH_LOCKS;
+		hash = NR_STRIPE_HASH_LOCKS - 1;
+	} else
+		size = 1;
+	while (size) {
+		struct list_head *list = &temp_inactive_list[size - 1];
+
+		/*
+		 * We don't hold any lock here yet; get_active_stripe() might
+		 * remove stripes from the list
+		 */
+		if (!list_empty_careful(list)) {
+			spin_lock_irqsave(conf->hash_locks + hash, flags);
+			if (list_empty(conf->inactive_list + hash) &&
+			    !list_empty(list))
+				atomic_dec(&conf->empty_inactive_list_nr);
+			list_splice_tail_init(list, conf->inactive_list + hash);
+			do_wakeup = true;
+			spin_unlock_irqrestore(conf->hash_locks + hash, flags);
+		}
+		size--;
+		hash--;
+	}
+
+	if (do_wakeup) {
+		wake_up(&conf->wait_for_stripe);
+		if (conf->retry_read_aligned)
+			md_wakeup_thread(conf->mddev->thread);
+	}
 }
 
 /* should hold conf->device_lock already */
-static int release_stripe_list(struct r5conf *conf)
+static int release_stripe_list(struct r5conf *conf,
+			       struct list_head *temp_inactive_list)
 {
 	struct stripe_head *sh;
 	int count = 0;
@@ -303,6 +385,8 @@
 	head = llist_del_all(&conf->released_stripes);
 	head = llist_reverse_order(head);
 	while (head) {
+		int hash;
+
 		sh = llist_entry(head, struct stripe_head, release_list);
 		head = llist_next(head);
 		/* sh could be re-added after STRIPE_ON_RELEASE_LIST is cleared */
@@ -313,7 +397,8 @@
 		 * again, the count is always > 1. This is true for
 		 * STRIPE_ON_UNPLUG_LIST bit too.
 		 */
-		__release_stripe(conf, sh);
+		hash = sh->hash_lock_index;
+		__release_stripe(conf, sh, &temp_inactive_list[hash]);
 		count++;
 	}
 
@@ -324,9 +409,12 @@
 {
 	struct r5conf *conf = sh->raid_conf;
 	unsigned long flags;
+	struct list_head list;
+	int hash;
 	bool wakeup;
 
-	if (test_and_set_bit(STRIPE_ON_RELEASE_LIST, &sh->state))
+	if (unlikely(!conf->mddev->thread) ||
+		test_and_set_bit(STRIPE_ON_RELEASE_LIST, &sh->state))
 		goto slow_path;
 	wakeup = llist_add(&sh->release_list, &conf->released_stripes);
 	if (wakeup)
@@ -336,8 +424,11 @@
 	local_irq_save(flags);
 	/* we are ok here if STRIPE_ON_RELEASE_LIST is set or not */
 	if (atomic_dec_and_lock(&sh->count, &conf->device_lock)) {
-		do_release_stripe(conf, sh);
+		INIT_LIST_HEAD(&list);
+		hash = sh->hash_lock_index;
+		do_release_stripe(conf, sh, &list);
 		spin_unlock(&conf->device_lock);
+		release_inactive_stripe_list(conf, &list, hash);
 	}
 	local_irq_restore(flags);
 }
@@ -362,18 +453,21 @@
 
 
 /* find an idle stripe, make sure it is unhashed, and return it. */
-static struct stripe_head *get_free_stripe(struct r5conf *conf)
+static struct stripe_head *get_free_stripe(struct r5conf *conf, int hash)
 {
 	struct stripe_head *sh = NULL;
 	struct list_head *first;
 
-	if (list_empty(&conf->inactive_list))
+	if (list_empty(conf->inactive_list + hash))
 		goto out;
-	first = conf->inactive_list.next;
+	first = (conf->inactive_list + hash)->next;
 	sh = list_entry(first, struct stripe_head, lru);
 	list_del_init(first);
 	remove_hash(sh);
 	atomic_inc(&conf->active_stripes);
+	BUG_ON(hash != sh->hash_lock_index);
+	if (list_empty(conf->inactive_list + hash))
+		atomic_inc(&conf->empty_inactive_list_nr);
 out:
 	return sh;
 }
@@ -416,7 +510,7 @@
 static void init_stripe(struct stripe_head *sh, sector_t sector, int previous)
 {
 	struct r5conf *conf = sh->raid_conf;
-	int i;
+	int i, seq;
 
 	BUG_ON(atomic_read(&sh->count) != 0);
 	BUG_ON(test_bit(STRIPE_HANDLE, &sh->state));
@@ -426,7 +520,8 @@
 		(unsigned long long)sh->sector);
 
 	remove_hash(sh);
-
+retry:
+	seq = read_seqcount_begin(&conf->gen_lock);
 	sh->generation = conf->generation - previous;
 	sh->disks = previous ? conf->previous_raid_disks : conf->raid_disks;
 	sh->sector = sector;
@@ -448,6 +543,8 @@
 		dev->flags = 0;
 		raid5_build_block(sh, i, previous);
 	}
+	if (read_seqcount_retry(&conf->gen_lock, seq))
+		goto retry;
 	insert_hash(conf, sh);
 	sh->cpu = smp_processor_id();
 }
@@ -552,29 +649,31 @@
 		  int previous, int noblock, int noquiesce)
 {
 	struct stripe_head *sh;
+	int hash = stripe_hash_locks_hash(sector);
 
 	pr_debug("get_stripe, sector %llu\n", (unsigned long long)sector);
 
-	spin_lock_irq(&conf->device_lock);
+	spin_lock_irq(conf->hash_locks + hash);
 
 	do {
 		wait_event_lock_irq(conf->wait_for_stripe,
 				    conf->quiesce == 0 || noquiesce,
-				    conf->device_lock);
+				    *(conf->hash_locks + hash));
 		sh = __find_stripe(conf, sector, conf->generation - previous);
 		if (!sh) {
 			if (!conf->inactive_blocked)
-				sh = get_free_stripe(conf);
+				sh = get_free_stripe(conf, hash);
 			if (noblock && sh == NULL)
 				break;
 			if (!sh) {
 				conf->inactive_blocked = 1;
-				wait_event_lock_irq(conf->wait_for_stripe,
-						    !list_empty(&conf->inactive_list) &&
-						    (atomic_read(&conf->active_stripes)
-						     < (conf->max_nr_stripes *3/4)
-						     || !conf->inactive_blocked),
-						    conf->device_lock);
+				wait_event_lock_irq(
+					conf->wait_for_stripe,
+					!list_empty(conf->inactive_list + hash) &&
+					(atomic_read(&conf->active_stripes)
+					 < (conf->max_nr_stripes * 3 / 4)
+					 || !conf->inactive_blocked),
+					*(conf->hash_locks + hash));
 				conf->inactive_blocked = 0;
 			} else
 				init_stripe(sh, sector, previous);
@@ -585,9 +684,11 @@
 				    && !test_bit(STRIPE_ON_UNPLUG_LIST, &sh->state)
 				    && !test_bit(STRIPE_ON_RELEASE_LIST, &sh->state));
 			} else {
+				spin_lock(&conf->device_lock);
 				if (!test_bit(STRIPE_HANDLE, &sh->state))
 					atomic_inc(&conf->active_stripes);
 				if (list_empty(&sh->lru) &&
+				    !test_bit(STRIPE_ON_RELEASE_LIST, &sh->state) &&
 				    !test_bit(STRIPE_EXPANDING, &sh->state))
 					BUG();
 				list_del_init(&sh->lru);
@@ -595,6 +696,7 @@
 					sh->group->stripes_cnt--;
 					sh->group = NULL;
 				}
+				spin_unlock(&conf->device_lock);
 			}
 		}
 	} while (sh == NULL);
@@ -602,7 +704,7 @@
 	if (sh)
 		atomic_inc(&sh->count);
 
-	spin_unlock_irq(&conf->device_lock);
+	spin_unlock_irq(conf->hash_locks + hash);
 	return sh;
 }
 
@@ -758,7 +860,7 @@
 				bi->bi_sector = (sh->sector
 						 + rdev->data_offset);
 			if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags))
-				bi->bi_rw |= REQ_FLUSH;
+				bi->bi_rw |= REQ_NOMERGE;
 
 			bi->bi_vcnt = 1;
 			bi->bi_io_vec[0].bv_len = STRIPE_SIZE;
@@ -1582,7 +1684,7 @@
 	put_cpu();
 }
 
-static int grow_one_stripe(struct r5conf *conf)
+static int grow_one_stripe(struct r5conf *conf, int hash)
 {
 	struct stripe_head *sh;
 	sh = kmem_cache_zalloc(conf->slab_cache, GFP_KERNEL);
@@ -1598,6 +1700,7 @@
 		kmem_cache_free(conf->slab_cache, sh);
 		return 0;
 	}
+	sh->hash_lock_index = hash;
 	/* we just created an active stripe so... */
 	atomic_set(&sh->count, 1);
 	atomic_inc(&conf->active_stripes);
@@ -1610,6 +1713,7 @@
 {
 	struct kmem_cache *sc;
 	int devs = max(conf->raid_disks, conf->previous_raid_disks);
+	int hash;
 
 	if (conf->mddev->gendisk)
 		sprintf(conf->cache_name[0],
@@ -1627,9 +1731,13 @@
 		return 1;
 	conf->slab_cache = sc;
 	conf->pool_size = devs;
-	while (num--)
-		if (!grow_one_stripe(conf))
+	hash = conf->max_nr_stripes % NR_STRIPE_HASH_LOCKS;
+	while (num--) {
+		if (!grow_one_stripe(conf, hash))
 			return 1;
+		conf->max_nr_stripes++;
+		hash = (hash + 1) % NR_STRIPE_HASH_LOCKS;
+	}
 	return 0;
 }
 
@@ -1687,6 +1795,7 @@
 	int err;
 	struct kmem_cache *sc;
 	int i;
+	int hash, cnt;
 
 	if (newsize <= conf->pool_size)
 		return 0; /* never bother to shrink */
@@ -1726,19 +1835,29 @@
 	 * OK, we have enough stripes, start collecting inactive
 	 * stripes and copying them over
 	 */
+	hash = 0;
+	cnt = 0;
 	list_for_each_entry(nsh, &newstripes, lru) {
-		spin_lock_irq(&conf->device_lock);
-		wait_event_lock_irq(conf->wait_for_stripe,
-				    !list_empty(&conf->inactive_list),
-				    conf->device_lock);
-		osh = get_free_stripe(conf);
-		spin_unlock_irq(&conf->device_lock);
+		lock_device_hash_lock(conf, hash);
+		wait_event_cmd(conf->wait_for_stripe,
+				    !list_empty(conf->inactive_list + hash),
+				    unlock_device_hash_lock(conf, hash),
+				    lock_device_hash_lock(conf, hash));
+		osh = get_free_stripe(conf, hash);
+		unlock_device_hash_lock(conf, hash);
 		atomic_set(&nsh->count, 1);
 		for(i=0; i<conf->pool_size; i++)
 			nsh->dev[i].page = osh->dev[i].page;
 		for( ; i<newsize; i++)
 			nsh->dev[i].page = NULL;
+		nsh->hash_lock_index = hash;
 		kmem_cache_free(conf->slab_cache, osh);
+		cnt++;
+		if (cnt >= conf->max_nr_stripes / NR_STRIPE_HASH_LOCKS +
+		    !!((conf->max_nr_stripes % NR_STRIPE_HASH_LOCKS) > hash)) {
+			hash++;
+			cnt = 0;
+		}
 	}
 	kmem_cache_destroy(conf->slab_cache);
 
@@ -1797,13 +1916,13 @@
 	return err;
 }
 
-static int drop_one_stripe(struct r5conf *conf)
+static int drop_one_stripe(struct r5conf *conf, int hash)
 {
 	struct stripe_head *sh;
 
-	spin_lock_irq(&conf->device_lock);
-	sh = get_free_stripe(conf);
-	spin_unlock_irq(&conf->device_lock);
+	spin_lock_irq(conf->hash_locks + hash);
+	sh = get_free_stripe(conf, hash);
+	spin_unlock_irq(conf->hash_locks + hash);
 	if (!sh)
 		return 0;
 	BUG_ON(atomic_read(&sh->count));
@@ -1815,8 +1934,10 @@
 
 static void shrink_stripes(struct r5conf *conf)
 {
-	while (drop_one_stripe(conf))
-		;
+	int hash;
+	for (hash = 0; hash < NR_STRIPE_HASH_LOCKS; hash++)
+		while (drop_one_stripe(conf, hash))
+			;
 
 	if (conf->slab_cache)
 		kmem_cache_destroy(conf->slab_cache);
@@ -1921,6 +2042,9 @@
 			       mdname(conf->mddev), bdn);
 		else
 			retry = 1;
+		if (set_bad && test_bit(In_sync, &rdev->flags)
+		    && !test_bit(R5_ReadNoMerge, &sh->dev[i].flags))
+			retry = 1;
 		if (retry)
 			if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags)) {
 				set_bit(R5_ReadError, &sh->dev[i].flags);
@@ -3900,7 +4024,8 @@
 	}
 }
 
-static void activate_bit_delay(struct r5conf *conf)
+static void activate_bit_delay(struct r5conf *conf,
+	struct list_head *temp_inactive_list)
 {
 	/* device_lock is held */
 	struct list_head head;
@@ -3908,9 +4033,11 @@
 	list_del_init(&conf->bitmap_list);
 	while (!list_empty(&head)) {
 		struct stripe_head *sh = list_entry(head.next, struct stripe_head, lru);
+		int hash;
 		list_del_init(&sh->lru);
 		atomic_inc(&sh->count);
-		__release_stripe(conf, sh);
+		hash = sh->hash_lock_index;
+		__release_stripe(conf, sh, &temp_inactive_list[hash]);
 	}
 }
 
@@ -3926,7 +4053,7 @@
 		return 1;
 	if (conf->quiesce)
 		return 1;
-	if (list_empty_careful(&conf->inactive_list))
+	if (atomic_read(&conf->empty_inactive_list_nr))
 		return 1;
 
 	return 0;
@@ -4256,6 +4383,7 @@
 struct raid5_plug_cb {
 	struct blk_plug_cb	cb;
 	struct list_head	list;
+	struct list_head	temp_inactive_list[NR_STRIPE_HASH_LOCKS];
 };
 
 static void raid5_unplug(struct blk_plug_cb *blk_cb, bool from_schedule)
@@ -4266,6 +4394,7 @@
 	struct mddev *mddev = cb->cb.data;
 	struct r5conf *conf = mddev->private;
 	int cnt = 0;
+	int hash;
 
 	if (cb->list.next && !list_empty(&cb->list)) {
 		spin_lock_irq(&conf->device_lock);
@@ -4283,11 +4412,14 @@
 			 * STRIPE_ON_RELEASE_LIST could be set here. In that
 			 * case, the count is always > 1 here
 			 */
-			__release_stripe(conf, sh);
+			hash = sh->hash_lock_index;
+			__release_stripe(conf, sh, &cb->temp_inactive_list[hash]);
 			cnt++;
 		}
 		spin_unlock_irq(&conf->device_lock);
 	}
+	release_inactive_stripe_list(conf, cb->temp_inactive_list,
+				     NR_STRIPE_HASH_LOCKS);
 	if (mddev->queue)
 		trace_block_unplug(mddev->queue, cnt, !from_schedule);
 	kfree(cb);
@@ -4308,8 +4440,12 @@
 
 	cb = container_of(blk_cb, struct raid5_plug_cb, cb);
 
-	if (cb->list.next == NULL)
+	if (cb->list.next == NULL) {
+		int i;
 		INIT_LIST_HEAD(&cb->list);
+		for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++)
+			INIT_LIST_HEAD(cb->temp_inactive_list + i);
+	}
 
 	if (!test_and_set_bit(STRIPE_ON_UNPLUG_LIST, &sh->state))
 		list_add_tail(&sh->lru, &cb->list);
@@ -4692,14 +4828,19 @@
 	    time_after(jiffies, conf->reshape_checkpoint + 10*HZ)) {
 		/* Cannot proceed until we've updated the superblock... */
 		wait_event(conf->wait_for_overlap,
-			   atomic_read(&conf->reshape_stripes)==0);
+			   atomic_read(&conf->reshape_stripes)==0
+			   || test_bit(MD_RECOVERY_INTR, &mddev->recovery));
+		if (atomic_read(&conf->reshape_stripes) != 0)
+			return 0;
 		mddev->reshape_position = conf->reshape_progress;
 		mddev->curr_resync_completed = sector_nr;
 		conf->reshape_checkpoint = jiffies;
 		set_bit(MD_CHANGE_DEVS, &mddev->flags);
 		md_wakeup_thread(mddev->thread);
 		wait_event(mddev->sb_wait, mddev->flags == 0 ||
-			   kthread_should_stop());
+			   test_bit(MD_RECOVERY_INTR, &mddev->recovery));
+		if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
+			return 0;
 		spin_lock_irq(&conf->device_lock);
 		conf->reshape_safe = mddev->reshape_position;
 		spin_unlock_irq(&conf->device_lock);
@@ -4782,7 +4923,10 @@
 	    >= mddev->resync_max - mddev->curr_resync_completed) {
 		/* Cannot proceed until we've updated the superblock... */
 		wait_event(conf->wait_for_overlap,
-			   atomic_read(&conf->reshape_stripes) == 0);
+			   atomic_read(&conf->reshape_stripes) == 0
+			   || test_bit(MD_RECOVERY_INTR, &mddev->recovery));
+		if (atomic_read(&conf->reshape_stripes) != 0)
+			goto ret;
 		mddev->reshape_position = conf->reshape_progress;
 		mddev->curr_resync_completed = sector_nr;
 		conf->reshape_checkpoint = jiffies;
@@ -4790,13 +4934,16 @@
 		md_wakeup_thread(mddev->thread);
 		wait_event(mddev->sb_wait,
 			   !test_bit(MD_CHANGE_DEVS, &mddev->flags)
-			   || kthread_should_stop());
+			   || test_bit(MD_RECOVERY_INTR, &mddev->recovery));
+		if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
+			goto ret;
 		spin_lock_irq(&conf->device_lock);
 		conf->reshape_safe = mddev->reshape_position;
 		spin_unlock_irq(&conf->device_lock);
 		wake_up(&conf->wait_for_overlap);
 		sysfs_notify(&mddev->kobj, NULL, "sync_completed");
 	}
+ret:
 	return reshape_sectors;
 }
 
@@ -4954,27 +5101,45 @@
 }
 
 static int handle_active_stripes(struct r5conf *conf, int group,
-				 struct r5worker *worker)
+				 struct r5worker *worker,
+				 struct list_head *temp_inactive_list)
 {
 	struct stripe_head *batch[MAX_STRIPE_BATCH], *sh;
-	int i, batch_size = 0;
+	int i, batch_size = 0, hash;
+	bool release_inactive = false;
 
 	while (batch_size < MAX_STRIPE_BATCH &&
 			(sh = __get_priority_stripe(conf, group)) != NULL)
 		batch[batch_size++] = sh;
 
-	if (batch_size == 0)
-		return batch_size;
+	if (batch_size == 0) {
+		for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++)
+			if (!list_empty(temp_inactive_list + i))
+				break;
+		if (i == NR_STRIPE_HASH_LOCKS)
+			return batch_size;
+		release_inactive = true;
+	}
 	spin_unlock_irq(&conf->device_lock);
 
+	release_inactive_stripe_list(conf, temp_inactive_list,
+				     NR_STRIPE_HASH_LOCKS);
+
+	if (release_inactive) {
+		spin_lock_irq(&conf->device_lock);
+		return 0;
+	}
+
 	for (i = 0; i < batch_size; i++)
 		handle_stripe(batch[i]);
 
 	cond_resched();
 
 	spin_lock_irq(&conf->device_lock);
-	for (i = 0; i < batch_size; i++)
-		__release_stripe(conf, batch[i]);
+	for (i = 0; i < batch_size; i++) {
+		hash = batch[i]->hash_lock_index;
+		__release_stripe(conf, batch[i], &temp_inactive_list[hash]);
+	}
 	return batch_size;
 }
 
@@ -4995,9 +5160,10 @@
 	while (1) {
 		int batch_size, released;
 
-		released = release_stripe_list(conf);
+		released = release_stripe_list(conf, worker->temp_inactive_list);
 
-		batch_size = handle_active_stripes(conf, group_id, worker);
+		batch_size = handle_active_stripes(conf, group_id, worker,
+						   worker->temp_inactive_list);
 		worker->working = false;
 		if (!batch_size && !released)
 			break;
@@ -5036,7 +5202,7 @@
 		struct bio *bio;
 		int batch_size, released;
 
-		released = release_stripe_list(conf);
+		released = release_stripe_list(conf, conf->temp_inactive_list);
 
 		if (
 		    !list_empty(&conf->bitmap_list)) {
@@ -5046,7 +5212,7 @@
 			bitmap_unplug(mddev->bitmap);
 			spin_lock_irq(&conf->device_lock);
 			conf->seq_write = conf->seq_flush;
-			activate_bit_delay(conf);
+			activate_bit_delay(conf, conf->temp_inactive_list);
 		}
 		raid5_activate_delayed(conf);
 
@@ -5060,7 +5226,8 @@
 			handled++;
 		}
 
-		batch_size = handle_active_stripes(conf, ANY_GROUP, NULL);
+		batch_size = handle_active_stripes(conf, ANY_GROUP, NULL,
+						   conf->temp_inactive_list);
 		if (!batch_size && !released)
 			break;
 		handled += batch_size;
@@ -5096,22 +5263,29 @@
 {
 	struct r5conf *conf = mddev->private;
 	int err;
+	int hash;
 
 	if (size <= 16 || size > 32768)
 		return -EINVAL;
+	hash = (conf->max_nr_stripes - 1) % NR_STRIPE_HASH_LOCKS;
 	while (size < conf->max_nr_stripes) {
-		if (drop_one_stripe(conf))
+		if (drop_one_stripe(conf, hash))
 			conf->max_nr_stripes--;
 		else
 			break;
+		hash--;
+		if (hash < 0)
+			hash = NR_STRIPE_HASH_LOCKS - 1;
 	}
 	err = md_allow_write(mddev);
 	if (err)
 		return err;
+	hash = conf->max_nr_stripes % NR_STRIPE_HASH_LOCKS;
 	while (size > conf->max_nr_stripes) {
-		if (grow_one_stripe(conf))
+		if (grow_one_stripe(conf, hash))
 			conf->max_nr_stripes++;
 		else break;
+		hash = (hash + 1) % NR_STRIPE_HASH_LOCKS;
 	}
 	return 0;
 }
@@ -5199,15 +5373,18 @@
 		return 0;
 }
 
-static int alloc_thread_groups(struct r5conf *conf, int cnt);
+static int alloc_thread_groups(struct r5conf *conf, int cnt,
+			       int *group_cnt,
+			       int *worker_cnt_per_group,
+			       struct r5worker_group **worker_groups);
 static ssize_t
 raid5_store_group_thread_cnt(struct mddev *mddev, const char *page, size_t len)
 {
 	struct r5conf *conf = mddev->private;
 	unsigned long new;
 	int err;
-	struct r5worker_group *old_groups;
-	int old_group_cnt;
+	struct r5worker_group *new_groups, *old_groups;
+	int group_cnt, worker_cnt_per_group;
 
 	if (len >= PAGE_SIZE)
 		return -EINVAL;
@@ -5223,14 +5400,19 @@
 	mddev_suspend(mddev);
 
 	old_groups = conf->worker_groups;
-	old_group_cnt = conf->worker_cnt_per_group;
+	if (old_groups)
+		flush_workqueue(raid5_wq);
 
-	conf->worker_groups = NULL;
-	err = alloc_thread_groups(conf, new);
-	if (err) {
-		conf->worker_groups = old_groups;
-		conf->worker_cnt_per_group = old_group_cnt;
-	} else {
+	err = alloc_thread_groups(conf, new,
+				  &group_cnt, &worker_cnt_per_group,
+				  &new_groups);
+	if (!err) {
+		spin_lock_irq(&conf->device_lock);
+		conf->group_cnt = group_cnt;
+		conf->worker_cnt_per_group = worker_cnt_per_group;
+		conf->worker_groups = new_groups;
+		spin_unlock_irq(&conf->device_lock);
+
 		if (old_groups)
 			kfree(old_groups[0].workers);
 		kfree(old_groups);
@@ -5260,40 +5442,47 @@
 	.attrs = raid5_attrs,
 };
 
-static int alloc_thread_groups(struct r5conf *conf, int cnt)
+static int alloc_thread_groups(struct r5conf *conf, int cnt,
+			       int *group_cnt,
+			       int *worker_cnt_per_group,
+			       struct r5worker_group **worker_groups)
 {
-	int i, j;
+	int i, j, k;
 	ssize_t size;
 	struct r5worker *workers;
 
-	conf->worker_cnt_per_group = cnt;
+	*worker_cnt_per_group = cnt;
 	if (cnt == 0) {
-		conf->worker_groups = NULL;
+		*group_cnt = 0;
+		*worker_groups = NULL;
 		return 0;
 	}
-	conf->group_cnt = num_possible_nodes();
+	*group_cnt = num_possible_nodes();
 	size = sizeof(struct r5worker) * cnt;
-	workers = kzalloc(size * conf->group_cnt, GFP_NOIO);
-	conf->worker_groups = kzalloc(sizeof(struct r5worker_group) *
-				conf->group_cnt, GFP_NOIO);
-	if (!conf->worker_groups || !workers) {
+	workers = kzalloc(size * *group_cnt, GFP_NOIO);
+	*worker_groups = kzalloc(sizeof(struct r5worker_group) *
+				*group_cnt, GFP_NOIO);
+	if (!*worker_groups || !workers) {
 		kfree(workers);
-		kfree(conf->worker_groups);
-		conf->worker_groups = NULL;
+		kfree(*worker_groups);
 		return -ENOMEM;
 	}
 
-	for (i = 0; i < conf->group_cnt; i++) {
+	for (i = 0; i < *group_cnt; i++) {
 		struct r5worker_group *group;
 
-		group = &conf->worker_groups[i];
+		group = &(*worker_groups)[i];
 		INIT_LIST_HEAD(&group->handle_list);
 		group->conf = conf;
 		group->workers = workers + i * cnt;
 
 		for (j = 0; j < cnt; j++) {
-			group->workers[j].group = group;
-			INIT_WORK(&group->workers[j].work, raid5_do_work);
+			struct r5worker *worker = group->workers + j;
+			worker->group = group;
+			INIT_WORK(&worker->work, raid5_do_work);
+
+			for (k = 0; k < NR_STRIPE_HASH_LOCKS; k++)
+				INIT_LIST_HEAD(worker->temp_inactive_list + k);
 		}
 	}
 
@@ -5444,6 +5633,9 @@
 	struct md_rdev *rdev;
 	struct disk_info *disk;
 	char pers_name[6];
+	int i;
+	int group_cnt, worker_cnt_per_group;
+	struct r5worker_group *new_group;
 
 	if (mddev->new_level != 5
 	    && mddev->new_level != 4
@@ -5478,7 +5670,12 @@
 	if (conf == NULL)
 		goto abort;
 	/* Don't enable multi-threading by default*/
-	if (alloc_thread_groups(conf, 0))
+	if (!alloc_thread_groups(conf, 0, &group_cnt, &worker_cnt_per_group,
+				 &new_group)) {
+		conf->group_cnt = group_cnt;
+		conf->worker_cnt_per_group = worker_cnt_per_group;
+		conf->worker_groups = new_group;
+	} else
 		goto abort;
 	spin_lock_init(&conf->device_lock);
 	seqcount_init(&conf->gen_lock);
@@ -5488,7 +5685,6 @@
 	INIT_LIST_HEAD(&conf->hold_list);
 	INIT_LIST_HEAD(&conf->delayed_list);
 	INIT_LIST_HEAD(&conf->bitmap_list);
-	INIT_LIST_HEAD(&conf->inactive_list);
 	init_llist_head(&conf->released_stripes);
 	atomic_set(&conf->active_stripes, 0);
 	atomic_set(&conf->preread_active_stripes, 0);
@@ -5514,6 +5710,21 @@
 	if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL)
 		goto abort;
 
+	/* We init hash_locks[0] separately so that it can be used
+	 * as the reference lock in the spin_lock_nest_lock() call
+	 * in lock_all_device_hash_locks_irq in order to convince
+	 * lockdep that we know what we are doing.
+	 */
+	spin_lock_init(conf->hash_locks);
+	for (i = 1; i < NR_STRIPE_HASH_LOCKS; i++)
+		spin_lock_init(conf->hash_locks + i);
+
+	for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++)
+		INIT_LIST_HEAD(conf->inactive_list + i);
+
+	for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++)
+		INIT_LIST_HEAD(conf->temp_inactive_list + i);
+
 	conf->level = mddev->new_level;
 	if (raid5_alloc_percpu(conf) != 0)
 		goto abort;
@@ -5554,7 +5765,6 @@
 	else
 		conf->max_degraded = 1;
 	conf->algorithm = mddev->new_layout;
-	conf->max_nr_stripes = NR_STRIPES;
 	conf->reshape_progress = mddev->reshape_position;
 	if (conf->reshape_progress != MaxSector) {
 		conf->prev_chunk_sectors = mddev->chunk_sectors;
@@ -5563,7 +5773,8 @@
 
 	memory = conf->max_nr_stripes * (sizeof(struct stripe_head) +
 		 max_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024;
-	if (grow_stripes(conf, conf->max_nr_stripes)) {
+	atomic_set(&conf->empty_inactive_list_nr, NR_STRIPE_HASH_LOCKS);
+	if (grow_stripes(conf, NR_STRIPES)) {
 		printk(KERN_ERR
 		       "md/raid:%s: couldn't allocate %dkB for buffers\n",
 		       mdname(mddev), memory);
@@ -6369,12 +6580,18 @@
 	if (!mddev->sync_thread) {
 		mddev->recovery = 0;
 		spin_lock_irq(&conf->device_lock);
+		write_seqcount_begin(&conf->gen_lock);
 		mddev->raid_disks = conf->raid_disks = conf->previous_raid_disks;
+		mddev->new_chunk_sectors =
+			conf->chunk_sectors = conf->prev_chunk_sectors;
+		mddev->new_layout = conf->algorithm = conf->prev_algo;
 		rdev_for_each(rdev, mddev)
 			rdev->new_data_offset = rdev->data_offset;
 		smp_wmb();
+		conf->generation --;
 		conf->reshape_progress = MaxSector;
 		mddev->reshape_position = MaxSector;
+		write_seqcount_end(&conf->gen_lock);
 		spin_unlock_irq(&conf->device_lock);
 		return -EAGAIN;
 	}
@@ -6462,27 +6679,28 @@
 		break;
 
 	case 1: /* stop all writes */
-		spin_lock_irq(&conf->device_lock);
+		lock_all_device_hash_locks_irq(conf);
 		/* '2' tells resync/reshape to pause so that all
 		 * active stripes can drain
 		 */
 		conf->quiesce = 2;
-		wait_event_lock_irq(conf->wait_for_stripe,
+		wait_event_cmd(conf->wait_for_stripe,
 				    atomic_read(&conf->active_stripes) == 0 &&
 				    atomic_read(&conf->active_aligned_reads) == 0,
-				    conf->device_lock);
+				    unlock_all_device_hash_locks_irq(conf),
+				    lock_all_device_hash_locks_irq(conf));
 		conf->quiesce = 1;
-		spin_unlock_irq(&conf->device_lock);
+		unlock_all_device_hash_locks_irq(conf);
 		/* allow reshape to continue */
 		wake_up(&conf->wait_for_overlap);
 		break;
 
 	case 0: /* re-enable writes */
-		spin_lock_irq(&conf->device_lock);
+		lock_all_device_hash_locks_irq(conf);
 		conf->quiesce = 0;
 		wake_up(&conf->wait_for_stripe);
 		wake_up(&conf->wait_for_overlap);
-		spin_unlock_irq(&conf->device_lock);
+		unlock_all_device_hash_locks_irq(conf);
 		break;
 	}
 }
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index b42e6b4..01ad8ae 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -205,6 +205,7 @@
 	short			pd_idx;		/* parity disk index */
 	short			qd_idx;		/* 'Q' disk index for raid6 */
 	short			ddf_layout;/* use DDF ordering to calculate Q */
+	short			hash_lock_index;
 	unsigned long		state;		/* state flags */
 	atomic_t		count;	      /* nr of active thread/requests */
 	int			bm_seq;	/* sequence number for bitmap flushes */
@@ -367,9 +368,18 @@
 	struct md_rdev	*rdev, *replacement;
 };
 
+/* NOTE NR_STRIPE_HASH_LOCKS must remain below 64.
+ * This is because we sometimes take all the spinlocks
+ * and creating that much locking depth can cause
+ * problems.
+ */
+#define NR_STRIPE_HASH_LOCKS 8
+#define STRIPE_HASH_LOCKS_MASK (NR_STRIPE_HASH_LOCKS - 1)
+
 struct r5worker {
 	struct work_struct work;
 	struct r5worker_group *group;
+	struct list_head temp_inactive_list[NR_STRIPE_HASH_LOCKS];
 	bool working;
 };
 
@@ -382,6 +392,8 @@
 
 struct r5conf {
 	struct hlist_head	*stripe_hashtbl;
+	/* each lock only protects its corresponding hash list and inactive_list */
+	spinlock_t		hash_locks[NR_STRIPE_HASH_LOCKS];
 	struct mddev		*mddev;
 	int			chunk_sectors;
 	int			level, algorithm;
@@ -462,7 +474,8 @@
 	 * Free stripes pool
 	 */
 	atomic_t		active_stripes;
-	struct list_head	inactive_list;
+	struct list_head	inactive_list[NR_STRIPE_HASH_LOCKS];
+	atomic_t		empty_inactive_list_nr;
 	struct llist_head	released_stripes;
 	wait_queue_head_t	wait_for_stripe;
 	wait_queue_head_t	wait_for_overlap;
@@ -477,6 +490,7 @@
 	 * the new thread here until we fully activate the array.
 	 */
 	struct md_thread	*thread;
+	struct list_head	temp_inactive_list[NR_STRIPE_HASH_LOCKS];
 	struct r5worker_group	*worker_groups;
 	int			group_cnt;
 	int			worker_cnt_per_group;
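
The single device_lock-protected inactive list becomes NR_STRIPE_HASH_LOCKS lists, each with its own spinlock, and every stripe_head records which one it belongs to via hash_lock_index. A sketch of one plausible sector-to-lock mapping and of taking all the hash locks in a fixed order (illustrative only; the helpers added by this series also take conf->device_lock last, omitted here):

    /* illustrative mapping from a stripe sector to a hash lock index */
    #define STRIPE_HASH_LOCKS_HASH(sect) \
            (((sect) >> STRIPE_SHIFT) & STRIPE_HASH_LOCKS_MASK)

    static void lock_all_hash_locks_irq(struct r5conf *conf)
    {
            int i;

            local_irq_disable();
            spin_lock(conf->hash_locks);
            for (i = 1; i < NR_STRIPE_HASH_LOCKS; i++)
                    spin_lock_nest_lock(conf->hash_locks + i,
                                        conf->hash_locks);
    }

Keeping NR_STRIPE_HASH_LOCKS small bounds the lock nesting depth this creates.
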
diff --git a/drivers/media/platform/m2m-deinterlace.c b/drivers/media/platform/m2m-deinterlace.c
index 36513e8..65cab70 100644
--- a/drivers/media/platform/m2m-deinterlace.c
+++ b/drivers/media/platform/m2m-deinterlace.c
@@ -341,8 +341,7 @@
 	ctx->xt->dir = DMA_MEM_TO_MEM;
 	ctx->xt->src_sgl = false;
 	ctx->xt->dst_sgl = true;
-	flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT |
-		DMA_COMPL_SKIP_DEST_UNMAP | DMA_COMPL_SKIP_SRC_UNMAP;
+	flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT;
 
 	tx = dmadev->device_prep_interleaved_dma(chan, ctx->xt, flags);
 	if (tx == NULL) {
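
This hunk and the similar ones in the following media, MTD, network and SPI drivers are the same mechanical change: the dmaengine core now tracks and performs DMA unmapping itself (see the dmaengine_unmap_data conversion in ntb_transport.c further down), so the DMA_COMPL_SKIP_SRC_UNMAP/DMA_COMPL_SKIP_DEST_UNMAP flags are simply dropped from the prep calls:

    /* before */
    flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT |
            DMA_COMPL_SKIP_DEST_UNMAP | DMA_COMPL_SKIP_SRC_UNMAP;

    /* after: unmapping is handled by the dmaengine core */
    flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT;
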
diff --git a/drivers/media/platform/timblogiw.c b/drivers/media/platform/timblogiw.c
index 6a74ce0..ccdadd6 100644
--- a/drivers/media/platform/timblogiw.c
+++ b/drivers/media/platform/timblogiw.c
@@ -565,7 +565,7 @@
 
 	desc = dmaengine_prep_slave_sg(fh->chan,
 		buf->sg, sg_elems, DMA_DEV_TO_MEM,
-		DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_SRC_UNMAP);
+		DMA_PREP_INTERRUPT);
 	if (!desc) {
 		spin_lock_irq(&fh->queue_lock);
 		list_del_init(&vb->queue);
diff --git a/drivers/misc/carma/carma-fpga.c b/drivers/misc/carma/carma-fpga.c
index 08b18f3..9e2b985 100644
--- a/drivers/misc/carma/carma-fpga.c
+++ b/drivers/misc/carma/carma-fpga.c
@@ -633,8 +633,7 @@
 	struct dma_async_tx_descriptor *tx;
 	dma_cookie_t cookie;
 	dma_addr_t dst, src;
-	unsigned long dma_flags = DMA_COMPL_SKIP_DEST_UNMAP |
-				  DMA_COMPL_SKIP_SRC_UNMAP;
+	unsigned long dma_flags = 0;
 
 	dst_sg = buf->vb.sglist;
 	dst_nents = buf->vb.sglen;
diff --git a/drivers/mmc/core/sdio_bus.c b/drivers/mmc/core/sdio_bus.c
index ef89565..157b570 100644
--- a/drivers/mmc/core/sdio_bus.c
+++ b/drivers/mmc/core/sdio_bus.c
@@ -308,8 +308,7 @@
 	struct mmc_host *host = func->card->host;
 	u64 addr = (host->slotno << 16) | func->num;
 
-	ACPI_HANDLE_SET(&func->dev,
-			acpi_get_child(ACPI_HANDLE(host->parent), addr));
+	acpi_preset_companion(&func->dev, ACPI_HANDLE(host->parent), addr);
 }
 #else
 static inline void sdio_acpi_set_handle(struct sdio_func *func) {}
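
acpi_preset_companion() replaces the ACPI_HANDLE_SET(dev, acpi_get_child(parent, addr)) idiom: rather than stashing a raw acpi_handle on the struct device, the matching struct acpi_device is recorded as the device's ACPI companion. A small consumer-side sketch (the driver code is illustrative; do_something_with_handle() is a hypothetical helper):

    struct acpi_device *adev = ACPI_COMPANION(dev);

    if (adev)
            dev_dbg(dev, "ACPI companion: %s\n", acpi_dev_name(adev));

    /* ACPI_HANDLE() now resolves through the companion object, so
     * existing handle-based code keeps working unchanged. */
    if (ACPI_HANDLE(dev))
            do_something_with_handle(ACPI_HANDLE(dev));
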
diff --git a/drivers/mtd/nand/atmel_nand.c b/drivers/mtd/nand/atmel_nand.c
index d78a97d..59f08c4 100644
--- a/drivers/mtd/nand/atmel_nand.c
+++ b/drivers/mtd/nand/atmel_nand.c
@@ -375,8 +375,7 @@
 
 	dma_dev = host->dma_chan->device;
 
-	flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_SRC_UNMAP |
-		DMA_COMPL_SKIP_DEST_UNMAP;
+	flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT;
 
 	phys_addr = dma_map_single(dma_dev->dev, p, len, dir);
 	if (dma_mapping_error(dma_dev->dev, phys_addr)) {
diff --git a/drivers/mtd/nand/fsmc_nand.c b/drivers/mtd/nand/fsmc_nand.c
index 3dc1a75..8b27522 100644
--- a/drivers/mtd/nand/fsmc_nand.c
+++ b/drivers/mtd/nand/fsmc_nand.c
@@ -573,8 +573,6 @@
 	dma_dev = chan->device;
 	dma_addr = dma_map_single(dma_dev->dev, buffer, len, direction);
 
-	flags |= DMA_COMPL_SKIP_SRC_UNMAP | DMA_COMPL_SKIP_DEST_UNMAP;
-
 	if (direction == DMA_TO_DEVICE) {
 		dma_src = dma_addr;
 		dma_dst = host->data_pa;
diff --git a/drivers/net/ethernet/micrel/ks8842.c b/drivers/net/ethernet/micrel/ks8842.c
index 0951f7a..822616e 100644
--- a/drivers/net/ethernet/micrel/ks8842.c
+++ b/drivers/net/ethernet/micrel/ks8842.c
@@ -459,8 +459,7 @@
 		sg_dma_len(&ctl->sg) += 4 - sg_dma_len(&ctl->sg) % 4;
 
 	ctl->adesc = dmaengine_prep_slave_sg(ctl->chan,
-		&ctl->sg, 1, DMA_MEM_TO_DEV,
-		DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_SRC_UNMAP);
+		&ctl->sg, 1, DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT);
 	if (!ctl->adesc)
 		return NETDEV_TX_BUSY;
 
@@ -571,8 +570,7 @@
 		sg_dma_len(sg) = DMA_BUFFER_SIZE;
 
 		ctl->adesc = dmaengine_prep_slave_sg(ctl->chan,
-			sg, 1, DMA_DEV_TO_MEM,
-			DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_SRC_UNMAP);
+			sg, 1, DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT);
 
 		if (!ctl->adesc)
 			goto out;
diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c
index 12a9e83..d0222f1 100644
--- a/drivers/ntb/ntb_transport.c
+++ b/drivers/ntb/ntb_transport.c
@@ -1034,10 +1034,9 @@
 	struct dma_chan *chan = qp->dma_chan;
 	struct dma_device *device;
 	size_t pay_off, buff_off;
-	dma_addr_t src, dest;
+	struct dmaengine_unmap_data *unmap;
 	dma_cookie_t cookie;
 	void *buf = entry->buf;
-	unsigned long flags;
 
 	entry->len = len;
 
@@ -1045,35 +1044,49 @@
 		goto err;
 
 	if (len < copy_bytes) 
-		goto err1;
+		goto err_wait;
 
 	device = chan->device;
 	pay_off = (size_t) offset & ~PAGE_MASK;
 	buff_off = (size_t) buf & ~PAGE_MASK;
 
 	if (!is_dma_copy_aligned(device, pay_off, buff_off, len))
-		goto err1;
+		goto err_wait;
 
-	dest = dma_map_single(device->dev, buf, len, DMA_FROM_DEVICE);
-	if (dma_mapping_error(device->dev, dest))
-		goto err1;
+	unmap = dmaengine_get_unmap_data(device->dev, 2, GFP_NOWAIT);
+	if (!unmap)
+		goto err_wait;
 
-	src = dma_map_single(device->dev, offset, len, DMA_TO_DEVICE);
-	if (dma_mapping_error(device->dev, src))
-		goto err2;
+	unmap->len = len;
+	unmap->addr[0] = dma_map_page(device->dev, virt_to_page(offset),
+				      pay_off, len, DMA_TO_DEVICE);
+	if (dma_mapping_error(device->dev, unmap->addr[0]))
+		goto err_get_unmap;
 
-	flags = DMA_COMPL_DEST_UNMAP_SINGLE | DMA_COMPL_SRC_UNMAP_SINGLE |
-		DMA_PREP_INTERRUPT;
-	txd = device->device_prep_dma_memcpy(chan, dest, src, len, flags);
+	unmap->to_cnt = 1;
+
+	unmap->addr[1] = dma_map_page(device->dev, virt_to_page(buf),
+				      buff_off, len, DMA_FROM_DEVICE);
+	if (dma_mapping_error(device->dev, unmap->addr[1]))
+		goto err_get_unmap;
+
+	unmap->from_cnt = 1;
+
+	txd = device->device_prep_dma_memcpy(chan, unmap->addr[1],
+					     unmap->addr[0], len,
+					     DMA_PREP_INTERRUPT);
 	if (!txd)
-		goto err3;
+		goto err_get_unmap;
 
 	txd->callback = ntb_rx_copy_callback;
 	txd->callback_param = entry;
+	dma_set_unmap(txd, unmap);
 
 	cookie = dmaengine_submit(txd);
 	if (dma_submit_error(cookie))
-		goto err3;
+		goto err_set_unmap;
+
+	dmaengine_unmap_put(unmap);
 
 	qp->last_cookie = cookie;
 
@@ -1081,11 +1094,11 @@
 
 	return;
 
-err3:
-	dma_unmap_single(device->dev, src, len, DMA_TO_DEVICE);
-err2:
-	dma_unmap_single(device->dev, dest, len, DMA_FROM_DEVICE);
-err1:
+err_set_unmap:
+	dmaengine_unmap_put(unmap);
+err_get_unmap:
+	dmaengine_unmap_put(unmap);
+err_wait:
 	/* If the callbacks come out of order, the writing of the index to the
 	 * last completed will be out of order.  This may result in the
 	 * receive stalling forever.
@@ -1245,12 +1258,12 @@
 	struct dma_chan *chan = qp->dma_chan;
 	struct dma_device *device;
 	size_t dest_off, buff_off;
-	dma_addr_t src, dest;
+	struct dmaengine_unmap_data *unmap;
+	dma_addr_t dest;
 	dma_cookie_t cookie;
 	void __iomem *offset;
 	size_t len = entry->len;
 	void *buf = entry->buf;
-	unsigned long flags;
 
 	offset = qp->tx_mw + qp->tx_max_frame * qp->tx_index;
 	hdr = offset + qp->tx_max_frame - sizeof(struct ntb_payload_header);
@@ -1273,28 +1286,41 @@
 	if (!is_dma_copy_aligned(device, buff_off, dest_off, len))
 		goto err;
 
-	src = dma_map_single(device->dev, buf, len, DMA_TO_DEVICE);
-	if (dma_mapping_error(device->dev, src))
+	unmap = dmaengine_get_unmap_data(device->dev, 1, GFP_NOWAIT);
+	if (!unmap)
 		goto err;
 
-	flags = DMA_COMPL_SRC_UNMAP_SINGLE | DMA_PREP_INTERRUPT;
-	txd = device->device_prep_dma_memcpy(chan, dest, src, len, flags);
+	unmap->len = len;
+	unmap->addr[0] = dma_map_page(device->dev, virt_to_page(buf),
+				      buff_off, len, DMA_TO_DEVICE);
+	if (dma_mapping_error(device->dev, unmap->addr[0]))
+		goto err_get_unmap;
+
+	unmap->to_cnt = 1;
+
+	txd = device->device_prep_dma_memcpy(chan, dest, unmap->addr[0], len,
+					     DMA_PREP_INTERRUPT);
 	if (!txd)
-		goto err1;
+		goto err_get_unmap;
 
 	txd->callback = ntb_tx_copy_callback;
 	txd->callback_param = entry;
+	dma_set_unmap(txd, unmap);
 
 	cookie = dmaengine_submit(txd);
 	if (dma_submit_error(cookie))
-		goto err1;
+		goto err_set_unmap;
+
+	dmaengine_unmap_put(unmap);
 
 	dma_async_issue_pending(chan);
 	qp->tx_async++;
 
 	return;
-err1:
-	dma_unmap_single(device->dev, src, len, DMA_TO_DEVICE);
+err_set_unmap:
+	dmaengine_unmap_put(unmap);
+err_get_unmap:
+	dmaengine_unmap_put(unmap);
 err:
 	ntb_memcpy_tx(entry, offset);
 	qp->tx_memcpy++;
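
The two ntb_transport.c hunks above show the new core-managed unmap API that makes the DMA_COMPL_* flags unnecessary. A minimal sketch of the pattern, assuming a memcpy-capable channel chan and kernel-virtual src/dst buffers of len bytes (dma_map_page() error handling omitted for brevity):

    struct dmaengine_unmap_data *unmap;
    struct dma_async_tx_descriptor *tx;
    struct device *dev = chan->device->dev;

    unmap = dmaengine_get_unmap_data(dev, 2, GFP_NOWAIT);
    if (!unmap)
            return -ENOMEM;

    unmap->len = len;
    unmap->addr[0] = dma_map_page(dev, virt_to_page(src),
                                  offset_in_page(src), len, DMA_TO_DEVICE);
    unmap->to_cnt = 1;
    unmap->addr[1] = dma_map_page(dev, virt_to_page(dst),
                                  offset_in_page(dst), len, DMA_FROM_DEVICE);
    unmap->from_cnt = 1;

    tx = chan->device->device_prep_dma_memcpy(chan, unmap->addr[1],
                                               unmap->addr[0], len,
                                               DMA_PREP_INTERRUPT);
    if (tx)
            dma_set_unmap(tx, unmap);   /* the descriptor takes a reference */
    dmaengine_unmap_put(unmap);         /* drop ours; the core unmaps the
                                           pages once the transfer completes */
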
diff --git a/drivers/pci/hotplug/acpi_pcihp.c b/drivers/pci/hotplug/acpi_pcihp.c
index 1ce8ee0..a94d850 100644
--- a/drivers/pci/hotplug/acpi_pcihp.c
+++ b/drivers/pci/hotplug/acpi_pcihp.c
@@ -367,7 +367,7 @@
 		string = (struct acpi_buffer){ ACPI_ALLOCATE_BUFFER, NULL };
 	}
 
-	handle = DEVICE_ACPI_HANDLE(&pdev->dev);
+	handle = ACPI_HANDLE(&pdev->dev);
 	if (!handle) {
 		/*
 		 * This hotplug controller was not listed in the ACPI name
diff --git a/drivers/pci/hotplug/acpiphp.h b/drivers/pci/hotplug/acpiphp.h
index 26100f5..1592dbe 100644
--- a/drivers/pci/hotplug/acpiphp.h
+++ b/drivers/pci/hotplug/acpiphp.h
@@ -176,7 +176,6 @@
 u8 acpiphp_get_adapter_status(struct acpiphp_slot *slot);
 
 /* variables */
-extern bool acpiphp_debug;
 extern bool acpiphp_disabled;
 
 #endif /* _ACPIPHP_H */
diff --git a/drivers/pci/hotplug/pciehp_acpi.c b/drivers/pci/hotplug/pciehp_acpi.c
index ead7c53..cff7cad 100644
--- a/drivers/pci/hotplug/pciehp_acpi.c
+++ b/drivers/pci/hotplug/pciehp_acpi.c
@@ -54,7 +54,7 @@
 {
 	if (slot_detection_mode != PCIEHP_DETECT_ACPI)
 		return 0;
-	if (acpi_pci_detect_ejectable(DEVICE_ACPI_HANDLE(&dev->dev)))
+	if (acpi_pci_detect_ejectable(ACPI_HANDLE(&dev->dev)))
 		return 0;
 	return -ENODEV;
 }
@@ -96,7 +96,7 @@
 			dup_slot_id++;
 	}
 	list_add_tail(&slot->list, &dummy_slots);
-	handle = DEVICE_ACPI_HANDLE(&pdev->dev);
+	handle = ACPI_HANDLE(&pdev->dev);
 	if (!acpi_slot_detected && acpi_pci_detect_ejectable(handle))
 		acpi_slot_detected = 1;
 	return -ENODEV;         /* dummy driver always returns error */
diff --git a/drivers/pci/hotplug/sgi_hotplug.c b/drivers/pci/hotplug/sgi_hotplug.c
index b2781df..5b05a68 100644
--- a/drivers/pci/hotplug/sgi_hotplug.c
+++ b/drivers/pci/hotplug/sgi_hotplug.c
@@ -9,6 +9,7 @@
  * Work to add BIOS PROM support was completed by Mike Habeck.
  */
 
+#include <linux/acpi.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
@@ -29,7 +30,6 @@
 #include <asm/sn/sn_feature_sets.h>
 #include <asm/sn/sn_sal.h>
 #include <asm/sn/types.h>
-#include <linux/acpi.h>
 #include <asm/sn/acpi.h>
 
 #include "../pci.h"
@@ -414,7 +414,7 @@
 		acpi_handle rethandle;
 		acpi_status ret;
 
-		phandle = PCI_CONTROLLER(slot->pci_bus)->acpi_handle;
+		phandle = acpi_device_handle(PCI_CONTROLLER(slot->pci_bus)->companion);
 
 		if (acpi_bus_get_device(phandle, &pdevice)) {
 			dev_dbg(&slot->pci_bus->self->dev,
@@ -495,7 +495,7 @@
 
 	/* free the ACPI resources for the slot */
 	if (SN_ACPI_BASE_SUPPORT() &&
-            PCI_CONTROLLER(slot->pci_bus)->acpi_handle) {
+            PCI_CONTROLLER(slot->pci_bus)->companion) {
 		unsigned long long adr;
 		struct acpi_device *device;
 		acpi_handle phandle;
@@ -504,7 +504,7 @@
 		acpi_status ret;
 
 		/* Get the rootbus node pointer */
-		phandle = PCI_CONTROLLER(slot->pci_bus)->acpi_handle;
+		phandle = acpi_device_handle(PCI_CONTROLLER(slot->pci_bus)->companion);
 
 		acpi_scan_lock_acquire();
 		/*
diff --git a/drivers/pci/ioapic.c b/drivers/pci/ioapic.c
index 1b90579..50ce680 100644
--- a/drivers/pci/ioapic.c
+++ b/drivers/pci/ioapic.c
@@ -37,7 +37,7 @@
 	char *type;
 	struct resource *res;
 
-	handle = DEVICE_ACPI_HANDLE(&dev->dev);
+	handle = ACPI_HANDLE(&dev->dev);
 	if (!handle)
 		return -EINVAL;
 
diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index dfd1f59..f166126 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -173,14 +173,14 @@
 
 static bool acpi_pci_power_manageable(struct pci_dev *dev)
 {
-	acpi_handle handle = DEVICE_ACPI_HANDLE(&dev->dev);
+	acpi_handle handle = ACPI_HANDLE(&dev->dev);
 
 	return handle ? acpi_bus_power_manageable(handle) : false;
 }
 
 static int acpi_pci_set_power_state(struct pci_dev *dev, pci_power_t state)
 {
-	acpi_handle handle = DEVICE_ACPI_HANDLE(&dev->dev);
+	acpi_handle handle = ACPI_HANDLE(&dev->dev);
 	static const u8 state_conv[] = {
 		[PCI_D0] = ACPI_STATE_D0,
 		[PCI_D1] = ACPI_STATE_D1,
@@ -217,7 +217,7 @@
 
 static bool acpi_pci_can_wakeup(struct pci_dev *dev)
 {
-	acpi_handle handle = DEVICE_ACPI_HANDLE(&dev->dev);
+	acpi_handle handle = ACPI_HANDLE(&dev->dev);
 
 	return handle ? acpi_bus_can_wakeup(handle) : false;
 }
diff --git a/drivers/pci/pci-label.c b/drivers/pci/pci-label.c
index edaed6f..d51f45a 100644
--- a/drivers/pci/pci-label.c
+++ b/drivers/pci/pci-label.c
@@ -263,7 +263,7 @@
 	acpi_handle handle;
 	struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL};
 
-	handle = DEVICE_ACPI_HANDLE(dev);
+	handle = ACPI_HANDLE(dev);
 
 	if (!handle)
 		return FALSE;
@@ -295,7 +295,7 @@
 	acpi_handle handle;
 	int length;
 
-	handle = DEVICE_ACPI_HANDLE(dev);
+	handle = ACPI_HANDLE(dev);
 
 	if (!handle)
 		return -1;
@@ -316,7 +316,7 @@
 	acpi_handle handle;
 	int length;
 
-	handle = DEVICE_ACPI_HANDLE(dev);
+	handle = ACPI_HANDLE(dev);
 
 	if (!handle)
 		return -1;
diff --git a/drivers/platform/x86/apple-gmux.c b/drivers/platform/x86/apple-gmux.c
index 605a9be..b9429fb 100644
--- a/drivers/platform/x86/apple-gmux.c
+++ b/drivers/platform/x86/apple-gmux.c
@@ -519,7 +519,7 @@
 
 	gmux_data->power_state = VGA_SWITCHEROO_ON;
 
-	gmux_data->dhandle = DEVICE_ACPI_HANDLE(&pnp->dev);
+	gmux_data->dhandle = ACPI_HANDLE(&pnp->dev);
 	if (!gmux_data->dhandle) {
 		pr_err("Cannot find acpi handle for pnp device %s\n",
 		       dev_name(&pnp->dev));
diff --git a/drivers/pnp/pnpacpi/core.c b/drivers/pnp/pnpacpi/core.c
index 747826d..14655a0 100644
--- a/drivers/pnp/pnpacpi/core.c
+++ b/drivers/pnp/pnpacpi/core.c
@@ -89,7 +89,7 @@
 
 	pnp_dbg(&dev->dev, "set resources\n");
 
-	handle = DEVICE_ACPI_HANDLE(&dev->dev);
+	handle = ACPI_HANDLE(&dev->dev);
 	if (!handle || acpi_bus_get_device(handle, &acpi_dev)) {
 		dev_dbg(&dev->dev, "ACPI device not found in %s!\n", __func__);
 		return -ENODEV;
@@ -122,7 +122,7 @@
 
 	dev_dbg(&dev->dev, "disable resources\n");
 
-	handle = DEVICE_ACPI_HANDLE(&dev->dev);
+	handle = ACPI_HANDLE(&dev->dev);
 	if (!handle || acpi_bus_get_device(handle, &acpi_dev)) {
 		dev_dbg(&dev->dev, "ACPI device not found in %s!\n", __func__);
 		return 0;
@@ -144,7 +144,7 @@
 	struct acpi_device *acpi_dev;
 	acpi_handle handle;
 
-	handle = DEVICE_ACPI_HANDLE(&dev->dev);
+	handle = ACPI_HANDLE(&dev->dev);
 	if (!handle || acpi_bus_get_device(handle, &acpi_dev)) {
 		dev_dbg(&dev->dev, "ACPI device not found in %s!\n", __func__);
 		return false;
@@ -159,7 +159,7 @@
 	acpi_handle handle;
 	int error = 0;
 
-	handle = DEVICE_ACPI_HANDLE(&dev->dev);
+	handle = ACPI_HANDLE(&dev->dev);
 	if (!handle || acpi_bus_get_device(handle, &acpi_dev)) {
 		dev_dbg(&dev->dev, "ACPI device not found in %s!\n", __func__);
 		return 0;
@@ -194,7 +194,7 @@
 static int pnpacpi_resume(struct pnp_dev *dev)
 {
 	struct acpi_device *acpi_dev;
-	acpi_handle handle = DEVICE_ACPI_HANDLE(&dev->dev);
+	acpi_handle handle = ACPI_HANDLE(&dev->dev);
 	int error = 0;
 
 	if (!handle || acpi_bus_get_device(handle, &acpi_dev)) {
diff --git a/drivers/spi/spi-dw-mid.c b/drivers/spi/spi-dw-mid.c
index b9f0192..6d207af 100644
--- a/drivers/spi/spi-dw-mid.c
+++ b/drivers/spi/spi-dw-mid.c
@@ -150,7 +150,7 @@
 				&dws->tx_sgl,
 				1,
 				DMA_MEM_TO_DEV,
-				DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_DEST_UNMAP);
+				DMA_PREP_INTERRUPT);
 	txdesc->callback = dw_spi_dma_done;
 	txdesc->callback_param = dws;
 
@@ -173,7 +173,7 @@
 				&dws->rx_sgl,
 				1,
 				DMA_DEV_TO_MEM,
-				DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_DEST_UNMAP);
+				DMA_PREP_INTERRUPT);
 	rxdesc->callback = dw_spi_dma_done;
 	rxdesc->callback_param = dws;
 
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 8d85ddc..18cc625d 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -357,6 +357,19 @@
 }
 EXPORT_SYMBOL_GPL(spi_alloc_device);
 
+static void spi_dev_set_name(struct spi_device *spi)
+{
+	struct acpi_device *adev = ACPI_COMPANION(&spi->dev);
+
+	if (adev) {
+		dev_set_name(&spi->dev, "spi-%s", acpi_dev_name(adev));
+		return;
+	}
+
+	dev_set_name(&spi->dev, "%s.%u", dev_name(&spi->master->dev),
+		     spi->chip_select);
+}
+
 /**
  * spi_add_device - Add spi_device allocated with spi_alloc_device
  * @spi: spi_device to register
@@ -383,9 +396,7 @@
 	}
 
 	/* Set the bus ID string */
-	dev_set_name(&spi->dev, "%s.%u", dev_name(&spi->master->dev),
-			spi->chip_select);
-
+	spi_dev_set_name(spi);
 
 	/* We need to make sure there's no other device with this
 	 * chipselect **BEFORE** we call setup(), else we'll trash
@@ -1144,7 +1155,7 @@
 		return AE_NO_MEMORY;
 	}
 
-	ACPI_HANDLE_SET(&spi->dev, handle);
+	ACPI_COMPANION_SET(&spi->dev, adev);
 	spi->irq = -1;
 
 	INIT_LIST_HEAD(&resource_list);
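
With the companion recorded via ACPI_COMPANION_SET(), ACPI-enumerated SPI slaves get a stable, firmware-derived bus ID instead of a chip-select based one. Purely as an illustration (the ACPI device name shown is hypothetical):

    if (ACPI_COMPANION(&spi->dev))
            /* e.g. "spi-INT33C0:00" (hypothetical ACPI name) */
            dev_dbg(&spi->dev, "name: %s\n", dev_name(&spi->dev));
    else
            /* e.g. "spi1.2": master "spi1", chip select 2 */
            dev_dbg(&spi->dev, "name: %s\n", dev_name(&spi->dev));
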
diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c
index 5377502..7d8103c 100644
--- a/drivers/tty/serial/sh-sci.c
+++ b/drivers/tty/serial/sh-sci.c
@@ -1433,7 +1433,7 @@
 	desc = s->desc_rx[new];
 
 	if (dma_async_is_tx_complete(s->chan_rx, s->active_rx, NULL, NULL) !=
-	    DMA_SUCCESS) {
+	    DMA_COMPLETE) {
 		/* Handle incomplete DMA receive */
 		struct dma_chan *chan = s->chan_rx;
 		struct shdma_desc *sh_desc = container_of(desc,
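
DMA_SUCCESS is renamed to DMA_COMPLETE throughout the dmaengine users in this cycle; the status query itself is unchanged. A minimal completion-polling sketch (chan and cookie are illustrative, as are the two helpers):

    enum dma_status status;

    status = dma_async_is_tx_complete(chan, cookie, NULL, NULL);
    if (status == DMA_COMPLETE)
            finish_transfer();          /* hypothetical */
    else if (status == DMA_ERROR)
            recover_from_dma_error();   /* hypothetical */
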
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 06cec635..a7c04e2 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -5501,6 +5501,6 @@
 	if (!hub)
 		return NULL;
 
-	return DEVICE_ACPI_HANDLE(&hub->ports[port1 - 1]->dev);
+	return ACPI_HANDLE(&hub->ports[port1 - 1]->dev);
 }
 #endif
diff --git a/drivers/usb/core/usb-acpi.c b/drivers/usb/core/usb-acpi.c
index 255c144..4e243c3 100644
--- a/drivers/usb/core/usb-acpi.c
+++ b/drivers/usb/core/usb-acpi.c
@@ -173,7 +173,7 @@
 		}
 
 		/* root hub's parent is the usb hcd. */
-		parent_handle = DEVICE_ACPI_HANDLE(dev->parent);
+		parent_handle = ACPI_HANDLE(dev->parent);
 		*handle = acpi_get_child(parent_handle, udev->portnum);
 		if (!*handle)
 			return -ENODEV;
@@ -194,7 +194,7 @@
 
 			raw_port_num = usb_hcd_find_raw_port_number(hcd,
 				port_num);
-			*handle = acpi_get_child(DEVICE_ACPI_HANDLE(&udev->dev),
+			*handle = acpi_get_child(ACPI_HANDLE(&udev->dev),
 				raw_port_num);
 			if (!*handle)
 				return -ENODEV;
diff --git a/drivers/xen/pci.c b/drivers/xen/pci.c
index d15f6e8..1888251 100644
--- a/drivers/xen/pci.c
+++ b/drivers/xen/pci.c
@@ -59,12 +59,12 @@
 			add.flags = XEN_PCI_DEV_EXTFN;
 
 #ifdef CONFIG_ACPI
-		handle = DEVICE_ACPI_HANDLE(&pci_dev->dev);
+		handle = ACPI_HANDLE(&pci_dev->dev);
 		if (!handle && pci_dev->bus->bridge)
-			handle = DEVICE_ACPI_HANDLE(pci_dev->bus->bridge);
+			handle = ACPI_HANDLE(pci_dev->bus->bridge);
 #ifdef CONFIG_PCI_IOV
 		if (!handle && pci_dev->is_virtfn)
-			handle = DEVICE_ACPI_HANDLE(physfn->bus->bridge);
+			handle = ACPI_HANDLE(physfn->bus->bridge);
 #endif
 		if (handle) {
 			acpi_status status;
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index f039b10..b03dd23 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -43,23 +43,6 @@
 #include "fid.h"
 
 /**
- * v9fs_dentry_delete - called when dentry refcount equals 0
- * @dentry:  dentry in question
- *
- * By returning 1 here we should remove cacheing of unused
- * dentry components.
- *
- */
-
-static int v9fs_dentry_delete(const struct dentry *dentry)
-{
-	p9_debug(P9_DEBUG_VFS, " dentry: %s (%p)\n",
-		 dentry->d_name.name, dentry);
-
-	return 1;
-}
-
-/**
  * v9fs_cached_dentry_delete - called when dentry refcount equals 0
  * @dentry:  dentry in question
  *
@@ -134,6 +117,6 @@
 };
 
 const struct dentry_operations v9fs_dentry_operations = {
-	.d_delete = v9fs_dentry_delete,
+	.d_delete = always_delete_dentry,
 	.d_release = v9fs_dentry_release,
 };
diff --git a/fs/bio.c b/fs/bio.c
index 2bdb4e2..33d79a4 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -601,7 +601,7 @@
 
 static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
 			  *page, unsigned int len, unsigned int offset,
-			  unsigned short max_sectors)
+			  unsigned int max_sectors)
 {
 	int retried_segments = 0;
 	struct bio_vec *bvec;
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 277bd1b..4522e07 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -66,19 +66,9 @@
 	iput(inode);
 }
 
-/*
- * We _must_ delete our dentries on last dput, as the chain-to-parent
- * behavior is required to clear the parents of default_groups.
- */
-static int configfs_d_delete(const struct dentry *dentry)
-{
-	return 1;
-}
-
 const struct dentry_operations configfs_dentry_ops = {
 	.d_iput		= configfs_d_iput,
-	/* simple_delete_dentry() isn't exported */
-	.d_delete	= configfs_d_delete,
+	.d_delete	= always_delete_dentry,
 };
 
 #ifdef CONFIG_LOCKDEP
diff --git a/fs/coredump.c b/fs/coredump.c
index 62406b6..bc3fbcd 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -695,7 +695,7 @@
 	while (nr) {
 		if (dump_interrupted())
 			return 0;
-		n = vfs_write(file, addr, nr, &pos);
+		n = __kernel_write(file, addr, nr, &pos);
 		if (n <= 0)
 			return 0;
 		file->f_pos = pos;
@@ -733,7 +733,7 @@
 {
 	unsigned mod = cprm->written & (align - 1);
 	if (align & (align - 1))
-		return -EINVAL;
-	return mod ? dump_skip(cprm, align - mod) : 0;
+		return 0;
+	return mod ? dump_skip(cprm, align - mod) : 1;
 }
 EXPORT_SYMBOL(dump_align);
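
Besides switching the dump writer to __kernel_write(), which is intended for kernel-space buffers, dump_align() changes convention: it now returns non-zero on success and 0 on failure, matching dump_emit() and dump_skip(). A hedged caller sketch under that convention (the note variable is illustrative):

    if (!dump_align(cprm, 4))
            goto end_coredump;
    if (!dump_emit(cprm, &note, sizeof(note)))
            goto end_coredump;
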
diff --git a/fs/dcache.c b/fs/dcache.c
index 0a38ef8..4bdb300 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -88,35 +88,6 @@
 
 static struct kmem_cache *dentry_cache __read_mostly;
 
-/**
- * read_seqbegin_or_lock - begin a sequence number check or locking block
- * @lock: sequence lock
- * @seq : sequence number to be checked
- *
- * First try it once optimistically without taking the lock. If that fails,
- * take the lock. The sequence number is also used as a marker for deciding
- * whether to be a reader (even) or writer (odd).
- * N.B. seq must be initialized to an even number to begin with.
- */
-static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq)
-{
-	if (!(*seq & 1))	/* Even */
-		*seq = read_seqbegin(lock);
-	else			/* Odd */
-		read_seqlock_excl(lock);
-}
-
-static inline int need_seqretry(seqlock_t *lock, int seq)
-{
-	return !(seq & 1) && read_seqretry(lock, seq);
-}
-
-static inline void done_seqretry(seqlock_t *lock, int seq)
-{
-	if (seq & 1)
-		read_sequnlock_excl(lock);
-}
-
 /*
  * This is the single most critical data structure when it comes
  * to the dcache: the hashtable for lookups. Somebody should try
@@ -125,8 +96,6 @@
  * This hash-function tries to avoid losing too many bits of hash
  * information, yet avoid using a prime hash-size or similar.
  */
-#define D_HASHBITS     d_hash_shift
-#define D_HASHMASK     d_hash_mask
 
 static unsigned int d_hash_mask __read_mostly;
 static unsigned int d_hash_shift __read_mostly;
@@ -137,8 +106,8 @@
 					unsigned int hash)
 {
 	hash += (unsigned long) parent / L1_CACHE_BYTES;
-	hash = hash + (hash >> D_HASHBITS);
-	return dentry_hashtable + (hash & D_HASHMASK);
+	hash = hash + (hash >> d_hash_shift);
+	return dentry_hashtable + (hash & d_hash_mask);
 }
 
 /* Statistics gathering. */
@@ -469,7 +438,7 @@
 {
 	list_del(&dentry->d_u.d_child);
 	/*
-	 * Inform try_to_ascend() that we are no longer attached to the
+	 * Inform d_walk() that we are no longer attached to the
 	 * dentry tree
 	 */
 	dentry->d_flags |= DCACHE_DENTRY_KILLED;
@@ -1069,34 +1038,6 @@
 }
 EXPORT_SYMBOL(shrink_dcache_sb);
 
-/*
- * This tries to ascend one level of parenthood, but
- * we can race with renaming, so we need to re-check
- * the parenthood after dropping the lock and check
- * that the sequence number still matches.
- */
-static struct dentry *try_to_ascend(struct dentry *old, unsigned seq)
-{
-	struct dentry *new = old->d_parent;
-
-	rcu_read_lock();
-	spin_unlock(&old->d_lock);
-	spin_lock(&new->d_lock);
-
-	/*
-	 * might go back up the wrong parent if we have had a rename
-	 * or deletion
-	 */
-	if (new != old->d_parent ||
-		 (old->d_flags & DCACHE_DENTRY_KILLED) ||
-		 need_seqretry(&rename_lock, seq)) {
-		spin_unlock(&new->d_lock);
-		new = NULL;
-	}
-	rcu_read_unlock();
-	return new;
-}
-
 /**
  * enum d_walk_ret - action to take during tree walk
  * @D_WALK_CONTINUE:	continue walk
@@ -1185,9 +1126,24 @@
 	 */
 	if (this_parent != parent) {
 		struct dentry *child = this_parent;
-		this_parent = try_to_ascend(this_parent, seq);
-		if (!this_parent)
+		this_parent = child->d_parent;
+
+		rcu_read_lock();
+		spin_unlock(&child->d_lock);
+		spin_lock(&this_parent->d_lock);
+
+		/*
+		 * might go back up the wrong parent if we have had a rename
+		 * or deletion
+		 */
+		if (this_parent != child->d_parent ||
+			 (child->d_flags & DCACHE_DENTRY_KILLED) ||
+			 need_seqretry(&rename_lock, seq)) {
+			spin_unlock(&this_parent->d_lock);
+			rcu_read_unlock();
 			goto rename_retry;
+		}
+		rcu_read_unlock();
 		next = child->d_u.d_child.next;
 		goto resume;
 	}
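
The open-coded try_to_ascend() is folded into d_walk(), and the local read_seqbegin_or_lock()/need_seqretry()/done_seqretry() copies are dropped in favour of the generic seqlock helpers; the pattern itself is unchanged: try an optimistic, lockless pass first and, if the sequence count changed, retry as an exclusive, locking reader. A sketch of that pattern with an illustrative walk function:

    int seq = 0;        /* even: start as an optimistic (lockless) reader */

    retry:
    read_seqbegin_or_lock(&rename_lock, &seq);
    walk_something();                   /* hypothetical read-side work */
    if (need_seqretry(&rename_lock, seq)) {
            seq = 1;    /* odd: retry as an exclusive (locking) reader */
            goto retry;
    }
    done_seqretry(&rename_lock, seq);
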
diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c
index a8766b8..becc725 100644
--- a/fs/efivarfs/super.c
+++ b/fs/efivarfs/super.c
@@ -83,19 +83,10 @@
 	return 0;
 }
 
-/*
- * Retaining negative dentries for an in-memory filesystem just wastes
- * memory and lookup time: arrange for them to be deleted immediately.
- */
-static int efivarfs_delete_dentry(const struct dentry *dentry)
-{
-	return 1;
-}
-
 static struct dentry_operations efivarfs_d_ops = {
 	.d_compare = efivarfs_d_compare,
 	.d_hash = efivarfs_d_hash,
-	.d_delete = efivarfs_delete_dentry,
+	.d_delete = always_delete_dentry,
 };
 
 static struct dentry *efivarfs_alloc_dentry(struct dentry *parent, char *name)
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index c8423d6..2a6ba06 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -466,19 +466,19 @@
 static void control_lvb_read(struct lm_lockstruct *ls, uint32_t *lvb_gen,
 			     char *lvb_bits)
 {
-	uint32_t gen;
+	__le32 gen;
 	memcpy(lvb_bits, ls->ls_control_lvb, GDLM_LVB_SIZE);
-	memcpy(&gen, lvb_bits, sizeof(uint32_t));
+	memcpy(&gen, lvb_bits, sizeof(__le32));
 	*lvb_gen = le32_to_cpu(gen);
 }
 
 static void control_lvb_write(struct lm_lockstruct *ls, uint32_t lvb_gen,
 			      char *lvb_bits)
 {
-	uint32_t gen;
+	__le32 gen;
 	memcpy(ls->ls_control_lvb, lvb_bits, GDLM_LVB_SIZE);
 	gen = cpu_to_le32(lvb_gen);
-	memcpy(ls->ls_control_lvb, &gen, sizeof(uint32_t));
+	memcpy(ls->ls_control_lvb, &gen, sizeof(__le32));
 }
 
 static int all_jid_bits_clear(char *lvb)
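
Storing the LVB generation as __le32 rather than uint32_t does not change behaviour; it lets sparse verify the byte-order conversions. The general idiom:

    __le32 wire;                    /* little-endian, as stored in the LVB */
    u32 gen = 42;                   /* CPU byte order */

    wire = cpu_to_le32(gen);        /* CPU -> stored representation */
    gen  = le32_to_cpu(wire);       /* and back; sparse flags any mismatch */
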
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 453b50e..98236d0 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -667,7 +667,7 @@
 	struct buffer_head *bh;
 	struct page *page;
 	void *kaddr, *ptr;
-	struct gfs2_quota q, *qp;
+	struct gfs2_quota q;
 	int err, nbytes;
 	u64 size;
 
@@ -683,28 +683,25 @@
 		return err;
 
 	err = -EIO;
-	qp = &q;
-	qp->qu_value = be64_to_cpu(qp->qu_value);
-	qp->qu_value += change;
-	qp->qu_value = cpu_to_be64(qp->qu_value);
-	qd->qd_qb.qb_value = qp->qu_value;
+	be64_add_cpu(&q.qu_value, change);
+	qd->qd_qb.qb_value = q.qu_value;
 	if (fdq) {
 		if (fdq->d_fieldmask & FS_DQ_BSOFT) {
-			qp->qu_warn = cpu_to_be64(fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift);
-			qd->qd_qb.qb_warn = qp->qu_warn;
+			q.qu_warn = cpu_to_be64(fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift);
+			qd->qd_qb.qb_warn = q.qu_warn;
 		}
 		if (fdq->d_fieldmask & FS_DQ_BHARD) {
-			qp->qu_limit = cpu_to_be64(fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift);
-			qd->qd_qb.qb_limit = qp->qu_limit;
+			q.qu_limit = cpu_to_be64(fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift);
+			qd->qd_qb.qb_limit = q.qu_limit;
 		}
 		if (fdq->d_fieldmask & FS_DQ_BCOUNT) {
-			qp->qu_value = cpu_to_be64(fdq->d_bcount >> sdp->sd_fsb2bb_shift);
-			qd->qd_qb.qb_value = qp->qu_value;
+			q.qu_value = cpu_to_be64(fdq->d_bcount >> sdp->sd_fsb2bb_shift);
+			qd->qd_qb.qb_value = q.qu_value;
 		}
 	}
 
 	/* Write the quota into the quota file on disk */
-	ptr = qp;
+	ptr = &q;
 	nbytes = sizeof(struct gfs2_quota);
 get_a_page:
 	page = find_or_create_page(mapping, index, GFP_NOFS);
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 4d83abd..c8d6161 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1127,7 +1127,7 @@
 		rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK);
 		rgd->rd_free_clone = rgd->rd_free;
 	}
-	if (be32_to_cpu(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) {
+	if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) {
 		rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd));
 		gfs2_rgrp_ondisk2lvb(rgd->rd_rgl,
 				     rgd->rd_bits[0].bi_bh->b_data);
@@ -1161,7 +1161,7 @@
 	if (rgd->rd_flags & GFS2_RDF_UPTODATE)
 		return 0;
 
-	if (be32_to_cpu(GFS2_MAGIC) != rgd->rd_rgl->rl_magic)
+	if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic)
 		return gfs2_rgrp_bh_get(rgd);
 
 	rl_flags = be32_to_cpu(rgd->rd_rgl->rl_flags);
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 2543728..db23ce1 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -33,15 +33,6 @@
 
 #define FILE_HOSTFS_I(file) HOSTFS_I(file_inode(file))
 
-static int hostfs_d_delete(const struct dentry *dentry)
-{
-	return 1;
-}
-
-static const struct dentry_operations hostfs_dentry_ops = {
-	.d_delete		= hostfs_d_delete,
-};
-
 /* Changed in hostfs_args before the kernel starts running */
 static char *root_ino = "";
 static int append = 0;
@@ -925,7 +916,7 @@
 	sb->s_blocksize_bits = 10;
 	sb->s_magic = HOSTFS_SUPER_MAGIC;
 	sb->s_op = &hostfs_sbops;
-	sb->s_d_op = &hostfs_dentry_ops;
+	sb->s_d_op = &simple_dentry_operations;
 	sb->s_maxbytes = MAX_LFS_FILESIZE;
 
 	/* NULL is printed as <NULL> by sprintf: avoid that. */
diff --git a/fs/libfs.c b/fs/libfs.c
index 5de0694..a184424 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -47,10 +47,16 @@
  * Retaining negative dentries for an in-memory filesystem just wastes
  * memory and lookup time: arrange for them to be deleted immediately.
  */
-static int simple_delete_dentry(const struct dentry *dentry)
+int always_delete_dentry(const struct dentry *dentry)
 {
 	return 1;
 }
+EXPORT_SYMBOL(always_delete_dentry);
+
+const struct dentry_operations simple_dentry_operations = {
+	.d_delete = always_delete_dentry,
+};
+EXPORT_SYMBOL(simple_dentry_operations);
 
 /*
  * Lookup the data. This is trivial - if the dentry didn't already
@@ -58,10 +64,6 @@
  */
 struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
 {
-	static const struct dentry_operations simple_dentry_operations = {
-		.d_delete = simple_delete_dentry,
-	};
-
 	if (dentry->d_name.len > NAME_MAX)
 		return ERR_PTR(-ENAMETOOLONG);
 	if (!dentry->d_sb->s_d_op)
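
libfs now exports always_delete_dentry() and a ready-made simple_dentry_operations, which is what lets 9p, configfs, efivarfs, hostfs and procfs above drop their identical private "always return 1 from d_delete" copies. Typical usage (my_dentry_release is hypothetical):

    /* either point the whole superblock at the shared ops ... */
    sb->s_d_op = &simple_dentry_operations;

    /* ... or reuse just the helper inside filesystem-specific ops */
    static const struct dentry_operations my_dentry_ops = {
            .d_delete  = always_delete_dentry,
            .d_release = my_dentry_release,
    };
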
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 737e156..cca93b6 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -175,22 +175,6 @@
 };
 
 /*
- * As some entries in /proc are volatile, we want to 
- * get rid of unused dentries.  This could be made 
- * smarter: we could keep a "volatile" flag in the 
- * inode to indicate which ones to keep.
- */
-static int proc_delete_dentry(const struct dentry * dentry)
-{
-	return 1;
-}
-
-static const struct dentry_operations proc_dentry_operations =
-{
-	.d_delete	= proc_delete_dentry,
-};
-
-/*
  * Don't create negative dentries here, return -ENOENT by hand
  * instead.
  */
@@ -209,7 +193,7 @@
 			inode = proc_get_inode(dir->i_sb, de);
 			if (!inode)
 				return ERR_PTR(-ENOMEM);
-			d_set_d_op(dentry, &proc_dentry_operations);
+			d_set_d_op(dentry, &simple_dentry_operations);
 			d_add(dentry, inode);
 			return NULL;
 		}
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index 49a7fff..9ae46b8 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -42,12 +42,6 @@
 	.setattr	= proc_setattr,
 };
 
-static int ns_delete_dentry(const struct dentry *dentry)
-{
-	/* Don't cache namespace inodes when not in use */
-	return 1;
-}
-
 static char *ns_dname(struct dentry *dentry, char *buffer, int buflen)
 {
 	struct inode *inode = dentry->d_inode;
@@ -59,7 +53,7 @@
 
 const struct dentry_operations ns_dentry_operations =
 {
-	.d_delete	= ns_delete_dentry,
+	.d_delete	= always_delete_dentry,
 	.d_dname	= ns_dname,
 };
 
diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig
index c70111e..b6fa865 100644
--- a/fs/squashfs/Kconfig
+++ b/fs/squashfs/Kconfig
@@ -25,6 +25,78 @@
 
 	  If unsure, say N.
 
+choice
+	prompt "File decompression options"
+	depends on SQUASHFS
+	help
+	  Squashfs now supports two options for decompressing file
+	  data.  Traditionally Squashfs has decompressed into an
+	  intermediate buffer and then memcopied it into the page cache.
+	  Squashfs now supports the ability to decompress directly into
+	  the page cache.
+
+	  If unsure, select "Decompress file data into an intermediate buffer"
+
+config SQUASHFS_FILE_CACHE
+	bool "Decompress file data into an intermediate buffer"
+	help
+	  Decompress file data into an intermediate buffer and then
+	  memcopy it into the page cache.
+
+config SQUASHFS_FILE_DIRECT
+	bool "Decompress files directly into the page cache"
+	help
+	  Directly decompress file data into the page cache.
+	  Doing so can significantly improve performance because
+	  it eliminates a memcpy and it also removes the lock contention
+	  on the single buffer.
+
+endchoice
+
+choice
+	prompt "Decompressor parallelisation options"
+	depends on SQUASHFS
+	help
+	  Squashfs now supports three parallelisation options for
+	  decompression.  Each one exhibits various trade-offs between
+	  decompression performance and CPU and memory usage.
+
+	  If in doubt, select "Single threaded decompression"
+
+config SQUASHFS_DECOMP_SINGLE
+	bool "Single threaded decompression"

+	help
+	  Traditionally Squashfs has used single-threaded decompression.
+	  Only one block (data or metadata) can be decompressed at any
+	  one time.  This limits CPU and memory usage to a minimum.
+
+config SQUASHFS_DECOMP_MULTI
+	bool "Use multiple decompressors for parallel I/O"
+	help
+	  By default Squashfs uses a single decompressor but it gives
+	  poor performance on parallel I/O workloads when using multiple CPU
+	  machines due to waiting on decompressor availability.
+
+	  If you have a parallel I/O workload and your system has enough memory,
+	  using this option may improve overall I/O performance.
+
+	  This decompressor implementation uses up to two parallel
+	  decompressors per core.  It dynamically allocates decompressors
+	  on a demand basis.
+
+config SQUASHFS_DECOMP_MULTI_PERCPU
+	bool "Use percpu multiple decompressors for parallel I/O"
+	help
+	  By default Squashfs uses a single decompressor but it gives
+	  poor performance on parallel I/O workloads when using multiple CPU
+	  machines due to waiting on decompressor availability.
+
+	  This decompressor implementation uses a maximum of one
+	  decompressor per core.  It uses percpu variables to ensure
+	  decompression is load-balanced across the cores.
+
+endchoice
+
 config SQUASHFS_XATTR
 	bool "Squashfs XATTR support"
 	depends on SQUASHFS
diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile
index 110b047..4132520 100644
--- a/fs/squashfs/Makefile
+++ b/fs/squashfs/Makefile
@@ -5,6 +5,11 @@
 obj-$(CONFIG_SQUASHFS) += squashfs.o
 squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o
 squashfs-y += namei.o super.o symlink.o decompressor.o
+squashfs-$(CONFIG_SQUASHFS_FILE_CACHE) += file_cache.o
+squashfs-$(CONFIG_SQUASHFS_FILE_DIRECT) += file_direct.o page_actor.o
+squashfs-$(CONFIG_SQUASHFS_DECOMP_SINGLE) += decompressor_single.o
+squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI) += decompressor_multi.o
+squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU) += decompressor_multi_percpu.o
 squashfs-$(CONFIG_SQUASHFS_XATTR) += xattr.o xattr_id.o
 squashfs-$(CONFIG_SQUASHFS_LZO) += lzo_wrapper.o
 squashfs-$(CONFIG_SQUASHFS_XZ) += xz_wrapper.o
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index 41d108e..0cea9b9 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -36,6 +36,7 @@
 #include "squashfs_fs_sb.h"
 #include "squashfs.h"
 #include "decompressor.h"
+#include "page_actor.h"
 
 /*
  * Read the metadata block length, this is stored in the first two
@@ -86,16 +87,16 @@
  * generated a larger block - this does occasionally happen with compression
  * algorithms).
  */
-int squashfs_read_data(struct super_block *sb, void **buffer, u64 index,
-			int length, u64 *next_index, int srclength, int pages)
+int squashfs_read_data(struct super_block *sb, u64 index, int length,
+		u64 *next_index, struct squashfs_page_actor *output)
 {
 	struct squashfs_sb_info *msblk = sb->s_fs_info;
 	struct buffer_head **bh;
 	int offset = index & ((1 << msblk->devblksize_log2) - 1);
 	u64 cur_index = index >> msblk->devblksize_log2;
-	int bytes, compressed, b = 0, k = 0, page = 0, avail;
+	int bytes, compressed, b = 0, k = 0, avail, i;
 
-	bh = kcalloc(((srclength + msblk->devblksize - 1)
+	bh = kcalloc(((output->length + msblk->devblksize - 1)
 		>> msblk->devblksize_log2) + 1, sizeof(*bh), GFP_KERNEL);
 	if (bh == NULL)
 		return -ENOMEM;
@@ -111,9 +112,9 @@
 			*next_index = index + length;
 
 		TRACE("Block @ 0x%llx, %scompressed size %d, src size %d\n",
-			index, compressed ? "" : "un", length, srclength);
+			index, compressed ? "" : "un", length, output->length);
 
-		if (length < 0 || length > srclength ||
+		if (length < 0 || length > output->length ||
 				(index + length) > msblk->bytes_used)
 			goto read_failure;
 
@@ -145,7 +146,7 @@
 		TRACE("Block @ 0x%llx, %scompressed size %d\n", index,
 				compressed ? "" : "un", length);
 
-		if (length < 0 || length > srclength ||
+		if (length < 0 || length > output->length ||
 					(index + length) > msblk->bytes_used)
 			goto block_release;
 
@@ -158,9 +159,15 @@
 		ll_rw_block(READ, b - 1, bh + 1);
 	}
 
+	for (i = 0; i < b; i++) {
+		wait_on_buffer(bh[i]);
+		if (!buffer_uptodate(bh[i]))
+			goto block_release;
+	}
+
 	if (compressed) {
-		length = squashfs_decompress(msblk, buffer, bh, b, offset,
-			 length, srclength, pages);
+		length = squashfs_decompress(msblk, bh, b, offset, length,
+			output);
 		if (length < 0)
 			goto read_failure;
 	} else {
@@ -168,22 +175,20 @@
 		 * Block is uncompressed.
 		 */
 		int in, pg_offset = 0;
+		void *data = squashfs_first_page(output);
 
 		for (bytes = length; k < b; k++) {
 			in = min(bytes, msblk->devblksize - offset);
 			bytes -= in;
-			wait_on_buffer(bh[k]);
-			if (!buffer_uptodate(bh[k]))
-				goto block_release;
 			while (in) {
 				if (pg_offset == PAGE_CACHE_SIZE) {
-					page++;
+					data = squashfs_next_page(output);
 					pg_offset = 0;
 				}
 				avail = min_t(int, in, PAGE_CACHE_SIZE -
 						pg_offset);
-				memcpy(buffer[page] + pg_offset,
-						bh[k]->b_data + offset, avail);
+				memcpy(data + pg_offset, bh[k]->b_data + offset,
+						avail);
 				in -= avail;
 				pg_offset += avail;
 				offset += avail;
@@ -191,6 +196,7 @@
 			offset = 0;
 			put_bh(bh[k]);
 		}
+		squashfs_finish_page(output);
 	}
 
 	kfree(bh);
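
squashfs_read_data() no longer takes a raw page array plus length/page counts; callers wrap the destination buffers in a squashfs_page_actor and the read/decompress path walks it with squashfs_first_page()/squashfs_next_page()/squashfs_finish_page(). The calling convention in miniature (sb, block, length, next_index and MAX_PAGES are illustrative; error handling trimmed):

    void *data[MAX_PAGES];
    struct squashfs_page_actor *actor;
    int ret;

    actor = squashfs_page_actor_init(data, MAX_PAGES, length);
    if (actor == NULL)
            return -ENOMEM;

    ret = squashfs_read_data(sb, block, length, &next_index, actor);
    kfree(actor);
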
diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c
index af0b738..1cb70a0 100644
--- a/fs/squashfs/cache.c
+++ b/fs/squashfs/cache.c
@@ -56,6 +56,7 @@
 #include "squashfs_fs.h"
 #include "squashfs_fs_sb.h"
 #include "squashfs.h"
+#include "page_actor.h"
 
 /*
  * Look-up block in cache, and increment usage count.  If not in cache, read
@@ -119,9 +120,8 @@
 			entry->error = 0;
 			spin_unlock(&cache->lock);
 
-			entry->length = squashfs_read_data(sb, entry->data,
-				block, length, &entry->next_index,
-				cache->block_size, cache->pages);
+			entry->length = squashfs_read_data(sb, block, length,
+				&entry->next_index, entry->actor);
 
 			spin_lock(&cache->lock);
 
@@ -220,6 +220,7 @@
 				kfree(cache->entry[i].data[j]);
 			kfree(cache->entry[i].data);
 		}
+		kfree(cache->entry[i].actor);
 	}
 
 	kfree(cache->entry);
@@ -280,6 +281,13 @@
 				goto cleanup;
 			}
 		}
+
+		entry->actor = squashfs_page_actor_init(entry->data,
+						cache->pages, 0);
+		if (entry->actor == NULL) {
+			ERROR("Failed to allocate %s cache entry\n", name);
+			goto cleanup;
+		}
 	}
 
 	return cache;
@@ -410,6 +418,7 @@
 	int pages = (length + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 	int i, res;
 	void *table, *buffer, **data;
+	struct squashfs_page_actor *actor;
 
 	table = buffer = kmalloc(length, GFP_KERNEL);
 	if (table == NULL)
@@ -421,19 +430,28 @@
 		goto failed;
 	}
 
+	actor = squashfs_page_actor_init(data, pages, length);
+	if (actor == NULL) {
+		res = -ENOMEM;
+		goto failed2;
+	}
+
 	for (i = 0; i < pages; i++, buffer += PAGE_CACHE_SIZE)
 		data[i] = buffer;
 
-	res = squashfs_read_data(sb, data, block, length |
-		SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, length, pages);
+	res = squashfs_read_data(sb, block, length |
+		SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, actor);
 
 	kfree(data);
+	kfree(actor);
 
 	if (res < 0)
 		goto failed;
 
 	return table;
 
+failed2:
+	kfree(data);
 failed:
 	kfree(table);
 	return ERR_PTR(res);
diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c
index 3f6271d..ac22fe7 100644
--- a/fs/squashfs/decompressor.c
+++ b/fs/squashfs/decompressor.c
@@ -30,6 +30,7 @@
 #include "squashfs_fs_sb.h"
 #include "decompressor.h"
 #include "squashfs.h"
+#include "page_actor.h"
 
 /*
  * This file (and decompressor.h) implements a decompressor framework for
@@ -37,29 +38,29 @@
  */
 
 static const struct squashfs_decompressor squashfs_lzma_unsupported_comp_ops = {
-	NULL, NULL, NULL, LZMA_COMPRESSION, "lzma", 0
+	NULL, NULL, NULL, NULL, LZMA_COMPRESSION, "lzma", 0
 };
 
 #ifndef CONFIG_SQUASHFS_LZO
 static const struct squashfs_decompressor squashfs_lzo_comp_ops = {
-	NULL, NULL, NULL, LZO_COMPRESSION, "lzo", 0
+	NULL, NULL, NULL, NULL, LZO_COMPRESSION, "lzo", 0
 };
 #endif
 
 #ifndef CONFIG_SQUASHFS_XZ
 static const struct squashfs_decompressor squashfs_xz_comp_ops = {
-	NULL, NULL, NULL, XZ_COMPRESSION, "xz", 0
+	NULL, NULL, NULL, NULL, XZ_COMPRESSION, "xz", 0
 };
 #endif
 
 #ifndef CONFIG_SQUASHFS_ZLIB
 static const struct squashfs_decompressor squashfs_zlib_comp_ops = {
-	NULL, NULL, NULL, ZLIB_COMPRESSION, "zlib", 0
+	NULL, NULL, NULL, NULL, ZLIB_COMPRESSION, "zlib", 0
 };
 #endif
 
 static const struct squashfs_decompressor squashfs_unknown_comp_ops = {
-	NULL, NULL, NULL, 0, "unknown", 0
+	NULL, NULL, NULL, NULL, 0, "unknown", 0
 };
 
 static const struct squashfs_decompressor *decompressor[] = {
@@ -83,10 +84,11 @@
 }
 
 
-void *squashfs_decompressor_init(struct super_block *sb, unsigned short flags)
+static void *get_comp_opts(struct super_block *sb, unsigned short flags)
 {
 	struct squashfs_sb_info *msblk = sb->s_fs_info;
-	void *strm, *buffer = NULL;
+	void *buffer = NULL, *comp_opts;
+	struct squashfs_page_actor *actor = NULL;
 	int length = 0;
 
 	/*
@@ -94,23 +96,46 @@
 	 */
 	if (SQUASHFS_COMP_OPTS(flags)) {
 		buffer = kmalloc(PAGE_CACHE_SIZE, GFP_KERNEL);
-		if (buffer == NULL)
-			return ERR_PTR(-ENOMEM);
+		if (buffer == NULL) {
+			comp_opts = ERR_PTR(-ENOMEM);
+			goto out;
+		}
 
-		length = squashfs_read_data(sb, &buffer,
-			sizeof(struct squashfs_super_block), 0, NULL,
-			PAGE_CACHE_SIZE, 1);
+		actor = squashfs_page_actor_init(&buffer, 1, 0);
+		if (actor == NULL) {
+			comp_opts = ERR_PTR(-ENOMEM);
+			goto out;
+		}
+
+		length = squashfs_read_data(sb,
+			sizeof(struct squashfs_super_block), 0, NULL, actor);
 
 		if (length < 0) {
-			strm = ERR_PTR(length);
-			goto finished;
+			comp_opts = ERR_PTR(length);
+			goto out;
 		}
 	}
 
-	strm = msblk->decompressor->init(msblk, buffer, length);
+	comp_opts = squashfs_comp_opts(msblk, buffer, length);
 
-finished:
+out:
+	kfree(actor);
 	kfree(buffer);
+	return comp_opts;
+}
 
-	return strm;
+
+void *squashfs_decompressor_setup(struct super_block *sb, unsigned short flags)
+{
+	struct squashfs_sb_info *msblk = sb->s_fs_info;
+	void *stream, *comp_opts = get_comp_opts(sb, flags);
+
+	if (IS_ERR(comp_opts))
+		return comp_opts;
+
+	stream = squashfs_decompressor_create(msblk, comp_opts);
+	if (IS_ERR(stream))
+		kfree(comp_opts);
+
+	return stream;
 }
diff --git a/fs/squashfs/decompressor.h b/fs/squashfs/decompressor.h
index 330073e..af09853 100644
--- a/fs/squashfs/decompressor.h
+++ b/fs/squashfs/decompressor.h
@@ -24,28 +24,22 @@
  */
 
 struct squashfs_decompressor {
-	void	*(*init)(struct squashfs_sb_info *, void *, int);
+	void	*(*init)(struct squashfs_sb_info *, void *);
+	void	*(*comp_opts)(struct squashfs_sb_info *, void *, int);
 	void	(*free)(void *);
-	int	(*decompress)(struct squashfs_sb_info *, void **,
-		struct buffer_head **, int, int, int, int, int);
+	int	(*decompress)(struct squashfs_sb_info *, void *,
+		struct buffer_head **, int, int, int,
+		struct squashfs_page_actor *);
 	int	id;
 	char	*name;
 	int	supported;
 };
 
-static inline void squashfs_decompressor_free(struct squashfs_sb_info *msblk,
-	void *s)
+static inline void *squashfs_comp_opts(struct squashfs_sb_info *msblk,
+							void *buff, int length)
 {
-	if (msblk->decompressor)
-		msblk->decompressor->free(s);
-}
-
-static inline int squashfs_decompress(struct squashfs_sb_info *msblk,
-	void **buffer, struct buffer_head **bh, int b, int offset, int length,
-	int srclength, int pages)
-{
-	return msblk->decompressor->decompress(msblk, buffer, bh, b, offset,
-		length, srclength, pages);
+	return msblk->decompressor->comp_opts ?
+		msblk->decompressor->comp_opts(msblk, buff, length) : NULL;
 }
 
 #ifdef CONFIG_SQUASHFS_XZ
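
The per-algorithm ops gain a comp_opts() callback and init() loses its buffer/length arguments: compression options are parsed once per mount, while init() is called once per decompression stream (the multi-threaded backends below create several). A sketch of the shape a wrapper now has, with every foo_* symbol hypothetical:

    const struct squashfs_decompressor squashfs_foo_comp_ops = {
            .init       = foo_init,        /* one call per stream */
            .comp_opts  = foo_comp_opts,   /* one call per mount  */
            .free       = foo_free,
            .decompress = foo_decompress,
            .id         = FOO_COMPRESSION,
            .name       = "foo",
            .supported  = 1
    };
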
diff --git a/fs/squashfs/decompressor_multi.c b/fs/squashfs/decompressor_multi.c
new file mode 100644
index 0000000..d6008a6
--- /dev/null
+++ b/fs/squashfs/decompressor_multi.c
@@ -0,0 +1,198 @@
+/*
+ *  Copyright (c) 2013
+ *  Minchan Kim <minchan@kernel.org>
+ *
+ *  This work is licensed under the terms of the GNU GPL, version 2. See
+ *  the COPYING file in the top-level directory.
+ */
+#include <linux/types.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/buffer_head.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/cpumask.h>
+
+#include "squashfs_fs.h"
+#include "squashfs_fs_sb.h"
+#include "decompressor.h"
+#include "squashfs.h"
+
+/*
+ * This file implements multi-threaded decompression in the
+ * decompressor framework
+ */
+
+
+/*
+ * The reason for multiplying by two is that a CPU can issue a new I/O
+ * request while it is still waiting for a previous one to complete.
+ */
+#define MAX_DECOMPRESSOR	(num_online_cpus() * 2)
+
+
+int squashfs_max_decompressors(void)
+{
+	return MAX_DECOMPRESSOR;
+}
+
+
+struct squashfs_stream {
+	void			*comp_opts;
+	struct list_head	strm_list;
+	struct mutex		mutex;
+	int			avail_decomp;
+	wait_queue_head_t	wait;
+};
+
+
+struct decomp_stream {
+	void *stream;
+	struct list_head list;
+};
+
+
+static void put_decomp_stream(struct decomp_stream *decomp_strm,
+				struct squashfs_stream *stream)
+{
+	mutex_lock(&stream->mutex);
+	list_add(&decomp_strm->list, &stream->strm_list);
+	mutex_unlock(&stream->mutex);
+	wake_up(&stream->wait);
+}
+
+void *squashfs_decompressor_create(struct squashfs_sb_info *msblk,
+				void *comp_opts)
+{
+	struct squashfs_stream *stream;
+	struct decomp_stream *decomp_strm = NULL;
+	int err = -ENOMEM;
+
+	stream = kzalloc(sizeof(*stream), GFP_KERNEL);
+	if (!stream)
+		goto out;
+
+	stream->comp_opts = comp_opts;
+	mutex_init(&stream->mutex);
+	INIT_LIST_HEAD(&stream->strm_list);
+	init_waitqueue_head(&stream->wait);
+
+	/*
+	 * We should always have at least one default decompressor, so
+	 * that if we fail to allocate a new decompressor dynamically we
+	 * can fall back to the default one and the filesystem keeps
+	 * working.
+	 */
+	decomp_strm = kmalloc(sizeof(*decomp_strm), GFP_KERNEL);
+	if (!decomp_strm)
+		goto out;
+
+	decomp_strm->stream = msblk->decompressor->init(msblk,
+						stream->comp_opts);
+	if (IS_ERR(decomp_strm->stream)) {
+		err = PTR_ERR(decomp_strm->stream);
+		goto out;
+	}
+
+	list_add(&decomp_strm->list, &stream->strm_list);
+	stream->avail_decomp = 1;
+	return stream;
+
+out:
+	kfree(decomp_strm);
+	kfree(stream);
+	return ERR_PTR(err);
+}
+
+
+void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk)
+{
+	struct squashfs_stream *stream = msblk->stream;
+	if (stream) {
+		struct decomp_stream *decomp_strm;
+
+		while (!list_empty(&stream->strm_list)) {
+			decomp_strm = list_entry(stream->strm_list.prev,
+						struct decomp_stream, list);
+			list_del(&decomp_strm->list);
+			msblk->decompressor->free(decomp_strm->stream);
+			kfree(decomp_strm);
+			stream->avail_decomp--;
+		}
+		WARN_ON(stream->avail_decomp);
+		kfree(stream->comp_opts);
+		kfree(stream);
+	}
+}
+
+
+static struct decomp_stream *get_decomp_stream(struct squashfs_sb_info *msblk,
+					struct squashfs_stream *stream)
+{
+	struct decomp_stream *decomp_strm;
+
+	while (1) {
+		mutex_lock(&stream->mutex);
+
+		/* There is available decomp_stream */
+		if (!list_empty(&stream->strm_list)) {
+			decomp_strm = list_entry(stream->strm_list.prev,
+				struct decomp_stream, list);
+			list_del(&decomp_strm->list);
+			mutex_unlock(&stream->mutex);
+			break;
+		}
+
+		/*
+		 * If no decompressor is available and we are already at
+		 * the limit, wait for another user to release one.
+		 */
+		if (stream->avail_decomp >= MAX_DECOMPRESSOR)
+			goto wait;
+
+		/* Let's allocate new decomp */
+		decomp_strm = kmalloc(sizeof(*decomp_strm), GFP_KERNEL);
+		if (!decomp_strm)
+			goto wait;
+
+		decomp_strm->stream = msblk->decompressor->init(msblk,
+						stream->comp_opts);
+		if (IS_ERR(decomp_strm->stream)) {
+			kfree(decomp_strm);
+			goto wait;
+		}
+
+		stream->avail_decomp++;
+		WARN_ON(stream->avail_decomp > MAX_DECOMPRESSOR);
+
+		mutex_unlock(&stream->mutex);
+		break;
+wait:
+		/*
+		 * If system memory is tight, wait for another user to
+		 * release a decompressor rather than putting more pressure
+		 * on the VM, which could cause page cache thrashing.
+		 */
+		mutex_unlock(&stream->mutex);
+		wait_event(stream->wait,
+			!list_empty(&stream->strm_list));
+	}
+
+	return decomp_strm;
+}
+
+
+int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh,
+	int b, int offset, int length, struct squashfs_page_actor *output)
+{
+	int res;
+	struct squashfs_stream *stream = msblk->stream;
+	struct decomp_stream *decomp_stream = get_decomp_stream(msblk, stream);
+	res = msblk->decompressor->decompress(msblk, decomp_stream->stream,
+		bh, b, offset, length, output);
+	put_decomp_stream(decomp_stream, stream);
+	if (res < 0)
+		ERROR("%s decompression failed, data probably corrupt\n",
+			msblk->decompressor->name);
+	return res;
+}
diff --git a/fs/squashfs/decompressor_multi_percpu.c b/fs/squashfs/decompressor_multi_percpu.c
new file mode 100644
index 0000000..23a9c28
--- /dev/null
+++ b/fs/squashfs/decompressor_multi_percpu.c
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2013
+ * Phillip Lougher <phillip@squashfs.org.uk>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/percpu.h>
+#include <linux/buffer_head.h>
+
+#include "squashfs_fs.h"
+#include "squashfs_fs_sb.h"
+#include "decompressor.h"
+#include "squashfs.h"
+
+/*
+ * This file implements multi-threaded decompression using percpu
+ * variables, one thread per cpu core.
+ */
+
+struct squashfs_stream {
+	void		*stream;
+};
+
+void *squashfs_decompressor_create(struct squashfs_sb_info *msblk,
+						void *comp_opts)
+{
+	struct squashfs_stream *stream;
+	struct squashfs_stream __percpu *percpu;
+	int err, cpu;
+
+	percpu = alloc_percpu(struct squashfs_stream);
+	if (percpu == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	for_each_possible_cpu(cpu) {
+		stream = per_cpu_ptr(percpu, cpu);
+		stream->stream = msblk->decompressor->init(msblk, comp_opts);
+		if (IS_ERR(stream->stream)) {
+			err = PTR_ERR(stream->stream);
+			goto out;
+		}
+	}
+
+	kfree(comp_opts);
+	return (__force void *) percpu;
+
+out:
+	for_each_possible_cpu(cpu) {
+		stream = per_cpu_ptr(percpu, cpu);
+		if (!IS_ERR_OR_NULL(stream->stream))
+			msblk->decompressor->free(stream->stream);
+	}
+	free_percpu(percpu);
+	return ERR_PTR(err);
+}
+
+void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk)
+{
+	struct squashfs_stream __percpu *percpu =
+			(struct squashfs_stream __percpu *) msblk->stream;
+	struct squashfs_stream *stream;
+	int cpu;
+
+	if (msblk->stream) {
+		for_each_possible_cpu(cpu) {
+			stream = per_cpu_ptr(percpu, cpu);
+			msblk->decompressor->free(stream->stream);
+		}
+		free_percpu(percpu);
+	}
+}
+
+int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh,
+	int b, int offset, int length, struct squashfs_page_actor *output)
+{
+	struct squashfs_stream __percpu *percpu =
+			(struct squashfs_stream __percpu *) msblk->stream;
+	struct squashfs_stream *stream = get_cpu_ptr(percpu);
+	int res = msblk->decompressor->decompress(msblk, stream->stream, bh, b,
+		offset, length, output);
+	put_cpu_ptr(stream);
+
+	if (res < 0)
+		ERROR("%s decompression failed, data probably corrupt\n",
+			msblk->decompressor->name);
+
+	return res;
+}
+
+int squashfs_max_decompressors(void)
+{
+	return num_possible_cpus();
+}
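
The percpu backend trades memory for lock-free operation: one stream per possible CPU, with get_cpu_ptr() pinning the caller to its CPU (preemption disabled) for the duration of the decompression, so no mutex is needed but at most one block per core is in flight. The access idiom, reduced to its core (decompress_one_block is hypothetical):

    struct squashfs_stream *s = get_cpu_ptr(percpu);

    /* preemption is disabled here; the stream is exclusively ours */
    decompress_one_block(s);

    put_cpu_ptr(s);                     /* re-enable preemption */
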
diff --git a/fs/squashfs/decompressor_single.c b/fs/squashfs/decompressor_single.c
new file mode 100644
index 0000000..a6c7592
--- /dev/null
+++ b/fs/squashfs/decompressor_single.c
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2013
+ * Phillip Lougher <phillip@squashfs.org.uk>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <linux/types.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/buffer_head.h>
+
+#include "squashfs_fs.h"
+#include "squashfs_fs_sb.h"
+#include "decompressor.h"
+#include "squashfs.h"
+
+/*
+ * This file implements single-threaded decompression in the
+ * decompressor framework
+ */
+
+struct squashfs_stream {
+	void		*stream;
+	struct mutex	mutex;
+};
+
+void *squashfs_decompressor_create(struct squashfs_sb_info *msblk,
+						void *comp_opts)
+{
+	struct squashfs_stream *stream;
+	int err = -ENOMEM;
+
+	stream = kmalloc(sizeof(*stream), GFP_KERNEL);
+	if (stream == NULL)
+		goto out;
+
+	stream->stream = msblk->decompressor->init(msblk, comp_opts);
+	if (IS_ERR(stream->stream)) {
+		err = PTR_ERR(stream->stream);
+		goto out;
+	}
+
+	kfree(comp_opts);
+	mutex_init(&stream->mutex);
+	return stream;
+
+out:
+	kfree(stream);
+	return ERR_PTR(err);
+}
+
+void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk)
+{
+	struct squashfs_stream *stream = msblk->stream;
+
+	if (stream) {
+		msblk->decompressor->free(stream->stream);
+		kfree(stream);
+	}
+}
+
+int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh,
+	int b, int offset, int length, struct squashfs_page_actor *output)
+{
+	int res;
+	struct squashfs_stream *stream = msblk->stream;
+
+	mutex_lock(&stream->mutex);
+	res = msblk->decompressor->decompress(msblk, stream->stream, bh, b,
+		offset, length, output);
+	mutex_unlock(&stream->mutex);
+
+	if (res < 0)
+		ERROR("%s decompression failed, data probably corrupt\n",
+			msblk->decompressor->name);
+
+	return res;
+}
+
+int squashfs_max_decompressors(void)
+{
+	return 1;
+}
diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c
index 8ca62c2..e5c9689 100644
--- a/fs/squashfs/file.c
+++ b/fs/squashfs/file.c
@@ -370,77 +370,15 @@
 	return le32_to_cpu(size);
 }
 
-
-static int squashfs_readpage(struct file *file, struct page *page)
+/* Copy data into page cache  */
+void squashfs_copy_cache(struct page *page, struct squashfs_cache_entry *buffer,
+	int bytes, int offset)
 {
 	struct inode *inode = page->mapping->host;
 	struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
-	int bytes, i, offset = 0, sparse = 0;
-	struct squashfs_cache_entry *buffer = NULL;
 	void *pageaddr;
-
-	int mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1;
-	int index = page->index >> (msblk->block_log - PAGE_CACHE_SHIFT);
-	int start_index = page->index & ~mask;
-	int end_index = start_index | mask;
-	int file_end = i_size_read(inode) >> msblk->block_log;
-
-	TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n",
-				page->index, squashfs_i(inode)->start);
-
-	if (page->index >= ((i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
-					PAGE_CACHE_SHIFT))
-		goto out;
-
-	if (index < file_end || squashfs_i(inode)->fragment_block ==
-					SQUASHFS_INVALID_BLK) {
-		/*
-		 * Reading a datablock from disk.  Need to read block list
-		 * to get location and block size.
-		 */
-		u64 block = 0;
-		int bsize = read_blocklist(inode, index, &block);
-		if (bsize < 0)
-			goto error_out;
-
-		if (bsize == 0) { /* hole */
-			bytes = index == file_end ?
-				(i_size_read(inode) & (msblk->block_size - 1)) :
-				 msblk->block_size;
-			sparse = 1;
-		} else {
-			/*
-			 * Read and decompress datablock.
-			 */
-			buffer = squashfs_get_datablock(inode->i_sb,
-								block, bsize);
-			if (buffer->error) {
-				ERROR("Unable to read page, block %llx, size %x"
-					"\n", block, bsize);
-				squashfs_cache_put(buffer);
-				goto error_out;
-			}
-			bytes = buffer->length;
-		}
-	} else {
-		/*
-		 * Datablock is stored inside a fragment (tail-end packed
-		 * block).
-		 */
-		buffer = squashfs_get_fragment(inode->i_sb,
-				squashfs_i(inode)->fragment_block,
-				squashfs_i(inode)->fragment_size);
-
-		if (buffer->error) {
-			ERROR("Unable to read page, block %llx, size %x\n",
-				squashfs_i(inode)->fragment_block,
-				squashfs_i(inode)->fragment_size);
-			squashfs_cache_put(buffer);
-			goto error_out;
-		}
-		bytes = i_size_read(inode) & (msblk->block_size - 1);
-		offset = squashfs_i(inode)->fragment_offset;
-	}
+	int i, mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1;
+	int start_index = page->index & ~mask, end_index = start_index | mask;
 
 	/*
 	 * Loop copying datablock into pages.  As the datablock likely covers
@@ -451,7 +389,7 @@
 	for (i = start_index; i <= end_index && bytes > 0; i++,
 			bytes -= PAGE_CACHE_SIZE, offset += PAGE_CACHE_SIZE) {
 		struct page *push_page;
-		int avail = sparse ? 0 : min_t(int, bytes, PAGE_CACHE_SIZE);
+		int avail = buffer ? min_t(int, bytes, PAGE_CACHE_SIZE) : 0;
 
 		TRACE("bytes %d, i %d, available_bytes %d\n", bytes, i, avail);
 
@@ -475,11 +413,75 @@
 		if (i != page->index)
 			page_cache_release(push_page);
 	}
+}
 
-	if (!sparse)
-		squashfs_cache_put(buffer);
+/* Read datablock stored packed inside a fragment (tail-end packed block) */
+static int squashfs_readpage_fragment(struct page *page)
+{
+	struct inode *inode = page->mapping->host;
+	struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
+	struct squashfs_cache_entry *buffer = squashfs_get_fragment(inode->i_sb,
+		squashfs_i(inode)->fragment_block,
+		squashfs_i(inode)->fragment_size);
+	int res = buffer->error;
 
+	if (res)
+		ERROR("Unable to read page, block %llx, size %x\n",
+			squashfs_i(inode)->fragment_block,
+			squashfs_i(inode)->fragment_size);
+	else
+		squashfs_copy_cache(page, buffer, i_size_read(inode) &
+			(msblk->block_size - 1),
+			squashfs_i(inode)->fragment_offset);
+
+	squashfs_cache_put(buffer);
+	return res;
+}
+
+static int squashfs_readpage_sparse(struct page *page, int index, int file_end)
+{
+	struct inode *inode = page->mapping->host;
+	struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
+	int bytes = index == file_end ?
+			(i_size_read(inode) & (msblk->block_size - 1)) :
+			 msblk->block_size;
+
+	squashfs_copy_cache(page, NULL, bytes, 0);
 	return 0;
+}
+
+static int squashfs_readpage(struct file *file, struct page *page)
+{
+	struct inode *inode = page->mapping->host;
+	struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
+	int index = page->index >> (msblk->block_log - PAGE_CACHE_SHIFT);
+	int file_end = i_size_read(inode) >> msblk->block_log;
+	int res;
+	void *pageaddr;
+
+	TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n",
+				page->index, squashfs_i(inode)->start);
+
+	if (page->index >= ((i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
+					PAGE_CACHE_SHIFT))
+		goto out;
+
+	if (index < file_end || squashfs_i(inode)->fragment_block ==
+					SQUASHFS_INVALID_BLK) {
+		u64 block = 0;
+		int bsize = read_blocklist(inode, index, &block);
+		if (bsize < 0)
+			goto error_out;
+
+		if (bsize == 0)
+			res = squashfs_readpage_sparse(page, index, file_end);
+		else
+			res = squashfs_readpage_block(page, block, bsize);
+	} else
+		res = squashfs_readpage_fragment(page);
+
+	if (!res)
+		return 0;
 
 error_out:
 	SetPageError(page);
diff --git a/fs/squashfs/file_cache.c b/fs/squashfs/file_cache.c
new file mode 100644
index 0000000..f2310d2
--- /dev/null
+++ b/fs/squashfs/file_cache.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2013
+ * Phillip Lougher <phillip@squashfs.org.uk>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <linux/fs.h>
+#include <linux/vfs.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/pagemap.h>
+#include <linux/mutex.h>
+
+#include "squashfs_fs.h"
+#include "squashfs_fs_sb.h"
+#include "squashfs_fs_i.h"
+#include "squashfs.h"
+
+/* Read separately compressed datablock and memcopy into page cache */
+int squashfs_readpage_block(struct page *page, u64 block, int bsize)
+{
+	struct inode *i = page->mapping->host;
+	struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb,
+		block, bsize);
+	int res = buffer->error;
+
+	if (res)
+		ERROR("Unable to read page, block %llx, size %x\n", block,
+			bsize);
+	else
+		squashfs_copy_cache(page, buffer, buffer->length, 0);
+
+	squashfs_cache_put(buffer);
+	return res;
+}
diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c
new file mode 100644
index 0000000..2943b2b
--- /dev/null
+++ b/fs/squashfs/file_direct.c
@@ -0,0 +1,173 @@
+/*
+ * Copyright (c) 2013
+ * Phillip Lougher <phillip@squashfs.org.uk>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <linux/fs.h>
+#include <linux/vfs.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/pagemap.h>
+#include <linux/mutex.h>
+
+#include "squashfs_fs.h"
+#include "squashfs_fs_sb.h"
+#include "squashfs_fs_i.h"
+#include "squashfs.h"
+#include "page_actor.h"
+
+static int squashfs_read_cache(struct page *target_page, u64 block, int bsize,
+	int pages, struct page **page);
+
+/* Read separately compressed datablock directly into page cache */
+int squashfs_readpage_block(struct page *target_page, u64 block, int bsize)
+
+{
+	struct inode *inode = target_page->mapping->host;
+	struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
+
+	int file_end = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
+	int mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1;
+	int start_index = target_page->index & ~mask;
+	int end_index = start_index | mask;
+	int i, n, pages, missing_pages, bytes, res = -ENOMEM;
+	struct page **page;
+	struct squashfs_page_actor *actor;
+	void *pageaddr;
+
+	if (end_index > file_end)
+		end_index = file_end;
+
+	pages = end_index - start_index + 1;
+
+	page = kmalloc(sizeof(void *) * pages, GFP_KERNEL);
+	if (page == NULL)
+		return res;
+
+	/*
+	 * Create a "page actor" which will kmap and kunmap the
+	 * page cache pages appropriately within the decompressor
+	 */
+	actor = squashfs_page_actor_init_special(page, pages, 0);
+	if (actor == NULL)
+		goto out;
+
+	/* Try to grab all the pages covered by the Squashfs block */
+	for (missing_pages = 0, i = 0, n = start_index; i < pages; i++, n++) {
+		page[i] = (n == target_page->index) ? target_page :
+			grab_cache_page_nowait(target_page->mapping, n);
+
+		if (page[i] == NULL) {
+			missing_pages++;
+			continue;
+		}
+
+		if (PageUptodate(page[i])) {
+			unlock_page(page[i]);
+			page_cache_release(page[i]);
+			page[i] = NULL;
+			missing_pages++;
+		}
+	}
+
+	if (missing_pages) {
+		/*
+		 * Couldn't get one or more pages: either this page has been
+		 * VM reclaimed while others are still in the page cache and
+		 * uptodate, or we're racing with another thread in
+		 * squashfs_readpage also trying to grab them.  Fall back to
+		 * using an intermediate buffer.
+		 */
+		res = squashfs_read_cache(target_page, block, bsize, pages,
+								page);
+		goto out;
+	}
+
+	/* Decompress directly into the page cache buffers */
+	res = squashfs_read_data(inode->i_sb, block, bsize, NULL, actor);
+	if (res < 0)
+		goto mark_errored;
+
+	/* Last page may have trailing bytes not filled */
+	bytes = res % PAGE_CACHE_SIZE;
+	if (bytes) {
+		pageaddr = kmap_atomic(page[pages - 1]);
+		memset(pageaddr + bytes, 0, PAGE_CACHE_SIZE - bytes);
+		kunmap_atomic(pageaddr);
+	}
+
+	/* Mark pages as uptodate, unlock and release */
+	for (i = 0; i < pages; i++) {
+		flush_dcache_page(page[i]);
+		SetPageUptodate(page[i]);
+		unlock_page(page[i]);
+		if (page[i] != target_page)
+			page_cache_release(page[i]);
+	}
+
+	kfree(actor);
+	kfree(page);
+
+	return 0;
+
+mark_errored:
+	/*
+	 * Decompression failed, mark pages as errored.  Target_page is
+	 * dealt with by the caller.
+	 */
+	for (i = 0; i < pages; i++) {
+		if (page[i] == target_page)
+			continue;
+		flush_dcache_page(page[i]);
+		SetPageError(page[i]);
+		unlock_page(page[i]);
+		page_cache_release(page[i]);
+	}
+
+out:
+	kfree(actor);
+	kfree(page);
+	return res;
+}
+
+
+static int squashfs_read_cache(struct page *target_page, u64 block, int bsize,
+	int pages, struct page **page)
+{
+	struct inode *i = target_page->mapping->host;
+	struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb,
+						 block, bsize);
+	int bytes = buffer->length, res = buffer->error, n, offset = 0;
+	void *pageaddr;
+
+	if (res) {
+		ERROR("Unable to read page, block %llx, size %x\n", block,
+			bsize);
+		goto out;
+	}
+
+	for (n = 0; n < pages && bytes > 0; n++,
+			bytes -= PAGE_CACHE_SIZE, offset += PAGE_CACHE_SIZE) {
+		int avail = min_t(int, bytes, PAGE_CACHE_SIZE);
+
+		if (page[n] == NULL)
+			continue;
+
+		pageaddr = kmap_atomic(page[n]);
+		squashfs_copy_data(pageaddr, buffer, offset, avail);
+		memset(pageaddr + avail, 0, PAGE_CACHE_SIZE - avail);
+		kunmap_atomic(pageaddr);
+		flush_dcache_page(page[n]);
+		SetPageUptodate(page[n]);
+		unlock_page(page[n]);
+		if (page[n] != target_page)
+			page_cache_release(page[n]);
+	}
+
+out:
+	squashfs_cache_put(buffer);
+	return res;
+}
diff --git a/fs/squashfs/lzo_wrapper.c b/fs/squashfs/lzo_wrapper.c
index 00f4dfc..244b9fb 100644
--- a/fs/squashfs/lzo_wrapper.c
+++ b/fs/squashfs/lzo_wrapper.c
@@ -31,13 +31,14 @@
 #include "squashfs_fs_sb.h"
 #include "squashfs.h"
 #include "decompressor.h"
+#include "page_actor.h"
 
 struct squashfs_lzo {
 	void	*input;
 	void	*output;
 };
 
-static void *lzo_init(struct squashfs_sb_info *msblk, void *buff, int len)
+static void *lzo_init(struct squashfs_sb_info *msblk, void *buff)
 {
 	int block_size = max_t(int, msblk->block_size, SQUASHFS_METADATA_SIZE);
 
@@ -74,22 +75,16 @@
 }
 
 
-static int lzo_uncompress(struct squashfs_sb_info *msblk, void **buffer,
-	struct buffer_head **bh, int b, int offset, int length, int srclength,
-	int pages)
+static int lzo_uncompress(struct squashfs_sb_info *msblk, void *strm,
+	struct buffer_head **bh, int b, int offset, int length,
+	struct squashfs_page_actor *output)
 {
-	struct squashfs_lzo *stream = msblk->stream;
-	void *buff = stream->input;
+	struct squashfs_lzo *stream = strm;
+	void *buff = stream->input, *data;
 	int avail, i, bytes = length, res;
-	size_t out_len = srclength;
-
-	mutex_lock(&msblk->read_data_mutex);
+	size_t out_len = output->length;
 
 	for (i = 0; i < b; i++) {
-		wait_on_buffer(bh[i]);
-		if (!buffer_uptodate(bh[i]))
-			goto block_release;
-
 		avail = min(bytes, msblk->devblksize - offset);
 		memcpy(buff, bh[i]->b_data + offset, avail);
 		buff += avail;
@@ -104,24 +99,24 @@
 		goto failed;
 
 	res = bytes = (int)out_len;
-	for (i = 0, buff = stream->output; bytes && i < pages; i++) {
-		avail = min_t(int, bytes, PAGE_CACHE_SIZE);
-		memcpy(buffer[i], buff, avail);
-		buff += avail;
-		bytes -= avail;
+	data = squashfs_first_page(output);
+	buff = stream->output;
+	while (data) {
+		if (bytes <= PAGE_CACHE_SIZE) {
+			memcpy(data, buff, bytes);
+			break;
+		} else {
+			memcpy(data, buff, PAGE_CACHE_SIZE);
+			buff += PAGE_CACHE_SIZE;
+			bytes -= PAGE_CACHE_SIZE;
+			data = squashfs_next_page(output);
+		}
 	}
+	squashfs_finish_page(output);
 
-	mutex_unlock(&msblk->read_data_mutex);
 	return res;
 
-block_release:
-	for (; i < b; i++)
-		put_bh(bh[i]);
-
 failed:
-	mutex_unlock(&msblk->read_data_mutex);
-
-	ERROR("lzo decompression failed, data probably corrupt\n");
 	return -EIO;
 }
 
diff --git a/fs/squashfs/page_actor.c b/fs/squashfs/page_actor.c
new file mode 100644
index 0000000..5a1c11f
--- /dev/null
+++ b/fs/squashfs/page_actor.c
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2013
+ * Phillip Lougher <phillip@squashfs.org.uk>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/pagemap.h>
+#include "page_actor.h"
+
+/*
+ * This file contains implementations of page_actor for decompressing into
+ * an intermediate buffer, and for decompressing directly into the
+ * page cache.
+ *
+ * Calling code should avoid sleeping between calls to squashfs_first_page()
+ * and squashfs_finish_page().
+ */
+
+/* Implementation of page_actor for decompressing into intermediate buffer */
+static void *cache_first_page(struct squashfs_page_actor *actor)
+{
+	actor->next_page = 1;
+	return actor->buffer[0];
+}
+
+static void *cache_next_page(struct squashfs_page_actor *actor)
+{
+	if (actor->next_page == actor->pages)
+		return NULL;
+
+	return actor->buffer[actor->next_page++];
+}
+
+static void cache_finish_page(struct squashfs_page_actor *actor)
+{
+	/* empty */
+}
+
+struct squashfs_page_actor *squashfs_page_actor_init(void **buffer,
+	int pages, int length)
+{
+	struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL);
+
+	if (actor == NULL)
+		return NULL;
+
+	actor->length = length ? : pages * PAGE_CACHE_SIZE;
+	actor->buffer = buffer;
+	actor->pages = pages;
+	actor->next_page = 0;
+	actor->squashfs_first_page = cache_first_page;
+	actor->squashfs_next_page = cache_next_page;
+	actor->squashfs_finish_page = cache_finish_page;
+	return actor;
+}
+
+/* Implementation of page_actor for decompressing directly into page cache. */
+static void *direct_first_page(struct squashfs_page_actor *actor)
+{
+	actor->next_page = 1;
+	return actor->pageaddr = kmap_atomic(actor->page[0]);
+}
+
+static void *direct_next_page(struct squashfs_page_actor *actor)
+{
+	if (actor->pageaddr)
+		kunmap_atomic(actor->pageaddr);
+
+	return actor->pageaddr = actor->next_page == actor->pages ? NULL :
+		kmap_atomic(actor->page[actor->next_page++]);
+}
+
+static void direct_finish_page(struct squashfs_page_actor *actor)
+{
+	if (actor->pageaddr)
+		kunmap_atomic(actor->pageaddr);
+}
+
+struct squashfs_page_actor *squashfs_page_actor_init_special(struct page **page,
+	int pages, int length)
+{
+	struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL);
+
+	if (actor == NULL)
+		return NULL;
+
+	actor->length = length ? : pages * PAGE_CACHE_SIZE;
+	actor->page = page;
+	actor->pages = pages;
+	actor->next_page = 0;
+	actor->pageaddr = NULL;
+	actor->squashfs_first_page = direct_first_page;
+	actor->squashfs_next_page = direct_next_page;
+	actor->squashfs_finish_page = direct_finish_page;
+	return actor;
+}
diff --git a/fs/squashfs/page_actor.h b/fs/squashfs/page_actor.h
new file mode 100644
index 0000000..26dd820
--- /dev/null
+++ b/fs/squashfs/page_actor.h
@@ -0,0 +1,81 @@
+#ifndef PAGE_ACTOR_H
+#define PAGE_ACTOR_H
+/*
+ * Copyright (c) 2013
+ * Phillip Lougher <phillip@squashfs.org.uk>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#ifndef CONFIG_SQUASHFS_FILE_DIRECT
+struct squashfs_page_actor {
+	void	**page;
+	int	pages;
+	int	length;
+	int	next_page;
+};
+
+static inline struct squashfs_page_actor *squashfs_page_actor_init(void **page,
+	int pages, int length)
+{
+	struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL);
+
+	if (actor == NULL)
+		return NULL;
+
+	actor->length = length ? : pages * PAGE_CACHE_SIZE;
+	actor->page = page;
+	actor->pages = pages;
+	actor->next_page = 0;
+	return actor;
+}
+
+static inline void *squashfs_first_page(struct squashfs_page_actor *actor)
+{
+	actor->next_page = 1;
+	return actor->page[0];
+}
+
+static inline void *squashfs_next_page(struct squashfs_page_actor *actor)
+{
+	return actor->next_page == actor->pages ? NULL :
+		actor->page[actor->next_page++];
+}
+
+static inline void squashfs_finish_page(struct squashfs_page_actor *actor)
+{
+	/* empty */
+}
+#else
+struct squashfs_page_actor {
+	union {
+		void		**buffer;
+		struct page	**page;
+	};
+	void	*pageaddr;
+	void    *(*squashfs_first_page)(struct squashfs_page_actor *);
+	void    *(*squashfs_next_page)(struct squashfs_page_actor *);
+	void    (*squashfs_finish_page)(struct squashfs_page_actor *);
+	int	pages;
+	int	length;
+	int	next_page;
+};
+
+extern struct squashfs_page_actor *squashfs_page_actor_init(void **, int, int);
+extern struct squashfs_page_actor *squashfs_page_actor_init_special(struct page
+							 **, int, int);
+static inline void *squashfs_first_page(struct squashfs_page_actor *actor)
+{
+	return actor->squashfs_first_page(actor);
+}
+static inline void *squashfs_next_page(struct squashfs_page_actor *actor)
+{
+	return actor->squashfs_next_page(actor);
+}
+static inline void squashfs_finish_page(struct squashfs_page_actor *actor)
+{
+	actor->squashfs_finish_page(actor);
+}
+#endif
+#endif
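
Both the cache and direct actors are driven the same way by the decompressor wrappers below: fetch the first output page, fill it in PAGE_CACHE_SIZE chunks, advance with squashfs_next_page() until it returns NULL, then call squashfs_finish_page() so the direct actor can kunmap its last page. A minimal sketch, with copy_to_actor() as a hypothetical helper:

/* Hypothetical helper: feed an already-decompressed buffer through a page actor */
static int copy_to_actor(void *src, int length, struct squashfs_page_actor *output)
{
	void *data = squashfs_first_page(output);
	int bytes = length;

	while (data != NULL && bytes > 0) {
		int avail = min_t(int, bytes, PAGE_CACHE_SIZE);

		memcpy(data, src, avail);
		src += avail;
		bytes -= avail;
		data = squashfs_next_page(output);	/* NULL after the last page */
	}

	squashfs_finish_page(output);
	return length - bytes;
}
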
diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h
index d126651..9e1bb79 100644
--- a/fs/squashfs/squashfs.h
+++ b/fs/squashfs/squashfs.h
@@ -28,8 +28,8 @@
 #define WARNING(s, args...)	pr_warning("SQUASHFS: "s, ## args)
 
 /* block.c */
-extern int squashfs_read_data(struct super_block *, void **, u64, int, u64 *,
-				int, int);
+extern int squashfs_read_data(struct super_block *, u64, int, u64 *,
+				struct squashfs_page_actor *);
 
 /* cache.c */
 extern struct squashfs_cache *squashfs_cache_init(char *, int, int);
@@ -48,7 +48,14 @@
 
 /* decompressor.c */
 extern const struct squashfs_decompressor *squashfs_lookup_decompressor(int);
-extern void *squashfs_decompressor_init(struct super_block *, unsigned short);
+extern void *squashfs_decompressor_setup(struct super_block *, unsigned short);
+
+/* decompressor_xxx.c */
+extern void *squashfs_decompressor_create(struct squashfs_sb_info *, void *);
+extern void squashfs_decompressor_destroy(struct squashfs_sb_info *);
+extern int squashfs_decompress(struct squashfs_sb_info *, struct buffer_head **,
+	int, int, int, struct squashfs_page_actor *);
+extern int squashfs_max_decompressors(void);
 
 /* export.c */
 extern __le64 *squashfs_read_inode_lookup_table(struct super_block *, u64, u64,
@@ -59,6 +66,13 @@
 extern __le64 *squashfs_read_fragment_index_table(struct super_block *,
 				u64, u64, unsigned int);
 
+/* file.c */
+void squashfs_copy_cache(struct page *, struct squashfs_cache_entry *, int,
+				int);
+
+/* file_xxx.c */
+extern int squashfs_readpage_block(struct page *, u64, int);
+
 /* id.c */
 extern int squashfs_get_id(struct super_block *, unsigned int, unsigned int *);
 extern __le64 *squashfs_read_id_index_table(struct super_block *, u64, u64,
diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h
index 52934a2..1da565c 100644
--- a/fs/squashfs/squashfs_fs_sb.h
+++ b/fs/squashfs/squashfs_fs_sb.h
@@ -50,6 +50,7 @@
 	wait_queue_head_t	wait_queue;
 	struct squashfs_cache	*cache;
 	void			**data;
+	struct squashfs_page_actor	*actor;
 };
 
 struct squashfs_sb_info {
@@ -63,10 +64,9 @@
 	__le64					*id_table;
 	__le64					*fragment_index;
 	__le64					*xattr_id_table;
-	struct mutex				read_data_mutex;
 	struct mutex				meta_index_mutex;
 	struct meta_index			*meta_index;
-	void					*stream;
+	struct squashfs_stream			*stream;
 	__le64					*inode_lookup_table;
 	u64					inode_table;
 	u64					directory_table;
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 60553a9..202df63 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -98,7 +98,6 @@
 	msblk->devblksize = sb_min_blocksize(sb, SQUASHFS_DEVBLK_SIZE);
 	msblk->devblksize_log2 = ffz(~msblk->devblksize);
 
-	mutex_init(&msblk->read_data_mutex);
 	mutex_init(&msblk->meta_index_mutex);
 
 	/*
@@ -206,13 +205,14 @@
 		goto failed_mount;
 
 	/* Allocate read_page block */
-	msblk->read_page = squashfs_cache_init("data", 1, msblk->block_size);
+	msblk->read_page = squashfs_cache_init("data",
+		squashfs_max_decompressors(), msblk->block_size);
 	if (msblk->read_page == NULL) {
 		ERROR("Failed to allocate read_page block\n");
 		goto failed_mount;
 	}
 
-	msblk->stream = squashfs_decompressor_init(sb, flags);
+	msblk->stream = squashfs_decompressor_setup(sb, flags);
 	if (IS_ERR(msblk->stream)) {
 		err = PTR_ERR(msblk->stream);
 		msblk->stream = NULL;
@@ -336,7 +336,7 @@
 	squashfs_cache_delete(msblk->block_cache);
 	squashfs_cache_delete(msblk->fragment_cache);
 	squashfs_cache_delete(msblk->read_page);
-	squashfs_decompressor_free(msblk, msblk->stream);
+	squashfs_decompressor_destroy(msblk);
 	kfree(msblk->inode_lookup_table);
 	kfree(msblk->fragment_index);
 	kfree(msblk->id_table);
@@ -383,7 +383,7 @@
 		squashfs_cache_delete(sbi->block_cache);
 		squashfs_cache_delete(sbi->fragment_cache);
 		squashfs_cache_delete(sbi->read_page);
-		squashfs_decompressor_free(sbi, sbi->stream);
+		squashfs_decompressor_destroy(sbi);
 		kfree(sbi->id_table);
 		kfree(sbi->fragment_index);
 		kfree(sbi->meta_index);
diff --git a/fs/squashfs/xz_wrapper.c b/fs/squashfs/xz_wrapper.c
index 1760b7d1..c609624 100644
--- a/fs/squashfs/xz_wrapper.c
+++ b/fs/squashfs/xz_wrapper.c
@@ -32,44 +32,70 @@
 #include "squashfs_fs_sb.h"
 #include "squashfs.h"
 #include "decompressor.h"
+#include "page_actor.h"
 
 struct squashfs_xz {
 	struct xz_dec *state;
 	struct xz_buf buf;
 };
 
-struct comp_opts {
+struct disk_comp_opts {
 	__le32 dictionary_size;
 	__le32 flags;
 };
 
-static void *squashfs_xz_init(struct squashfs_sb_info *msblk, void *buff,
-	int len)
+struct comp_opts {
+	int dict_size;
+};
+
+static void *squashfs_xz_comp_opts(struct squashfs_sb_info *msblk,
+	void *buff, int len)
 {
-	struct comp_opts *comp_opts = buff;
-	struct squashfs_xz *stream;
-	int dict_size = msblk->block_size;
-	int err, n;
+	struct disk_comp_opts *comp_opts = buff;
+	struct comp_opts *opts;
+	int err = 0, n;
+
+	opts = kmalloc(sizeof(*opts), GFP_KERNEL);
+	if (opts == NULL) {
+		err = -ENOMEM;
+		goto out2;
+	}
 
 	if (comp_opts) {
 		/* check compressor options are the expected length */
 		if (len < sizeof(*comp_opts)) {
 			err = -EIO;
-			goto failed;
+			goto out;
 		}
 
-		dict_size = le32_to_cpu(comp_opts->dictionary_size);
+		opts->dict_size = le32_to_cpu(comp_opts->dictionary_size);
 
 		/* the dictionary size should be 2^n or 2^n+2^(n+1) */
-		n = ffs(dict_size) - 1;
-		if (dict_size != (1 << n) && dict_size != (1 << n) +
+		n = ffs(opts->dict_size) - 1;
+		if (opts->dict_size != (1 << n) && opts->dict_size != (1 << n) +
 						(1 << (n + 1))) {
 			err = -EIO;
-			goto failed;
+			goto out;
 		}
-	}
+	} else
+		/* use defaults */
+		opts->dict_size = max_t(int, msblk->block_size,
+							SQUASHFS_METADATA_SIZE);
 
-	dict_size = max_t(int, dict_size, SQUASHFS_METADATA_SIZE);
+	return opts;
+
+out:
+	kfree(opts);
+out2:
+	return ERR_PTR(err);
+}
+
+
+static void *squashfs_xz_init(struct squashfs_sb_info *msblk, void *buff)
+{
+	struct comp_opts *comp_opts = buff;
+	struct squashfs_xz *stream;
+	int err;
 
 	stream = kmalloc(sizeof(*stream), GFP_KERNEL);
 	if (stream == NULL) {
@@ -77,7 +103,7 @@
 		goto failed;
 	}
 
-	stream->state = xz_dec_init(XZ_PREALLOC, dict_size);
+	stream->state = xz_dec_init(XZ_PREALLOC, comp_opts->dict_size);
 	if (stream->state == NULL) {
 		kfree(stream);
 		err = -ENOMEM;
@@ -103,42 +129,37 @@
 }
 
 
-static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void **buffer,
-	struct buffer_head **bh, int b, int offset, int length, int srclength,
-	int pages)
+static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm,
+	struct buffer_head **bh, int b, int offset, int length,
+	struct squashfs_page_actor *output)
 {
 	enum xz_ret xz_err;
-	int avail, total = 0, k = 0, page = 0;
-	struct squashfs_xz *stream = msblk->stream;
-
-	mutex_lock(&msblk->read_data_mutex);
+	int avail, total = 0, k = 0;
+	struct squashfs_xz *stream = strm;
 
 	xz_dec_reset(stream->state);
 	stream->buf.in_pos = 0;
 	stream->buf.in_size = 0;
 	stream->buf.out_pos = 0;
 	stream->buf.out_size = PAGE_CACHE_SIZE;
-	stream->buf.out = buffer[page++];
+	stream->buf.out = squashfs_first_page(output);
 
 	do {
 		if (stream->buf.in_pos == stream->buf.in_size && k < b) {
 			avail = min(length, msblk->devblksize - offset);
 			length -= avail;
-			wait_on_buffer(bh[k]);
-			if (!buffer_uptodate(bh[k]))
-				goto release_mutex;
-
 			stream->buf.in = bh[k]->b_data + offset;
 			stream->buf.in_size = avail;
 			stream->buf.in_pos = 0;
 			offset = 0;
 		}
 
-		if (stream->buf.out_pos == stream->buf.out_size
-							&& page < pages) {
-			stream->buf.out = buffer[page++];
-			stream->buf.out_pos = 0;
-			total += PAGE_CACHE_SIZE;
+		if (stream->buf.out_pos == stream->buf.out_size) {
+			stream->buf.out = squashfs_next_page(output);
+			if (stream->buf.out != NULL) {
+				stream->buf.out_pos = 0;
+				total += PAGE_CACHE_SIZE;
+			}
 		}
 
 		xz_err = xz_dec_run(stream->state, &stream->buf);
@@ -147,23 +168,14 @@
 			put_bh(bh[k++]);
 	} while (xz_err == XZ_OK);
 
-	if (xz_err != XZ_STREAM_END) {
-		ERROR("xz_dec_run error, data probably corrupt\n");
-		goto release_mutex;
-	}
+	squashfs_finish_page(output);
 
-	if (k < b) {
-		ERROR("xz_uncompress error, input remaining\n");
-		goto release_mutex;
-	}
+	if (xz_err != XZ_STREAM_END || k < b)
+		goto out;
 
-	total += stream->buf.out_pos;
-	mutex_unlock(&msblk->read_data_mutex);
-	return total;
+	return total + stream->buf.out_pos;
 
-release_mutex:
-	mutex_unlock(&msblk->read_data_mutex);
-
+out:
 	for (; k < b; k++)
 		put_bh(bh[k]);
 
@@ -172,6 +184,7 @@
 
 const struct squashfs_decompressor squashfs_xz_comp_ops = {
 	.init = squashfs_xz_init,
+	.comp_opts = squashfs_xz_comp_opts,
 	.free = squashfs_xz_free,
 	.decompress = squashfs_xz_uncompress,
 	.id = XZ_COMPRESSION,
diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c
index 55d918f..8727cab 100644
--- a/fs/squashfs/zlib_wrapper.c
+++ b/fs/squashfs/zlib_wrapper.c
@@ -32,8 +32,9 @@
 #include "squashfs_fs_sb.h"
 #include "squashfs.h"
 #include "decompressor.h"
+#include "page_actor.h"
 
-static void *zlib_init(struct squashfs_sb_info *dummy, void *buff, int len)
+static void *zlib_init(struct squashfs_sb_info *dummy, void *buff)
 {
 	z_stream *stream = kmalloc(sizeof(z_stream), GFP_KERNEL);
 	if (stream == NULL)
@@ -61,44 +62,37 @@
 }
 
 
-static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer,
-	struct buffer_head **bh, int b, int offset, int length, int srclength,
-	int pages)
+static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm,
+	struct buffer_head **bh, int b, int offset, int length,
+	struct squashfs_page_actor *output)
 {
-	int zlib_err, zlib_init = 0;
-	int k = 0, page = 0;
-	z_stream *stream = msblk->stream;
+	int zlib_err, zlib_init = 0, k = 0;
+	z_stream *stream = strm;
 
-	mutex_lock(&msblk->read_data_mutex);
-
-	stream->avail_out = 0;
+	stream->avail_out = PAGE_CACHE_SIZE;
+	stream->next_out = squashfs_first_page(output);
 	stream->avail_in = 0;
 
 	do {
 		if (stream->avail_in == 0 && k < b) {
 			int avail = min(length, msblk->devblksize - offset);
 			length -= avail;
-			wait_on_buffer(bh[k]);
-			if (!buffer_uptodate(bh[k]))
-				goto release_mutex;
-
 			stream->next_in = bh[k]->b_data + offset;
 			stream->avail_in = avail;
 			offset = 0;
 		}
 
-		if (stream->avail_out == 0 && page < pages) {
-			stream->next_out = buffer[page++];
-			stream->avail_out = PAGE_CACHE_SIZE;
+		if (stream->avail_out == 0) {
+			stream->next_out = squashfs_next_page(output);
+			if (stream->next_out != NULL)
+				stream->avail_out = PAGE_CACHE_SIZE;
 		}
 
 		if (!zlib_init) {
 			zlib_err = zlib_inflateInit(stream);
 			if (zlib_err != Z_OK) {
-				ERROR("zlib_inflateInit returned unexpected "
-					"result 0x%x, srclength %d\n",
-					zlib_err, srclength);
-				goto release_mutex;
+				squashfs_finish_page(output);
+				goto out;
 			}
 			zlib_init = 1;
 		}
@@ -109,29 +103,21 @@
 			put_bh(bh[k++]);
 	} while (zlib_err == Z_OK);
 
-	if (zlib_err != Z_STREAM_END) {
-		ERROR("zlib_inflate error, data probably corrupt\n");
-		goto release_mutex;
-	}
+	squashfs_finish_page(output);
+
+	if (zlib_err != Z_STREAM_END)
+		goto out;
 
 	zlib_err = zlib_inflateEnd(stream);
-	if (zlib_err != Z_OK) {
-		ERROR("zlib_inflate error, data probably corrupt\n");
-		goto release_mutex;
-	}
+	if (zlib_err != Z_OK)
+		goto out;
 
-	if (k < b) {
-		ERROR("zlib_uncompress error, data remaining\n");
-		goto release_mutex;
-	}
+	if (k < b)
+		goto out;
 
-	length = stream->total_out;
-	mutex_unlock(&msblk->read_data_mutex);
-	return length;
+	return stream->total_out;
 
-release_mutex:
-	mutex_unlock(&msblk->read_data_mutex);
-
+out:
 	for (; k < b; k++)
 		put_bh(bh[k]);
 
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 89c60b0..7b2de02 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -431,9 +431,9 @@
 {
 	return acpi_find_child(handle, addr, false);
 }
+void acpi_preset_companion(struct device *dev, acpi_handle parent, u64 addr);
 int acpi_is_root_bridge(acpi_handle);
 struct acpi_pci_root *acpi_pci_find_root(acpi_handle handle);
-#define DEVICE_ACPI_HANDLE(dev) ((acpi_handle)ACPI_HANDLE(dev))
 
 int acpi_enable_wakeup_device_power(struct acpi_device *dev, int state);
 int acpi_disable_wakeup_device_power(struct acpi_device *dev);
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index b0972c4..d9099b1 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -44,6 +44,20 @@
 #include <acpi/acpi_numa.h>
 #include <asm/acpi.h>
 
+static inline acpi_handle acpi_device_handle(struct acpi_device *adev)
+{
+	return adev ? adev->handle : NULL;
+}
+
+#define ACPI_COMPANION(dev)		((dev)->acpi_node.companion)
+#define ACPI_COMPANION_SET(dev, adev)	ACPI_COMPANION(dev) = (adev)
+#define ACPI_HANDLE(dev)		acpi_device_handle(ACPI_COMPANION(dev))
+
+static inline const char *acpi_dev_name(struct acpi_device *adev)
+{
+	return dev_name(&adev->dev);
+}
+
 enum acpi_irq_model_id {
 	ACPI_IRQ_MODEL_PIC = 0,
 	ACPI_IRQ_MODEL_IOAPIC,
@@ -401,6 +415,15 @@
 
 #define acpi_disabled 1
 
+#define ACPI_COMPANION(dev)		(NULL)
+#define ACPI_COMPANION_SET(dev, adev)	do { } while (0)
+#define ACPI_HANDLE(dev)		(NULL)
+
+static inline const char *acpi_dev_name(struct acpi_device *adev)
+{
+	return NULL;
+}
+
 static inline void acpi_early_init(void) { }
 
 static inline int early_acpi_boot_init(void)
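
With the companion pointer replacing the raw handle in struct acpi_dev_node, ACPI_HANDLE() is now derived from ACPI_COMPANION() and degrades to NULL when no companion is set (or when ACPI is disabled). A short sketch of the expected driver-side use, dev being any struct device:

	struct acpi_device *adev = ACPI_COMPANION(dev);
	acpi_handle handle = ACPI_HANDLE(dev);	/* NULL without a companion */

	if (adev)
		dev_info(dev, "ACPI companion %s (handle %p)\n",
			 acpi_dev_name(adev), handle);
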
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index f26ec20f..1b135d4 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -505,6 +505,9 @@
 				 (1 << QUEUE_FLAG_SAME_COMP)	|	\
 				 (1 << QUEUE_FLAG_ADD_RANDOM))
 
+#define QUEUE_FLAG_MQ_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
+				 (1 << QUEUE_FLAG_SAME_COMP))
+
 static inline void queue_lockdep_assert_held(struct request_queue *q)
 {
 	if (q->queue_lock)
diff --git a/include/linux/device.h b/include/linux/device.h
index b025925..952b010 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -644,9 +644,11 @@
 	unsigned long segment_boundary_mask;
 };
 
+struct acpi_device;
+
 struct acpi_dev_node {
 #ifdef CONFIG_ACPI
-	void	*handle;
+	struct acpi_device *companion;
 #endif
 };
 
@@ -790,14 +792,6 @@
 	return container_of(kobj, struct device, kobj);
 }
 
-#ifdef CONFIG_ACPI
-#define ACPI_HANDLE(dev)	((dev)->acpi_node.handle)
-#define ACPI_HANDLE_SET(dev, _handle_)	(dev)->acpi_node.handle = (_handle_)
-#else
-#define ACPI_HANDLE(dev)	(NULL)
-#define ACPI_HANDLE_SET(dev, _handle_)	do { } while (0)
-#endif
-
 /* Get the wakeup routines, which depend on struct device */
 #include <linux/pm_wakeup.h>
 
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 0bc7275..41cf0c3 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -45,13 +45,13 @@
 
 /**
  * enum dma_status - DMA transaction status
- * @DMA_SUCCESS: transaction completed successfully
+ * @DMA_COMPLETE: transaction completed
  * @DMA_IN_PROGRESS: transaction not yet processed
  * @DMA_PAUSED: transaction is paused
  * @DMA_ERROR: transaction failed
  */
 enum dma_status {
-	DMA_SUCCESS,
+	DMA_COMPLETE,
 	DMA_IN_PROGRESS,
 	DMA_PAUSED,
 	DMA_ERROR,
@@ -171,12 +171,6 @@
  * @DMA_CTRL_ACK - if clear, the descriptor cannot be reused until the client
 *  acknowledges receipt, i.e. has a chance to establish any dependency
  *  chains
- * @DMA_COMPL_SKIP_SRC_UNMAP - set to disable dma-unmapping the source buffer(s)
- * @DMA_COMPL_SKIP_DEST_UNMAP - set to disable dma-unmapping the destination(s)
- * @DMA_COMPL_SRC_UNMAP_SINGLE - set to do the source dma-unmapping as single
- * 	(if not set, do the source dma-unmapping as page)
- * @DMA_COMPL_DEST_UNMAP_SINGLE - set to do the destination dma-unmapping as single
- * 	(if not set, do the destination dma-unmapping as page)
  * @DMA_PREP_PQ_DISABLE_P - prevent generation of P while generating Q
  * @DMA_PREP_PQ_DISABLE_Q - prevent generation of Q while generating P
  * @DMA_PREP_CONTINUE - indicate to a driver that it is reusing buffers as
@@ -188,14 +182,10 @@
 enum dma_ctrl_flags {
 	DMA_PREP_INTERRUPT = (1 << 0),
 	DMA_CTRL_ACK = (1 << 1),
-	DMA_COMPL_SKIP_SRC_UNMAP = (1 << 2),
-	DMA_COMPL_SKIP_DEST_UNMAP = (1 << 3),
-	DMA_COMPL_SRC_UNMAP_SINGLE = (1 << 4),
-	DMA_COMPL_DEST_UNMAP_SINGLE = (1 << 5),
-	DMA_PREP_PQ_DISABLE_P = (1 << 6),
-	DMA_PREP_PQ_DISABLE_Q = (1 << 7),
-	DMA_PREP_CONTINUE = (1 << 8),
-	DMA_PREP_FENCE = (1 << 9),
+	DMA_PREP_PQ_DISABLE_P = (1 << 2),
+	DMA_PREP_PQ_DISABLE_Q = (1 << 3),
+	DMA_PREP_CONTINUE = (1 << 4),
+	DMA_PREP_FENCE = (1 << 5),
 };
 
 /**
@@ -413,6 +403,17 @@
 typedef bool (*dma_filter_fn)(struct dma_chan *chan, void *filter_param);
 
 typedef void (*dma_async_tx_callback)(void *dma_async_param);
+
+struct dmaengine_unmap_data {
+	u8 to_cnt;
+	u8 from_cnt;
+	u8 bidi_cnt;
+	struct device *dev;
+	struct kref kref;
+	size_t len;
+	dma_addr_t addr[0];
+};
+
 /**
  * struct dma_async_tx_descriptor - async transaction descriptor
  * ---dma generic offload fields---
@@ -438,6 +439,7 @@
 	dma_cookie_t (*tx_submit)(struct dma_async_tx_descriptor *tx);
 	dma_async_tx_callback callback;
 	void *callback_param;
+	struct dmaengine_unmap_data *unmap;
 #ifdef CONFIG_ASYNC_TX_ENABLE_CHANNEL_SWITCH
 	struct dma_async_tx_descriptor *next;
 	struct dma_async_tx_descriptor *parent;
@@ -445,6 +447,40 @@
 #endif
 };
 
+#ifdef CONFIG_DMA_ENGINE
+static inline void dma_set_unmap(struct dma_async_tx_descriptor *tx,
+				 struct dmaengine_unmap_data *unmap)
+{
+	kref_get(&unmap->kref);
+	tx->unmap = unmap;
+}
+
+struct dmaengine_unmap_data *
+dmaengine_get_unmap_data(struct device *dev, int nr, gfp_t flags);
+void dmaengine_unmap_put(struct dmaengine_unmap_data *unmap);
+#else
+static inline void dma_set_unmap(struct dma_async_tx_descriptor *tx,
+				 struct dmaengine_unmap_data *unmap)
+{
+}
+static inline struct dmaengine_unmap_data *
+dmaengine_get_unmap_data(struct device *dev, int nr, gfp_t flags)
+{
+	return NULL;
+}
+static inline void dmaengine_unmap_put(struct dmaengine_unmap_data *unmap)
+{
+}
+#endif
+
+static inline void dma_descriptor_unmap(struct dma_async_tx_descriptor *tx)
+{
+	if (tx->unmap) {
+		dmaengine_unmap_put(tx->unmap);
+		tx->unmap = NULL;
+	}
+}
+
 #ifndef CONFIG_ASYNC_TX_ENABLE_CHANNEL_SWITCH
 static inline void txd_lock(struct dma_async_tx_descriptor *txd)
 {
@@ -979,10 +1015,10 @@
 {
 	if (last_complete <= last_used) {
 		if ((cookie <= last_complete) || (cookie > last_used))
-			return DMA_SUCCESS;
+			return DMA_COMPLETE;
 	} else {
 		if ((cookie <= last_complete) && (cookie > last_used))
-			return DMA_SUCCESS;
+			return DMA_COMPLETE;
 	}
 	return DMA_IN_PROGRESS;
 }
@@ -1013,11 +1049,11 @@
 }
 static inline enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie)
 {
-	return DMA_SUCCESS;
+	return DMA_COMPLETE;
 }
 static inline enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx)
 {
-	return DMA_SUCCESS;
+	return DMA_COMPLETE;
 }
 static inline void dma_issue_pending_all(void)
 {
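
The dmaengine_unmap_data object replaces the removed DMA_COMPL_SKIP_*/UNMAP_SINGLE flags: a client maps its buffers once, attaches the refcounted unmap descriptor to the transaction, and the core drops the mappings via dma_descriptor_unmap() when the transaction completes. A hedged sketch of the client side for one memcpy-style transfer (dev, src_page/dst_page, the offsets, len and tx stand in for real driver state):

	struct dmaengine_unmap_data *unmap;

	unmap = dmaengine_get_unmap_data(dev, 2, GFP_NOWAIT);
	if (unmap) {
		unmap->len = len;
		unmap->to_cnt = 1;	/* addr[0] mapped DMA_TO_DEVICE */
		unmap->from_cnt = 1;	/* addr[1] mapped DMA_FROM_DEVICE */
		unmap->addr[0] = dma_map_page(dev, src_page, src_off, len,
					      DMA_TO_DEVICE);
		unmap->addr[1] = dma_map_page(dev, dst_page, dst_off, len,
					      DMA_FROM_DEVICE);
		dma_set_unmap(tx, unmap);	/* tx takes its own reference */
		dmaengine_unmap_put(unmap);	/* drop the local reference */
	}
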
diff --git a/include/linux/fs.h b/include/linux/fs.h
index bf5d574..121f11f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2622,7 +2622,9 @@
 extern int simple_write_end(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned copied,
 			struct page *page, void *fsdata);
+extern int always_delete_dentry(const struct dentry *);
 extern struct inode *alloc_anon_inode(struct super_block *);
+extern const struct dentry_operations simple_dentry_operations;
 
 extern struct dentry *simple_lookup(struct inode *, struct dentry *, unsigned int flags);
 extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *);
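
always_delete_dentry() and simple_dentry_operations give pseudo filesystems a shared replacement for the identical "return 1" d_delete helpers they used to carry (cgroup and rpc_pipe are converted to them further down). A filesystem that never wants its dentries cached can simply set, e.g.:

	sb->s_d_op = &simple_dentry_operations;
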
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0548eb2..1cedd00 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1318,7 +1318,6 @@
 
 #if USE_SPLIT_PTE_PTLOCKS
 #if BLOATED_SPINLOCKS
-void __init ptlock_cache_init(void);
 extern bool ptlock_alloc(struct page *page);
 extern void ptlock_free(struct page *page);
 
@@ -1327,7 +1326,6 @@
 	return page->ptl;
 }
 #else /* BLOATED_SPINLOCKS */
-static inline void ptlock_cache_init(void) {}
 static inline bool ptlock_alloc(struct page *page)
 {
 	return true;
@@ -1380,17 +1378,10 @@
 {
 	return &mm->page_table_lock;
 }
-static inline void ptlock_cache_init(void) {}
 static inline bool ptlock_init(struct page *page) { return true; }
 static inline void pte_lock_deinit(struct page *page) {}
 #endif /* USE_SPLIT_PTE_PTLOCKS */
 
-static inline void pgtable_init(void)
-{
-	ptlock_cache_init();
-	pgtable_cache_init();
-}
-
 static inline bool pgtable_page_ctor(struct page *page)
 {
 	inc_zone_page_state(page, NR_PAGETABLE);
diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h
index d006f0c..5a462c4 100644
--- a/include/linux/pci-acpi.h
+++ b/include/linux/pci-acpi.h
@@ -27,7 +27,7 @@
 	while (!pci_is_root_bus(pbus))
 		pbus = pbus->parent;
 
-	return DEVICE_ACPI_HANDLE(pbus->bridge);
+	return ACPI_HANDLE(pbus->bridge);
 }
 
 static inline acpi_handle acpi_pci_get_bridge_handle(struct pci_bus *pbus)
@@ -39,7 +39,7 @@
 	else
 		dev = &pbus->self->dev;
 
-	return DEVICE_ACPI_HANDLE(dev);
+	return ACPI_HANDLE(dev);
 }
 
 void acpi_pci_add_bus(struct pci_bus *bus);
diff --git a/include/linux/platform_data/edma.h b/include/linux/platform_data/edma.h
index 179fb91b..f50821c 100644
--- a/include/linux/platform_data/edma.h
+++ b/include/linux/platform_data/edma.h
@@ -67,10 +67,10 @@
 #define ITCCHEN		BIT(23)
 
 /* ch_status parameter of callback function possible values */
-#define DMA_COMPLETE 1
-#define DMA_CC_ERROR 2
-#define DMA_TC1_ERROR 3
-#define DMA_TC2_ERROR 4
+#define EDMA_DMA_COMPLETE 1
+#define EDMA_DMA_CC_ERROR 2
+#define EDMA_DMA_TC1_ERROR 3
+#define EDMA_DMA_TC2_ERROR 4
 
 enum address_mode {
 	INCR = 0,
diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 1e8a8b6..cf87a24 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -354,6 +354,35 @@
 	spin_unlock(&sl->lock);
 }
 
+/**
+ * read_seqbegin_or_lock - begin a sequence number check or locking block
+ * @lock: sequence lock
+ * @seq : sequence number to be checked
+ *
+ * First try it once optimistically without taking the lock. If that fails,
+ * take the lock. The sequence number is also used as a marker for deciding
+ * whether to be a reader (even) or writer (odd).
+ * N.B. seq must be initialized to an even number to begin with.
+ */
+static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq)
+{
+	if (!(*seq & 1))	/* Even */
+		*seq = read_seqbegin(lock);
+	else			/* Odd */
+		read_seqlock_excl(lock);
+}
+
+static inline int need_seqretry(seqlock_t *lock, int seq)
+{
+	return !(seq & 1) && read_seqretry(lock, seq);
+}
+
+static inline void done_seqretry(seqlock_t *lock, int seq)
+{
+	if (seq & 1)
+		read_sequnlock_excl(lock);
+}
+
 static inline void read_seqlock_excl_bh(seqlock_t *sl)
 {
 	spin_lock_bh(&sl->lock);
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 61939ba..eaa00b1 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -278,6 +278,31 @@
 	__ret;								\
 })
 
+#define __wait_event_cmd(wq, condition, cmd1, cmd2)			\
+	(void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0,	\
+			    cmd1; schedule(); cmd2)
+
+/**
+ * wait_event_cmd - sleep until a condition gets true
+ * @wq: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ * @cmd1: the command to be executed before sleep
+ * @cmd2: the command to be executed after sleep
+ *
+ * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
+ * @condition evaluates to true. The @condition is checked each time
+ * the waitqueue @wq is woken up.
+ *
+ * wake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ */
+#define wait_event_cmd(wq, condition, cmd1, cmd2)			\
+do {									\
+	if (condition)							\
+		break;							\
+	__wait_event_cmd(wq, condition, cmd1, cmd2);			\
+} while (0)
+
 #define __wait_event_interruptible(wq, condition)			\
 	___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0,		\
 		      schedule())
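
wait_event_cmd() lets the caller run arbitrary statements around the schedule(), the usual case being to drop a lock while sleeping and retake it on wake-up. A hedged sketch, where conf->lock, conf->wait_queue and conf->quiesce are hypothetical driver state:

	spin_lock_irq(&conf->lock);
	wait_event_cmd(conf->wait_queue, conf->quiesce == 0,
		       spin_unlock_irq(&conf->lock),	/* cmd1: before schedule() */
		       spin_lock_irq(&conf->lock));	/* cmd2: after waking up */
	/* conf->lock is held again here */
	spin_unlock_irq(&conf->lock);
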
diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h
index fe1a540..f7cf7f3 100644
--- a/include/uapi/linux/raid/md_p.h
+++ b/include/uapi/linux/raid/md_p.h
@@ -16,6 +16,7 @@
 #define _MD_P_H
 
 #include <linux/types.h>
+#include <asm/byteorder.h>
 
 /*
  * RAID superblock.
diff --git a/init/main.c b/init/main.c
index 01573fd..febc511 100644
--- a/init/main.c
+++ b/init/main.c
@@ -476,7 +476,7 @@
 	mem_init();
 	kmem_cache_init();
 	percpu_init_late();
-	pgtable_init();
+	pgtable_cache_init();
 	vmalloc_init();
 }
 
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index e0839bcd..4c62513 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -895,11 +895,6 @@
 	iput(inode);
 }
 
-static int cgroup_delete(const struct dentry *d)
-{
-	return 1;
-}
-
 static void remove_dir(struct dentry *d)
 {
 	struct dentry *parent = dget(d->d_parent);
@@ -1486,7 +1481,7 @@
 {
 	static const struct dentry_operations cgroup_dops = {
 		.d_iput = cgroup_diput,
-		.d_delete = cgroup_delete,
+		.d_delete = always_delete_dentry,
 	};
 
 	struct inode *inode =
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 10c22ca..b38109e 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -792,7 +792,8 @@
 {
 	struct memory_bitmap *bm1, *bm2;
 
-	BUG_ON(!(forbidden_pages_map && free_pages_map));
+	if (WARN_ON(!(forbidden_pages_map && free_pages_map)))
+		return;
 
 	bm1 = forbidden_pages_map;
 	bm2 = free_pages_map;
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 2485027..98d35758 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -70,6 +70,7 @@
 		data->swap = swsusp_resume_device ?
 			swap_type_of(swsusp_resume_device, 0, NULL) : -1;
 		data->mode = O_RDONLY;
+		data->free_bitmaps = false;
 		error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE);
 		if (error)
 			pm_notifier_call_chain(PM_POST_HIBERNATION);
diff --git a/mm/memory.c b/mm/memory.c
index 0409e8f..5d9025f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4272,13 +4272,6 @@
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
 
 #if USE_SPLIT_PTE_PTLOCKS && BLOATED_SPINLOCKS
-static struct kmem_cache *page_ptl_cachep;
-void __init ptlock_cache_init(void)
-{
-	page_ptl_cachep = kmem_cache_create("page->ptl", sizeof(spinlock_t), 0,
-			SLAB_PANIC, NULL);
-}
-
 bool ptlock_alloc(struct page *page)
 {
 	spinlock_t *ptl;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 3dc0c6c..c4638e6 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1425,7 +1425,7 @@
 	do {
 		if (dma_async_is_tx_complete(tp->ucopy.dma_chan,
 					      last_issued, &done,
-					      &used) == DMA_SUCCESS) {
+					      &used) == DMA_COMPLETE) {
 			/* Safe to free early-copied skbs now */
 			__skb_queue_purge(&sk->sk_async_wait_queue);
 			break;
@@ -1433,7 +1433,7 @@
 			struct sk_buff *skb;
 			while ((skb = skb_peek(&sk->sk_async_wait_queue)) &&
 			       (dma_async_is_complete(skb->dma_cookie, done,
-						      used) == DMA_SUCCESS)) {
+						      used) == DMA_COMPLETE)) {
 				__skb_dequeue(&sk->sk_async_wait_queue);
 				kfree_skb(skb);
 			}
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index d0d14a0..bf04b30 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -471,15 +471,6 @@
 	umode_t mode;
 };
 
-static int rpc_delete_dentry(const struct dentry *dentry)
-{
-	return 1;
-}
-
-static const struct dentry_operations rpc_dentry_operations = {
-	.d_delete = rpc_delete_dentry,
-};
-
 static struct inode *
 rpc_get_inode(struct super_block *sb, umode_t mode)
 {
@@ -1266,7 +1257,7 @@
 	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
 	sb->s_magic = RPCAUTH_GSSMAGIC;
 	sb->s_op = &s_ops;
-	sb->s_d_op = &rpc_dentry_operations;
+	sb->s_d_op = &simple_dentry_operations;
 	sb->s_time_gran = 1;
 
 	inode = rpc_get_inode(sb, S_IFDIR | S_IRUGO | S_IXUGO);
diff --git a/sound/soc/davinci/davinci-pcm.c b/sound/soc/davinci/davinci-pcm.c
index fa64cd8..fb5d107f 100644
--- a/sound/soc/davinci/davinci-pcm.c
+++ b/sound/soc/davinci/davinci-pcm.c
@@ -238,7 +238,7 @@
 	print_buf_info(prtd->ram_channel, "i ram_channel");
 	pr_debug("davinci_pcm: link=%d, status=0x%x\n", link, ch_status);
 
-	if (unlikely(ch_status != DMA_COMPLETE))
+	if (unlikely(ch_status != EDMA_DMA_COMPLETE))
 		return;
 
 	if (snd_pcm_running(substream)) {
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index fe70207..9d77f13 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -2,7 +2,7 @@
  * turbostat -- show CPU frequency and C-state residency
  * on modern Intel turbo-capable processors.
  *
- * Copyright (c) 2012 Intel Corporation.
+ * Copyright (c) 2013 Intel Corporation.
  * Len Brown <len.brown@intel.com>
  *
  * This program is free software; you can redistribute it and/or modify it
@@ -47,6 +47,8 @@
 unsigned int do_nhm_cstates;
 unsigned int do_snb_cstates;
 unsigned int do_c8_c9_c10;
+unsigned int do_slm_cstates;
+unsigned int use_c1_residency_msr;
 unsigned int has_aperf;
 unsigned int has_epb;
 unsigned int units = 1000000000;	/* GHz etc */
@@ -81,6 +83,8 @@
 #define RAPL_DRAM	(1 << 3)
 #define RAPL_PKG_PERF_STATUS	(1 << 4)
 #define RAPL_DRAM_PERF_STATUS	(1 << 5)
+#define RAPL_PKG_POWER_INFO	(1 << 6)
+#define RAPL_CORE_POLICY	(1 << 7)
 #define	TJMAX_DEFAULT	100
 
 #define MAX(a, b) ((a) > (b) ? (a) : (b))
@@ -96,7 +100,7 @@
 	unsigned long long tsc;
 	unsigned long long aperf;
 	unsigned long long mperf;
-	unsigned long long c1;	/* derived */
+	unsigned long long c1;
 	unsigned long long extra_msr64;
 	unsigned long long extra_delta64;
 	unsigned long long extra_msr32;
@@ -266,7 +270,7 @@
 		outp += sprintf(outp, "           MSR 0x%03X", extra_msr_offset64);
 	if (do_nhm_cstates)
 		outp += sprintf(outp, "    %%c1");
-	if (do_nhm_cstates)
+	if (do_nhm_cstates && !do_slm_cstates)
 		outp += sprintf(outp, "    %%c3");
 	if (do_nhm_cstates)
 		outp += sprintf(outp, "    %%c6");
@@ -280,9 +284,9 @@
 
 	if (do_snb_cstates)
 		outp += sprintf(outp, "   %%pc2");
-	if (do_nhm_cstates)
+	if (do_nhm_cstates && !do_slm_cstates)
 		outp += sprintf(outp, "   %%pc3");
-	if (do_nhm_cstates)
+	if (do_nhm_cstates && !do_slm_cstates)
 		outp += sprintf(outp, "   %%pc6");
 	if (do_snb_cstates)
 		outp += sprintf(outp, "   %%pc7");
@@ -480,7 +484,7 @@
 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
 		goto done;
 
-	if (do_nhm_cstates)
+	if (do_nhm_cstates && !do_slm_cstates)
 		outp += sprintf(outp, " %6.2f", 100.0 * c->c3/t->tsc);
 	if (do_nhm_cstates)
 		outp += sprintf(outp, " %6.2f", 100.0 * c->c6/t->tsc);
@@ -499,9 +503,9 @@
 
 	if (do_snb_cstates)
 		outp += sprintf(outp, " %6.2f", 100.0 * p->pc2/t->tsc);
-	if (do_nhm_cstates)
+	if (do_nhm_cstates && !do_slm_cstates)
 		outp += sprintf(outp, " %6.2f", 100.0 * p->pc3/t->tsc);
-	if (do_nhm_cstates)
+	if (do_nhm_cstates && !do_slm_cstates)
 		outp += sprintf(outp, " %6.2f", 100.0 * p->pc6/t->tsc);
 	if (do_snb_cstates)
 		outp += sprintf(outp, " %6.2f", 100.0 * p->pc7/t->tsc);
@@ -648,17 +652,24 @@
 	}
 
 
-	/*
-	 * As counter collection is not atomic,
-	 * it is possible for mperf's non-halted cycles + idle states
-	 * to exceed TSC's all cycles: show c1 = 0% in that case.
-	 */
-	if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > old->tsc)
-		old->c1 = 0;
-	else {
-		/* normal case, derive c1 */
-		old->c1 = old->tsc - old->mperf - core_delta->c3
+	if (use_c1_residency_msr) {
+		/*
+		 * Some models have a dedicated C1 residency MSR,
+		 * which should be more accurate than the derivation below.
+		 */
+	} else {
+		/*
+		 * As counter collection is not atomic,
+		 * it is possible for mperf's non-halted cycles + idle states
+		 * to exceed TSC's all cycles: show c1 = 0% in that case.
+		 */
+		if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > old->tsc)
+			old->c1 = 0;
+		else {
+			/* normal case, derive c1 */
+			old->c1 = old->tsc - old->mperf - core_delta->c3
 				- core_delta->c6 - core_delta->c7;
+		}
 	}
 
 	if (old->mperf == 0) {
@@ -872,13 +883,21 @@
 		if (get_msr(cpu, extra_msr_offset64, &t->extra_msr64))
 			return -5;
 
+	if (use_c1_residency_msr) {
+		if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1))
+			return -6;
+	}
+
 	/* collect core counters only for 1st thread in core */
 	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
 		return 0;
 
-	if (do_nhm_cstates) {
+	if (do_nhm_cstates && !do_slm_cstates) {
 		if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
 			return -6;
+	}
+
+	if (do_nhm_cstates) {
 		if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6))
 			return -7;
 	}
@@ -898,7 +917,7 @@
 	if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
 		return 0;
 
-	if (do_nhm_cstates) {
+	if (do_nhm_cstates && !do_slm_cstates) {
 		if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3))
 			return -9;
 		if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6))
@@ -977,7 +996,7 @@
 		ratio, bclk, ratio * bclk);
 
 	get_msr(0, MSR_IA32_POWER_CTL, &msr);
-	fprintf(stderr, "cpu0: MSR_IA32_POWER_CTL: 0x%08llx (C1E: %sabled)\n",
+	fprintf(stderr, "cpu0: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n",
 		msr, msr & 0x2 ? "EN" : "DIS");
 
 	if (!do_ivt_turbo_ratio_limit)
@@ -1046,25 +1065,28 @@
 
 	switch(msr & 0x7) {
 	case 0:
-		fprintf(stderr, "pc0");
+		fprintf(stderr, do_slm_cstates ? "no pkg states" : "pc0");
 		break;
 	case 1:
-		fprintf(stderr, do_snb_cstates ? "pc2" : "pc0");
+		fprintf(stderr, do_slm_cstates ? "no pkg states" : do_snb_cstates ? "pc2" : "pc0");
 		break;
 	case 2:
-		fprintf(stderr, do_snb_cstates ? "pc6-noret" : "pc3");
+		fprintf(stderr, do_slm_cstates ? "invalid" : do_snb_cstates ? "pc6-noret" : "pc3");
 		break;
 	case 3:
-		fprintf(stderr, "pc6");
+		fprintf(stderr, do_slm_cstates ? "invalid" : "pc6");
 		break;
 	case 4:
-		fprintf(stderr, "pc7");
+		fprintf(stderr, do_slm_cstates ? "pc4" : "pc7");
 		break;
 	case 5:
-		fprintf(stderr, do_snb_cstates ? "pc7s" : "invalid");
+		fprintf(stderr, do_slm_cstates ? "invalid" : do_snb_cstates ? "pc7s" : "invalid");
+		break;
+	case 6:
+		fprintf(stderr, do_slm_cstates ? "pc6" : "invalid");
 		break;
 	case 7:
-		fprintf(stderr, "unlimited");
+		fprintf(stderr, do_slm_cstates ? "pc7" : "unlimited");
 		break;
 	default:
 		fprintf(stderr, "invalid");
@@ -1460,6 +1482,8 @@
 	case 0x3F:	/* HSW */
 	case 0x45:	/* HSW */
 	case 0x46:	/* HSW */
+	case 0x37:	/* BYT */
+	case 0x4D:	/* AVN */
 		return 1;
 	case 0x2E:	/* Nehalem-EX Xeon - Beckton */
 	case 0x2F:	/* Westmere-EX Xeon - Eagleton */
@@ -1532,14 +1556,33 @@
 #define	RAPL_POWER_GRANULARITY	0x7FFF	/* 15 bit power granularity */
 #define	RAPL_TIME_GRANULARITY	0x3F /* 6 bit time granularity */
 
+double get_tdp(unsigned int model)
+{
+	unsigned long long msr;
+
+	if (do_rapl & RAPL_PKG_POWER_INFO)
+		if (!get_msr(0, MSR_PKG_POWER_INFO, &msr))
+			return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
+
+	switch (model) {
+	case 0x37:
+	case 0x4D:
+		return 30.0;
+	default:
+		return 135.0;
+	}
+}
+
+
 /*
  * rapl_probe()
  *
- * sets do_rapl
+ * sets do_rapl, rapl_power_units, rapl_energy_units, rapl_time_units
  */
 void rapl_probe(unsigned int family, unsigned int model)
 {
 	unsigned long long msr;
+	unsigned int time_unit;
 	double tdp;
 
 	if (!genuine_intel)
@@ -1555,11 +1598,15 @@
 	case 0x3F:	/* HSW */
 	case 0x45:	/* HSW */
 	case 0x46:	/* HSW */
-		do_rapl = RAPL_PKG | RAPL_CORES | RAPL_GFX;
+		do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO;
 		break;
 	case 0x2D:
 	case 0x3E:
-		do_rapl = RAPL_PKG | RAPL_CORES | RAPL_DRAM | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS;
+		do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO;
+		break;
+	case 0x37:	/* BYT */
+	case 0x4D:	/* AVN */
+		do_rapl = RAPL_PKG | RAPL_CORES;
 		break;
 	default:
 		return;
@@ -1570,19 +1617,22 @@
 		return;
 
 	rapl_power_units = 1.0 / (1 << (msr & 0xF));
-	rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
-	rapl_time_units = 1.0 / (1 << (msr >> 16 & 0xF));
+	if (model == 0x37)
+		rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000;
+	else
+		rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
 
-	/* get TDP to determine energy counter range */
-	if (get_msr(0, MSR_PKG_POWER_INFO, &msr))
-		return;
+	time_unit = msr >> 16 & 0xF;
+	if (time_unit == 0)
+		time_unit = 0xA;
 
-	tdp = ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
+	rapl_time_units = 1.0 / (1 << (time_unit));
+
+	tdp = get_tdp(model);
 
 	rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
-
 	if (verbose)
-		fprintf(stderr, "RAPL: %.0f sec. Joule Counter Range\n", rapl_joule_counter_range);
+		fprintf(stderr, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
 
 	return;
 }
@@ -1668,7 +1718,6 @@
 {
 	unsigned long long msr;
 	int cpu;
-	double local_rapl_power_units, local_rapl_energy_units, local_rapl_time_units;
 
 	if (!do_rapl)
 		return 0;
@@ -1686,23 +1735,13 @@
 	if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr))
 		return -1;
 
-	local_rapl_power_units = 1.0 / (1 << (msr & 0xF));
-	local_rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
-	local_rapl_time_units = 1.0 / (1 << (msr >> 16 & 0xF));
-
-	if (local_rapl_power_units != rapl_power_units)
-		fprintf(stderr, "cpu%d, ERROR: Power units mis-match\n", cpu);
-	if (local_rapl_energy_units != rapl_energy_units)
-		fprintf(stderr, "cpu%d, ERROR: Energy units mis-match\n", cpu);
-	if (local_rapl_time_units != rapl_time_units)
-		fprintf(stderr, "cpu%d, ERROR: Time units mis-match\n", cpu);
-
 	if (verbose) {
 		fprintf(stderr, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx "
 			"(%f Watts, %f Joules, %f sec.)\n", cpu, msr,
-			local_rapl_power_units, local_rapl_energy_units, local_rapl_time_units);
+			rapl_power_units, rapl_energy_units, rapl_time_units);
 	}
-	if (do_rapl & RAPL_PKG) {
+	if (do_rapl & RAPL_PKG_POWER_INFO) {
+
 		if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr))
 			return -5;
 
@@ -1714,6 +1753,9 @@
 			((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
 			((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
 
+	}
+	if (do_rapl & RAPL_PKG) {
+
 		if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr))
 			return -9;
 
@@ -1749,12 +1791,16 @@
 
 		print_power_limit_msr(cpu, msr, "DRAM Limit");
 	}
-	if (do_rapl & RAPL_CORES) {
+	if (do_rapl & RAPL_CORE_POLICY) {
 		if (verbose) {
 			if (get_msr(cpu, MSR_PP0_POLICY, &msr))
 				return -7;
 
 			fprintf(stderr, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF);
+		}
+	}
+	if (do_rapl & RAPL_CORES) {
+		if (verbose) {
 
 			if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr))
 				return -9;
@@ -1813,10 +1859,48 @@
 }
 
 
+int is_slm(unsigned int family, unsigned int model)
+{
+	if (!genuine_intel)
+		return 0;
+	switch (model) {
+	case 0x37:	/* BYT */
+	case 0x4D:	/* AVN */
+		return 1;
+	}
+	return 0;
+}
+
+#define SLM_BCLK_FREQS 5
+double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0};
+
+double slm_bclk(void)
+{
+	unsigned long long msr = 3;
+	unsigned int i;
+	double freq;
+
+	if (get_msr(0, MSR_FSB_FREQ, &msr))
+		fprintf(stderr, "SLM BCLK: unknown\n");
+
+	i = msr & 0xf;
+	if (i >= SLM_BCLK_FREQS) {
+		fprintf(stderr, "SLM BCLK[%d] invalid\n", i);
+		i = 3;
+	}
+	freq = slm_freq_table[i];
+
+	fprintf(stderr, "SLM BCLK: %.1f MHz\n", freq);
+
+	return freq;
+}
+
 double discover_bclk(unsigned int family, unsigned int model)
 {
 	if (is_snb(family, model))
 		return 100.00;
+	else if (is_slm(family, model))
+		return slm_bclk();
 	else
 		return 133.33;
 }
@@ -1873,7 +1957,7 @@
 		fprintf(stderr, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n",
 			cpu, msr, target_c_local);
 
-	if (target_c_local < 85 || target_c_local > 120)
+	if (target_c_local < 85 || target_c_local > 127)
 		goto guess;
 
 	tcc_activation_temp = target_c_local;
@@ -1970,6 +2054,7 @@
 	do_smi = do_nhm_cstates;
 	do_snb_cstates = is_snb(family, model);
 	do_c8_c9_c10 = has_c8_c9_c10(family, model);
+	do_slm_cstates = is_slm(family, model);
 	bclk = discover_bclk(family, model);
 
 	do_nehalem_turbo_ratio_limit = has_nehalem_turbo_ratio_limit(family, model);
@@ -2331,7 +2416,7 @@
 	cmdline(argc, argv);
 
 	if (verbose)
-		fprintf(stderr, "turbostat v3.4 April 17, 2013"
+		fprintf(stderr, "turbostat v3.5 April 26, 2013"
 			" - Len Brown <lenb@kernel.org>\n");
 
 	turbostat_init();