Merge branch 'i2c-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jdelvare/staging * 'i2c-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jdelvare/staging: MAINTAINERS: List i2c-omap and i2c-davinci drivers MAINTAINERS: i2c: Add third maintainer i2c/gpio-i2cmux: Convert to use module_platform_driver() i2c/busses: Use module_platform_driver() i2c-dev: Use memdup_user i2c: Convert to DEFINE_PCI_DEVICE_TABLE i2c-ali1535: enable SPARC support i2c: Fix error value returned by several bus drivers

commit: f88609a0e447fb73851c0f640ed47257838ff99f [log] [tgz]
author: Linus Torvalds <torvalds@linux-foundation.org> Sat Jan 14 12:25:40 2012 -0800
committer: Linus Torvalds <torvalds@linux-foundation.org> Sat Jan 14 12:25:40 2012 -0800
tree: 12d2d77f42f7721fec8d9f99780679f7ddd0875c
parent: 2145199c4f0db7c517dd788abec301dc84b91bd0 [diff]
parent: 046d0a37024a5b1e66ec9314e5e7feba42feb811 [diff]
diff --git a/Documentation/ABI/testing/sysfs-kernel-slab b/Documentation/ABI/testing/sysfs-kernel-slab
index 8b093f8..91bd6ca 100644
--- a/Documentation/ABI/testing/sysfs-kernel-slab
+++ b/Documentation/ABI/testing/sysfs-kernel-slab

@@ -346,6 +346,10 @@
 		number of objects per slab.  If a slab cannot be allocated
 		because of fragmentation, SLUB will retry with the minimum order
 		possible depending on its characteristics.
+		When the debug_guardpage_minorder=N (N > 0) parameter is specified
+		(see Documentation/kernel-parameters.txt), the minimum possible
+		order is used and this sysfs entry cannot be used to change
+		the order at run time.
 
 What:		/sys/kernel/slab/cache/order_fallback
 Date:		April 2008

diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt
index 4d8774f..4c95c00 100644
--- a/Documentation/cgroups/memory.txt
+++ b/Documentation/cgroups/memory.txt

@@ -61,7 +61,7 @@
  memory.failcnt			 # show the number of memory usage hits limits
  memory.memsw.failcnt		 # show the number of memory+Swap hits limits
  memory.max_usage_in_bytes	 # show max memory usage recorded
- memory.memsw.usage_in_bytes	 # show max memory+Swap usage recorded
+ memory.memsw.max_usage_in_bytes # show max memory+Swap usage recorded
  memory.soft_limit_in_bytes	 # set/show soft limit of memory usage
  memory.stat			 # show various statistics
  memory.use_hierarchy		 # set/show hierarchical account enabled
@@ -410,8 +410,11 @@
 cache		- # of bytes of page cache memory.
 rss		- # of bytes of anonymous and swap cache memory.
 mapped_file	- # of bytes of mapped file (includes tmpfs/shmem)
-pgpgin		- # of pages paged in (equivalent to # of charging events).
-pgpgout		- # of pages paged out (equivalent to # of uncharging events).
+pgpgin		- # of charging events to the memory cgroup. The charging
+		event happens each time a page is accounted as either mapped
+		anon page(RSS) or cache page(Page Cache) to the cgroup.
+pgpgout		- # of uncharging events to the memory cgroup. The uncharging
+		event happens each time a page is unaccounted from the cgroup.
 swap		- # of bytes of swap usage
 inactive_anon	- # of bytes of anonymous memory and swap cache memory on
 		LRU list.

diff --git a/Documentation/devicetree/bindings/mfd/mc13xxx.txt b/Documentation/devicetree/bindings/mfd/mc13xxx.txt
new file mode 100644
index 0000000..19f6af4
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/mc13xxx.txt

@@ -0,0 +1,78 @@
+* Freescale MC13783/MC13892 Power Management Integrated Circuit (PMIC)
+
+Required properties:
+- compatible : Should be "fsl,mc13783" or "fsl,mc13892"
+
+Optional properties:
+- fsl,mc13xxx-uses-adc : Indicate the ADC is being used
+- fsl,mc13xxx-uses-codec : Indicate the Audio Codec is being used
+- fsl,mc13xxx-uses-rtc : Indicate the RTC is being used
+- fsl,mc13xxx-uses-touch : Indicate the touchscreen controller is being used
+
+Sub-nodes:
+- regulators : Contain the regulator nodes.  The MC13892 regulators are
+  bound using their names as listed below with their registers and bits
+  for enabling.
+
+    vcoincell : regulator VCOINCELL (register 13, bit 23)
+    sw1       : regulator SW1	    (register 24, bit 0)
+    sw2       : regulator SW2	    (register 25, bit 0)
+    sw3       : regulator SW3	    (register 26, bit 0)
+    sw4       : regulator SW4	    (register 27, bit 0)
+    swbst     : regulator SWBST	    (register 29, bit 20)
+    vgen1     : regulator VGEN1	    (register 32, bit 0)
+    viohi     : regulator VIOHI	    (register 32, bit 3)
+    vdig      : regulator VDIG	    (register 32, bit 9)
+    vgen2     : regulator VGEN2	    (register 32, bit 12)
+    vpll      : regulator VPLL	    (register 32, bit 15)
+    vusb2     : regulator VUSB2	    (register 32, bit 18)
+    vgen3     : regulator VGEN3	    (register 33, bit 0)
+    vcam      : regulator VCAM	    (register 33, bit 6)
+    vvideo    : regulator VVIDEO    (register 33, bit 12)
+    vaudio    : regulator VAUDIO    (register 33, bit 15)
+    vsd       : regulator VSD	    (register 33, bit 18)
+    gpo1      : regulator GPO1	    (register 34, bit 6)
+    gpo2      : regulator GPO2	    (register 34, bit 8)
+    gpo3      : regulator GPO3	    (register 34, bit 10)
+    gpo4      : regulator GPO4	    (register 34, bit 12)
+    pwgt1spi  : regulator PWGT1SPI  (register 34, bit 15)
+    pwgt2spi  : regulator PWGT2SPI  (register 34, bit 16)
+    vusb      : regulator VUSB	    (register 50, bit 3)
+
+  The binding details of individual regulator devices can be found in:
+  Documentation/devicetree/bindings/regulator/regulator.txt
+
+Examples:
+
+ecspi@70010000 { /* ECSPI1 */
+	fsl,spi-num-chipselects = <2>;
+	cs-gpios = <&gpio3 24 0>, /* GPIO4_24 */
+		   <&gpio3 25 0>; /* GPIO4_25 */
+	status = "okay";
+
+	pmic: mc13892@0 {
+		#address-cells = <1>;
+		#size-cells = <0>;
+		compatible = "fsl,mc13892";
+		spi-max-frequency = <6000000>;
+		reg = <0>;
+		interrupt-parent = <&gpio0>;
+		interrupts = <8>;
+
+		regulators {
+			sw1_reg: mc13892__sw1 {
+				regulator-min-microvolt = <600000>;
+				regulator-max-microvolt = <1375000>;
+				regulator-boot-on;
+				regulator-always-on;
+			};
+
+			sw2_reg: mc13892__sw2 {
+				regulator-min-microvolt = <900000>;
+				regulator-max-microvolt = <1850000>;
+				regulator-boot-on;
+				regulator-always-on;
+			};
+		};
+	};
+};

diff --git a/Documentation/devicetree/bindings/mfd/twl-familly.txt b/Documentation/devicetree/bindings/mfd/twl-familly.txt
new file mode 100644
index 0000000..a66fcf9
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/twl-familly.txt

@@ -0,0 +1,47 @@
+Texas Instruments TWL family
+
+The TWLs are Integrated Power Management Chips.
+Some versions might contain many more analog functions, such as a
+USB transceiver or an audio amplifier.
+These chips are connected to an i2c bus.
+
+
+Required properties:
+- compatible : Must be "ti,twl4030";
+  For Integrated power-management/audio CODEC device used in OMAP3
+  based boards
+- compatible : Must be "ti,twl6030";
+  For Integrated power-management used in OMAP4 based boards
+- interrupts : This i2c device has an IRQ line connected to the main SoC
+- interrupt-controller : Since the twl supports several interrupts internally,
+  it is considered an interrupt controller cascaded to the SoC one.
+- #interrupt-cells = <1>;
+- interrupt-parent : The parent interrupt controller.
+
+Optional node:
+- Child nodes contained in the twl. The twl family is made of several variants
+  that support a different number of features.
+  The child nodes will thus depend on the capabilities of the variant.
+
+
+Example:
+/*
+ * Integrated Power Management Chip
+ * http://www.ti.com/lit/ds/symlink/twl6030.pdf
+ */
+twl@48 {
+    compatible = "ti,twl6030";
+    reg = <0x48>;
+    interrupts = <39>; /* IRQ_SYS_1N cascaded to gic */
+    interrupt-controller;
+    #interrupt-cells = <1>;
+    interrupt-parent = <&gic>;
+    #address-cells = <1>;
+    #size-cells = <0>;
+
+    twl_rtc {
+        compatible = "ti,twl_rtc";
+        interrupts = <11>;
+        reg = <0>;
+    };
+};

diff --git a/Documentation/dma-buf-sharing.txt b/Documentation/dma-buf-sharing.txt
index 510eab3..225f96d 100644
--- a/Documentation/dma-buf-sharing.txt
+++ b/Documentation/dma-buf-sharing.txt

@@ -219,6 +219,10 @@
    If the exporter chooses not to allow an attach() operation once a
    map_dma_buf() API has been called, it simply returns an error.
 
+Miscellaneous notes:
+- Any exporters or users of the dma-buf buffer sharing framework must have
+  a 'select DMA_SHARED_BUFFER' in their respective Kconfigs.
+
 References:
 [1] struct dma_buf_ops in include/linux/dma-buf.h
 [2] All interfaces mentioned above defined in include/linux/dma-buf.h

diff --git a/Documentation/filesystems/ceph.txt b/Documentation/filesystems/ceph.txt
index 763d8eb..d6030aa 100644
--- a/Documentation/filesystems/ceph.txt
+++ b/Documentation/filesystems/ceph.txt

@@ -119,12 +119,20 @@
 	must rely on TCP's error correction to detect data corruption
 	in the data payload.
 
-  noasyncreaddir
-	Disable client's use its local cache to satisfy	readdir
-	requests.  (This does not change correctness; the client uses
-	cached metadata only when a lease or capability ensures it is
-	valid.)
+  dcache
+        Use the dcache contents to perform negative lookups and
+        readdir when the client has the entire directory contents in
+        its cache.  (This does not change correctness; the client uses
+        cached metadata only when a lease or capability ensures it is
+        valid.)
 
+  nodcache
+        Do not use the dcache as above.  This avoids a significant amount of
+        complex code, sacrificing performance without affecting correctness,
+        and is useful for tracking down bugs.
+
+  noasyncreaddir
+	Do not use the dcache as above for readdir.
 
 More Information
 ================

diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 12fee13..a76a26a 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt

@@ -307,6 +307,9 @@
   blkio_ticks   time spent waiting for block IO
   gtime         guest time of the task in jiffies
   cgtime        guest time of the task children in jiffies
+  start_data    address above which program data+bss is placed
+  end_data      address below which program data+bss is placed
+  start_brk     address above which program heap can be expanded with brk()
 ..............................................................................
 
 The /proc/PID/maps file containing the currently mapped memory regions and

diff --git a/Documentation/filesystems/squashfs.txt b/Documentation/filesystems/squashfs.txt
index 7db3ebd..403c090 100644
--- a/Documentation/filesystems/squashfs.txt
+++ b/Documentation/filesystems/squashfs.txt

@@ -93,8 +93,8 @@
 
 Compressed data blocks are written to the filesystem as files are read from
 the source directory, and checked for duplicates.  Once all file data has been
-written the completed inode, directory, fragment, export and uid/gid lookup
-tables are written.
+written the completed inode, directory, fragment, export, uid/gid lookup and
+xattr tables are written.
 
 3.1 Compression options
 -----------------------
@@ -151,7 +151,7 @@
 and at lookup the index is scanned linearly looking for the first filename
 alphabetically larger than the filename being looked up.  At this point the
 location of the metadata block the filename is in has been found.
-The general idea of the index is ensure only one metadata block needs to be
+The general idea of the index is to ensure only one metadata block needs to be
 decompressed to do a lookup irrespective of the length of the directory.
 This scheme has the advantage that it doesn't require extra memory overhead
 and doesn't require much extra storage on disk.

diff --git a/Documentation/mmc/mmc-dev-attrs.txt b/Documentation/mmc/mmc-dev-attrs.txt
index 8898a95..22ae844 100644
--- a/Documentation/mmc/mmc-dev-attrs.txt
+++ b/Documentation/mmc/mmc-dev-attrs.txt

@@ -64,3 +64,13 @@
 	size specified by the card.
 
 	"preferred_erase_size" is in bytes.
+
+SD/MMC/SDIO Clock Gating Attribute
+==================================
+
+Read and write access is provided to the following attribute.
+This attribute appears only if CONFIG_MMC_CLKGATE is enabled.
+
+	clkgate_delay	Tune the clock gating delay with desired value in milliseconds.
+
+echo <desired delay> > /sys/class/mmc_host/mmcX/clkgate_delay

diff --git a/Documentation/mmc/mmc-dev-parts.txt b/Documentation/mmc/mmc-dev-parts.txt
index 2db28b8..f08d078 100644
--- a/Documentation/mmc/mmc-dev-parts.txt
+++ b/Documentation/mmc/mmc-dev-parts.txt

@@ -25,3 +25,16 @@
 To re-enable read-only access:
 
 echo 1 > /sys/block/mmcblkXbootY/force_ro
+
+The boot partitions can also be locked read only until the next power on,
+with:
+
+echo 1 > /sys/block/mmcblkXbootY/ro_lock_until_next_power_on
+
+This is a feature of the card and not of the kernel. If the card does
+not support boot partition locking, the file will not exist. If the
+feature has been disabled on the card, the file will be read-only.
+
+The boot partitions can also be locked permanently, but this feature is
+not accessible through sysfs in order to avoid accidental or malicious
+bricking.

diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index 6d8cd8b..8c20fbd 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt

@@ -415,6 +415,14 @@
 
 ==============================================================
 
+ns_last_pid:
+
+The last pid allocated in the current pid namespace (the one in which the
+task using this sysctl lives). When selecting a pid for the next task on
+fork, the kernel tries to allocate a number starting from this one.
+
+==============================================================
+
 powersave-nap: (PPC only)
 
 If set, Linux-PPC will use the 'nap' mode of powersaving,

diff --git a/Documentation/vm/slub.txt b/Documentation/vm/slub.txt
index 2acdda9..6752870 100644
--- a/Documentation/vm/slub.txt
+++ b/Documentation/vm/slub.txt

@@ -131,7 +131,10 @@
 slub_max_order specified the order at which slub_min_objects should no
 longer be checked. This is useful to avoid SLUB trying to generate
 super large order pages to fit slub_min_objects of a slab cache with
-large object sizes into one high order page.
+large object sizes into one high order page. Setting the command line
+parameter debug_guardpage_minorder=N (N > 0) forces slub_max_order
+to 0, which causes slabs to be allocated with the minimum possible
+order.
 
 SLUB Debug output
 -----------------

diff --git a/arch/Kconfig b/arch/Kconfig
index 2505740..4f55c73 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig

@@ -185,4 +185,18 @@
 config ARCH_HAVE_NMI_SAFE_CMPXCHG
 	bool
 
+config HAVE_ALIGNED_STRUCT_PAGE
+	bool
+	help
+	  This makes sure that struct pages are double word aligned and that
+	  e.g. the SLUB allocator can perform double word atomic operations
+	  on a struct page for better performance. However selecting this
+	  might increase the size of a struct page by a word.
+
+config HAVE_CMPXCHG_LOCAL
+	bool
+
+config HAVE_CMPXCHG_DOUBLE
+	bool
+
 source "kernel/gcov/Kconfig"

diff --git a/arch/arm/mach-sa1100/assabet.c b/arch/arm/mach-sa1100/assabet.c
index 6b93e20..5bc6b38 100644
--- a/arch/arm/mach-sa1100/assabet.c
+++ b/arch/arm/mach-sa1100/assabet.c

@@ -202,6 +202,7 @@
 static struct mcp_plat_data assabet_mcp_data = {
 	.mccr0		= MCCR0_ADM,
 	.sclk_rate	= 11981000,
+	.codec		= "ucb1x00",
 };
 
 static void __init assabet_init(void)
@@ -252,6 +253,17 @@
 	sa11x0_register_mtd(&assabet_flash_data, assabet_flash_resources,
 			    ARRAY_SIZE(assabet_flash_resources));
 	sa11x0_register_irda(&assabet_irda_data);
+
+	/*
+	 * Setup the PPC unit correctly.
+	 */
+	PPDR &= ~PPC_RXD4;
+	PPDR |= PPC_TXD4 | PPC_SCLK | PPC_SFRM;
+	PSDR |= PPC_RXD4;
+	PSDR &= ~(PPC_TXD4 | PPC_SCLK | PPC_SFRM);
+	PPSR &= ~(PPC_TXD4 | PPC_SCLK | PPC_SFRM);
+
+	ASSABET_BCR_set(ASSABET_BCR_CODEC_RST);
 	sa11x0_register_mcp(&assabet_mcp_data);
 }
 

diff --git a/arch/arm/mach-sa1100/cerf.c b/arch/arm/mach-sa1100/cerf.c
index 11bb6d0..d12d0f4 100644
--- a/arch/arm/mach-sa1100/cerf.c
+++ b/arch/arm/mach-sa1100/cerf.c

@@ -124,12 +124,23 @@
 static struct mcp_plat_data cerf_mcp_data = {
 	.mccr0		= MCCR0_ADM,
 	.sclk_rate	= 11981000,
+	.codec		= "ucb1x00",
 };
 
 static void __init cerf_init(void)
 {
 	platform_add_devices(cerf_devices, ARRAY_SIZE(cerf_devices));
 	sa11x0_register_mtd(&cerf_flash_data, &cerf_flash_resource, 1);
+
+	/*
+	 * Setup the PPC unit correctly.
+	 */
+	PPDR &= ~PPC_RXD4;
+	PPDR |= PPC_TXD4 | PPC_SCLK | PPC_SFRM;
+	PSDR |= PPC_RXD4;
+	PSDR &= ~(PPC_TXD4 | PPC_SCLK | PPC_SFRM);
+	PPSR &= ~(PPC_TXD4 | PPC_SCLK | PPC_SFRM);
+
 	sa11x0_register_mcp(&cerf_mcp_data);
 }
 

diff --git a/arch/arm/mach-sa1100/collie.c b/arch/arm/mach-sa1100/collie.c
index b9060e2..c483912 100644
--- a/arch/arm/mach-sa1100/collie.c
+++ b/arch/arm/mach-sa1100/collie.c

@@ -27,6 +27,7 @@
 #include <linux/timer.h>
 #include <linux/gpio.h>
 #include <linux/pda_power.h>
+#include <linux/mfd/ucb1x00.h>
 
 #include <mach/hardware.h>
 #include <asm/mach-types.h>
@@ -85,10 +86,15 @@
 	.num_devs	= 1,
 };
 
+static struct ucb1x00_plat_data collie_ucb1x00_data = {
+	.gpio_base	= COLLIE_TC35143_GPIO_BASE,
+};
+
 static struct mcp_plat_data collie_mcp_data = {
 	.mccr0		= MCCR0_ADM | MCCR0_ExtClk,
 	.sclk_rate	= 9216000,
-	.gpio_base	= COLLIE_TC35143_GPIO_BASE,
+	.codec		= "ucb1x00",
+	.codec_pdata	= &collie_ucb1x00_data,
 };
 
 /*
@@ -351,6 +357,16 @@
 
 	sa11x0_register_mtd(&collie_flash_data, collie_flash_resources,
 			    ARRAY_SIZE(collie_flash_resources));
+
+	/*
+	 * Setup the PPC unit correctly.
+	 */
+	PPDR &= ~PPC_RXD4;
+	PPDR |= PPC_TXD4 | PPC_SCLK | PPC_SFRM;
+	PSDR |= PPC_RXD4;
+	PSDR &= ~(PPC_TXD4 | PPC_SCLK | PPC_SFRM);
+	PPSR &= ~(PPC_TXD4 | PPC_SCLK | PPC_SFRM);
+
 	sa11x0_register_mcp(&collie_mcp_data);
 
 	sharpsl_save_param();

diff --git a/arch/arm/mach-sa1100/generic.c b/arch/arm/mach-sa1100/generic.c
index 480d2ea..e3a28ca 100644
--- a/arch/arm/mach-sa1100/generic.c
+++ b/arch/arm/mach-sa1100/generic.c

@@ -217,10 +217,15 @@
 static struct resource sa11x0mcp_resources[] = {
 	[0] = {
 		.start	= __PREG(Ser4MCCR0),
-		.end	= __PREG(Ser4MCCR0) + 0xffff,
+		.end	= __PREG(Ser4MCCR0) + 0x1C - 1,
 		.flags	= IORESOURCE_MEM,
 	},
 	[1] = {
+		.start	= __PREG(Ser4MCCR1),
+		.end	= __PREG(Ser4MCCR1) + 0x4 - 1,
+		.flags	= IORESOURCE_MEM,
+	},
+	[2] = {
 		.start	= IRQ_Ser4MCP,
 		.end	= IRQ_Ser4MCP,
 		.flags	= IORESOURCE_IRQ,

diff --git a/arch/arm/mach-sa1100/include/mach/mcp.h b/arch/arm/mach-sa1100/include/mach/mcp.h
index ed1a331..586cec8 100644
--- a/arch/arm/mach-sa1100/include/mach/mcp.h
+++ b/arch/arm/mach-sa1100/include/mach/mcp.h

@@ -17,6 +17,8 @@
 	u32 mccr1;
 	unsigned int sclk_rate;
 	int gpio_base;
+	const char *codec;
+	void *codec_pdata;
 };
 
 #endif

diff --git a/arch/arm/mach-sa1100/lart.c b/arch/arm/mach-sa1100/lart.c
index af4e276..d117cea 100644
--- a/arch/arm/mach-sa1100/lart.c
+++ b/arch/arm/mach-sa1100/lart.c

@@ -24,10 +24,20 @@
 static struct mcp_plat_data lart_mcp_data = {
 	.mccr0		= MCCR0_ADM,
 	.sclk_rate	= 11981000,
+	.codec		= "ucb1x00",
 };
 
 static void __init lart_init(void)
 {
+	/*
+	 * Setup the PPC unit correctly.
+	 */
+	PPDR &= ~PPC_RXD4;
+	PPDR |= PPC_TXD4 | PPC_SCLK | PPC_SFRM;
+	PSDR |= PPC_RXD4;
+	PSDR &= ~(PPC_TXD4 | PPC_SCLK | PPC_SFRM);
+	PPSR &= ~(PPC_TXD4 | PPC_SCLK | PPC_SFRM);
+
 	sa11x0_register_mcp(&lart_mcp_data);
 }
 

diff --git a/arch/arm/mach-sa1100/shannon.c b/arch/arm/mach-sa1100/shannon.c
index 318b2b7..748d344 100644
--- a/arch/arm/mach-sa1100/shannon.c
+++ b/arch/arm/mach-sa1100/shannon.c

@@ -55,11 +55,22 @@
 static struct mcp_plat_data shannon_mcp_data = {
 	.mccr0		= MCCR0_ADM,
 	.sclk_rate	= 11981000,
+	.codec		= "ucb1x00",
 };
 
 static void __init shannon_init(void)
 {
 	sa11x0_register_mtd(&shannon_flash_data, &shannon_flash_resource, 1);
+
+	/*
+	 * Setup the PPC unit correctly.
+	 */
+	PPDR &= ~PPC_RXD4;
+	PPDR |= PPC_TXD4 | PPC_SCLK | PPC_SFRM;
+	PSDR |= PPC_RXD4;
+	PSDR &= ~(PPC_TXD4 | PPC_SCLK | PPC_SFRM);
+	PPSR &= ~(PPC_TXD4 | PPC_SCLK | PPC_SFRM);
+
 	sa11x0_register_mcp(&shannon_mcp_data);
 }
 

diff --git a/arch/arm/mach-sa1100/simpad.c b/arch/arm/mach-sa1100/simpad.c
index e17c04d..458ecec 100644
--- a/arch/arm/mach-sa1100/simpad.c
+++ b/arch/arm/mach-sa1100/simpad.c

@@ -14,6 +14,7 @@
 #include <linux/mtd/partitions.h>
 #include <linux/io.h>
 #include <linux/gpio.h>
+#include <linux/mfd/ucb1x00.h>
 
 #include <asm/irq.h>
 #include <mach/hardware.h>
@@ -187,10 +188,15 @@
 	}
 };
 
+static struct ucb1x00_plat_data simpad_ucb1x00_data = {
+	.gpio_base	= SIMPAD_UCB1X00_GPIO_BASE,
+};
+
 static struct mcp_plat_data simpad_mcp_data = {
 	.mccr0		= MCCR0_ADM,
 	.sclk_rate	= 11981000,
-	.gpio_base	= SIMPAD_UCB1X00_GPIO_BASE,
+	.codec		= "ucb1300",
+	.codec_pdata	= &simpad_ucb1x00_data,
 };
 
 
@@ -378,6 +384,16 @@
 
 	sa11x0_register_mtd(&simpad_flash_data, simpad_flash_resources,
 			      ARRAY_SIZE(simpad_flash_resources));
+
+	/*
+	 * Setup the PPC unit correctly.
+	 */
+	PPDR &= ~PPC_RXD4;
+	PPDR |= PPC_TXD4 | PPC_SCLK | PPC_SFRM;
+	PSDR |= PPC_RXD4;
+	PSDR &= ~(PPC_TXD4 | PPC_SCLK | PPC_SFRM);
+	PPSR &= ~(PPC_TXD4 | PPC_SCLK | PPC_SFRM);
+
 	sa11x0_register_mcp(&simpad_mcp_data);
 
 	ret = platform_add_devices(devices, ARRAY_SIZE(devices));

diff --git a/arch/arm/mach-ux500/board-mop500.c b/arch/arm/mach-ux500/board-mop500.c
index 9361a52..5c00712 100644
--- a/arch/arm/mach-ux500/board-mop500.c
+++ b/arch/arm/mach-ux500/board-mop500.c

@@ -19,11 +19,11 @@
 #include <linux/amba/pl022.h>
 #include <linux/amba/serial.h>
 #include <linux/spi/spi.h>
-#include <linux/mfd/ab8500.h>
+#include <linux/mfd/abx500/ab8500.h>
 #include <linux/regulator/ab8500.h>
 #include <linux/mfd/tc3589x.h>
 #include <linux/mfd/tps6105x.h>
-#include <linux/mfd/ab8500/gpio.h>
+#include <linux/mfd/abx500/ab8500-gpio.h>
 #include <linux/leds-lp5521.h>
 #include <linux/input.h>
 #include <linux/smsc911x.h>

diff --git a/arch/arm/mach-ux500/board-u5500.c b/arch/arm/mach-ux500/board-u5500.c
index fe1569b..9de9e9c 100644
--- a/arch/arm/mach-ux500/board-u5500.c
+++ b/arch/arm/mach-ux500/board-u5500.c

@@ -10,7 +10,7 @@
 #include <linux/amba/bus.h>
 #include <linux/irq.h>
 #include <linux/i2c.h>
-#include <linux/mfd/ab5500/ab5500.h>
+#include <linux/mfd/abx500/ab5500.h>
 
 #include <asm/hardware/gic.h>
 #include <asm/mach/arch.h>

diff --git a/arch/arm/mach-ux500/include/mach/irqs-board-mop500.h b/arch/arm/mach-ux500/include/mach/irqs-board-mop500.h
index 4796990..d2d4131 100644
--- a/arch/arm/mach-ux500/include/mach/irqs-board-mop500.h
+++ b/arch/arm/mach-ux500/include/mach/irqs-board-mop500.h

@@ -9,7 +9,7 @@
 #define __MACH_IRQS_BOARD_MOP500_H
 
 /* Number of AB8500 irqs is taken from header file */
-#include <linux/mfd/ab8500.h>
+#include <linux/mfd/abx500/ab8500.h>
 
 #define MOP500_AB8500_IRQ_BASE		IRQ_BOARD_START
 #define MOP500_AB8500_IRQ_END		(MOP500_AB8500_IRQ_BASE \

diff --git a/arch/arm/plat-samsung/include/plat/sdhci.h b/arch/arm/plat-samsung/include/plat/sdhci.h
index 656dc00..f82f888 100644
--- a/arch/arm/plat-samsung/include/plat/sdhci.h
+++ b/arch/arm/plat-samsung/include/plat/sdhci.h

@@ -63,6 +63,7 @@
 struct s3c_sdhci_platdata {
 	unsigned int	max_width;
 	unsigned int	host_caps;
+	unsigned int	pm_caps;
 	enum cd_types	cd_type;
 	enum clk_types	clk_type;
 

diff --git a/arch/arm/plat-samsung/platformdata.c b/arch/arm/plat-samsung/platformdata.c
index ceb9fa3..0f70718 100644
--- a/arch/arm/plat-samsung/platformdata.c
+++ b/arch/arm/plat-samsung/platformdata.c

@@ -53,6 +53,8 @@
 		set->cfg_gpio = pd->cfg_gpio;
 	if (pd->host_caps)
 		set->host_caps |= pd->host_caps;
+	if (pd->pm_caps)
+		set->pm_caps |= pd->pm_caps;
 	if (pd->clk_type)
 		set->clk_type = pd->clk_type;
 }

diff --git a/arch/avr32/include/asm/system.h b/arch/avr32/include/asm/system.h
index 9702c221..62d9ded 100644
--- a/arch/avr32/include/asm/system.h
+++ b/arch/avr32/include/asm/system.h

@@ -169,7 +169,7 @@
 #define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
 
 struct pt_regs;
-void NORET_TYPE die(const char *str, struct pt_regs *regs, long err);
+void die(const char *str, struct pt_regs *regs, long err);
 void _exception(long signr, struct pt_regs *regs, int code,
 		unsigned long addr);
 

diff --git a/arch/avr32/kernel/traps.c b/arch/avr32/kernel/traps.c
index 7aa2575..3d760c0 100644
--- a/arch/avr32/kernel/traps.c
+++ b/arch/avr32/kernel/traps.c

@@ -24,7 +24,7 @@
 
 static DEFINE_SPINLOCK(die_lock);
 
-void NORET_TYPE die(const char *str, struct pt_regs *regs, long err)
+void die(const char *str, struct pt_regs *regs, long err)
 {
 	static int die_counter;
 

diff --git a/arch/ia64/include/asm/processor.h b/arch/ia64/include/asm/processor.h
index d9f397f..691be0b 100644
--- a/arch/ia64/include/asm/processor.h
+++ b/arch/ia64/include/asm/processor.h

@@ -309,7 +309,6 @@
 }
 
 #define start_thread(regs,new_ip,new_sp) do {							\
-	set_fs(USER_DS);									\
 	regs->cr_ipsr = ((regs->cr_ipsr | (IA64_PSR_BITS_TO_SET | IA64_PSR_CPL))		\
 			 & ~(IA64_PSR_BITS_TO_CLEAR | IA64_PSR_RI | IA64_PSR_IS));		\
 	regs->cr_iip = new_ip;									\

diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h
index 7617248..7a3bd25 100644
--- a/arch/ia64/include/asm/unistd.h
+++ b/arch/ia64/include/asm/unistd.h

@@ -323,11 +323,12 @@
 #define __NR_sendmmsg			1331
 #define __NR_process_vm_readv		1332
 #define __NR_process_vm_writev		1333
+#define __NR_accept4			1334
 
 #ifdef __KERNEL__
 
 
-#define NR_syscalls			310 /* length of syscall table */
+#define NR_syscalls			311 /* length of syscall table */
 
 /*
  * The following defines stop scripts/checksyscalls.sh from complaining about

diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index 5b31d46..1ccbe12 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S

@@ -1779,6 +1779,7 @@
 	data8 sys_sendmmsg
 	data8 sys_process_vm_readv
 	data8 sys_process_vm_writev
+	data8 sys_accept4
 
 	.org sys_call_table + 8*NR_syscalls	// guard against failures to increase NR_syscalls
 #endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */

diff --git a/arch/ia64/kernel/machine_kexec.c b/arch/ia64/kernel/machine_kexec.c
index 3d3aeef..4eed358 100644
--- a/arch/ia64/kernel/machine_kexec.c
+++ b/arch/ia64/kernel/machine_kexec.c

@@ -27,11 +27,11 @@
 #include <asm/sal.h>
 #include <asm/mca.h>
 
-typedef NORET_TYPE void (*relocate_new_kernel_t)(
+typedef void (*relocate_new_kernel_t)(
 					unsigned long indirection_page,
 					unsigned long start_address,
 					struct ia64_boot_param *boot_param,
-					unsigned long pal_addr) ATTRIB_NORET;
+					unsigned long pal_addr) __noreturn;
 
 struct kimage *ia64_kimage;
 

diff --git a/arch/m68k/amiga/config.c b/arch/m68k/amiga/config.c
index 82a4bb5..b95a451 100644
--- a/arch/m68k/amiga/config.c
+++ b/arch/m68k/amiga/config.c

@@ -511,8 +511,7 @@
 	return ticks + offset;
 }
 
-static NORET_TYPE void amiga_reset(void)
-    ATTRIB_NORET;
+static void amiga_reset(void)  __noreturn;
 
 static void amiga_reset(void)
 {

diff --git a/arch/mips/include/asm/ptrace.h b/arch/mips/include/asm/ptrace.h
index de39b1f..7b99c67 100644
--- a/arch/mips/include/asm/ptrace.h
+++ b/arch/mips/include/asm/ptrace.h

@@ -144,7 +144,7 @@
 extern asmlinkage void syscall_trace_enter(struct pt_regs *regs);
 extern asmlinkage void syscall_trace_leave(struct pt_regs *regs);
 
-extern NORET_TYPE void die(const char *, struct pt_regs *) ATTRIB_NORET;
+extern void die(const char *, struct pt_regs *) __noreturn;
 
 static inline void die_if_kernel(const char *str, struct pt_regs *regs)
 {

diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 5c8a49d..bbddb86 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c

@@ -1340,7 +1340,7 @@
 /*
  * NMI exception handler.
  */
-NORET_TYPE void ATTRIB_NORET nmi_exception_handler(struct pt_regs *regs)
+void __noreturn nmi_exception_handler(struct pt_regs *regs)
 {
 	bust_spinlocks(1);
 	printk("NMI taken!!!!\n");

diff --git a/arch/mn10300/include/asm/exceptions.h b/arch/mn10300/include/asm/exceptions.h
index ca3e205..95a4d42 100644
--- a/arch/mn10300/include/asm/exceptions.h
+++ b/arch/mn10300/include/asm/exceptions.h

@@ -110,7 +110,7 @@
 extern asmlinkage void misalignment(struct pt_regs *, enum exception_code);
 
 extern void die(const char *, struct pt_regs *, enum exception_code)
-	ATTRIB_NORET;
+	__noreturn;
 
 extern int die_if_no_fixup(const char *, struct pt_regs *, enum exception_code);
 

diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h
index 9ce66e9..7213ec9 100644
--- a/arch/parisc/include/asm/processor.h
+++ b/arch/parisc/include/asm/processor.h

@@ -196,7 +196,6 @@
 	/* offset pc for priv. level */			\
 	pc |= 3;					\
 							\
-	set_fs(USER_DS);				\
 	regs->iasq[0] = spaceid;			\
 	regs->iasq[1] = spaceid;			\
 	regs->iaoq[0] = pc;				\
@@ -299,7 +298,6 @@
 	elf_addr_t pc = (elf_addr_t)new_pc | 3;		\
 	elf_caddr_t *argv = (elf_caddr_t *)bprm->exec + 1;	\
 							\
-	set_fs(USER_DS);				\
 	regs->iasq[0] = spaceid;			\
 	regs->iasq[1] = spaceid;			\
 	regs->iaoq[0] = pc;				\

diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
index 4b4b918..62c60b8 100644
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c

@@ -192,7 +192,6 @@
 	/* Only needs to handle fpu stuff or perf monitors.
 	** REVISIT: several arches implement a "lazy fpu state".
 	*/
-	set_fs(USER_DS);
 }
 
 void release_thread(struct task_struct *dead_task)

diff --git a/arch/powerpc/kernel/machine_kexec_32.c b/arch/powerpc/kernel/machine_kexec_32.c
index e63f2e7..affe5dc 100644
--- a/arch/powerpc/kernel/machine_kexec_32.c
+++ b/arch/powerpc/kernel/machine_kexec_32.c

@@ -16,10 +16,10 @@
 #include <asm/hw_irq.h>
 #include <asm/io.h>
 
-typedef NORET_TYPE void (*relocate_new_kernel_t)(
+typedef void (*relocate_new_kernel_t)(
 				unsigned long indirection_page,
 				unsigned long reboot_code_buffer,
-				unsigned long start_address) ATTRIB_NORET;
+				unsigned long start_address) __noreturn;
 
 /*
  * This is a generic machine_kexec function suitable at least for

diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
index 26ccbf7..d7f6090 100644
--- a/arch/powerpc/kernel/machine_kexec_64.c
+++ b/arch/powerpc/kernel/machine_kexec_64.c

@@ -307,9 +307,9 @@
 struct paca_struct kexec_paca;
 
 /* Our assembly helper, in kexec_stub.S */
-extern NORET_TYPE void kexec_sequence(void *newstack, unsigned long start,
-					void *image, void *control,
-					void (*clear_all)(void)) ATTRIB_NORET;
+extern void kexec_sequence(void *newstack, unsigned long start,
+			   void *image, void *control,
+			   void (*clear_all)(void)) __noreturn;
 
 /* too late to fail here */
 void default_machine_kexec(struct kimage *image)

diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 4ff3d8e..3feefc3 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c

@@ -58,7 +58,7 @@
  * Allocate node_to_cpumask_map based on number of available nodes
  * Requires node_possible_map to be valid.
  *
- * Note: node_to_cpumask() is not valid until after this is done.
+ * Note: cpumask_of_node() is not valid until after this is done.
  */
 static void __init setup_node_to_cpumask_map(void)
 {

diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c
index 330a57b..36f957f 100644
--- a/arch/powerpc/platforms/pseries/nvram.c
+++ b/arch/powerpc/platforms/pseries/nvram.c

@@ -638,7 +638,6 @@
 		/* These are almost always orderly shutdowns. */
 		return;
 	case KMSG_DUMP_OOPS:
-	case KMSG_DUMP_KEXEC:
 		break;
 	case KMSG_DUMP_PANIC:
 		panicking = true;

diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 27272f6..d25843a 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h

@@ -236,7 +236,7 @@
 /*
  * Function to drop a processor into disabled wait state
  */
-static inline void ATTRIB_NORET disabled_wait(unsigned long code)
+static inline void __noreturn disabled_wait(unsigned long code)
 {
         unsigned long ctl_buf;
         psw_t dw_psw;

diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index fab8843..0fd2e86 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c

@@ -30,7 +30,7 @@
 
 static DEFINE_PER_CPU(struct mcck_struct, cpu_mcck);
 
-static NORET_TYPE void s390_handle_damage(char *msg)
+static void s390_handle_damage(char *msg)
 {
 	smp_send_stop();
 	disabled_wait((unsigned long) __builtin_return_address(0));

diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c
index aaf6d59..7ec6651 100644
--- a/arch/sh/kernel/process_32.c
+++ b/arch/sh/kernel/process_32.c

@@ -70,7 +70,7 @@
 /*
  * Create a kernel thread
  */
-ATTRIB_NORET void kernel_thread_helper(void *arg, int (*fn)(void *))
+__noreturn void kernel_thread_helper(void *arg, int (*fn)(void *))
 {
 	do_exit(fn(arg));
 }

diff --git a/arch/sh/kernel/process_64.c b/arch/sh/kernel/process_64.c
index 210c1ca..cbd4e4b 100644
--- a/arch/sh/kernel/process_64.c
+++ b/arch/sh/kernel/process_64.c

@@ -285,7 +285,7 @@
 /*
  * Create a kernel thread
  */
-ATTRIB_NORET void kernel_thread_helper(void *arg, int (*fn)(void *))
+__noreturn void kernel_thread_helper(void *arg, int (*fn)(void *))
 {
 	do_exit(fn(arg));
 }

diff --git a/arch/tile/kernel/machine_kexec.c b/arch/tile/kernel/machine_kexec.c
index e00d717..6255f2e 100644
--- a/arch/tile/kernel/machine_kexec.c
+++ b/arch/tile/kernel/machine_kexec.c

@@ -248,11 +248,11 @@
 }
 
 
-NORET_TYPE void machine_kexec(struct kimage *image)
+void machine_kexec(struct kimage *image)
 {
 	void *reboot_code_buffer;
-	NORET_TYPE void (*rnk)(unsigned long, void *, unsigned long)
-		ATTRIB_NORET;
+	void (*rnk)(unsigned long, void *, unsigned long)
+		__noreturn;
 
 	/* Mask all interrupts before starting to reboot. */
 	interrupt_mask_set_mask(~0ULL);

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index a150f4c..6c14ecd 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig

@@ -60,6 +60,9 @@
 	select PERF_EVENTS
 	select HAVE_PERF_EVENTS_NMI
 	select ANON_INODES
+	select HAVE_ALIGNED_STRUCT_PAGE if SLUB && !M386
+	select HAVE_CMPXCHG_LOCAL if !M386
+	select HAVE_CMPXCHG_DOUBLE
 	select HAVE_ARCH_KMEMCHECK
 	select HAVE_USER_RETURN_NOTIFIER
 	select ARCH_BINFMT_ELF_RANDOMIZE_PIE

diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index e3ca7e0..3c57033 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu

@@ -309,12 +309,6 @@
 config X86_CMPXCHG
 	def_bool X86_64 || (X86_32 && !M386)
 
-config CMPXCHG_LOCAL
-	def_bool X86_64 || (X86_32 && !M386)
-
-config CMPXCHG_DOUBLE
-	def_bool y
-
 config X86_L1_CACHE_SHIFT
 	int
 	default "7" if MPENTIUM4 || MPSC

diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 7404715..bc817cd 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h

@@ -40,8 +40,8 @@
 						*/
 	__u8			supervisor_stack[0];
 #endif
-	int			sig_on_uaccess_error:1;
-	int			uaccess_err:1;	/* uaccess failed */
+	unsigned int		sig_on_uaccess_error:1;
+	unsigned int		uaccess_err:1;	/* uaccess failed */
 };
 
 #define INIT_THREAD_INFO(tsk)			\

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index f22a9f7..29ba329 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c

@@ -2011,7 +2011,7 @@
 	if (!mce_available(&boot_cpu_data))
 		return -EIO;
 
-	memset(&dev->kobj, 0, sizeof(struct kobject));
+	memset(dev, 0, sizeof(struct device));
 	dev->id  = cpu;
 	dev->bus = &mce_subsys;
 

diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index cf4603b..642d880 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c

@@ -856,18 +856,23 @@
 }
 
 /*
- * With CONFIG_SPARSE_IRQ, interrupt descriptors are allocated as-needed, so
- * rather than set them in lguest_init_IRQ we are called here every time an
- * lguest device needs an interrupt.
- *
- * FIXME: irq_alloc_desc_at() can fail due to lack of memory, we should
- * pass that up!
+ * Interrupt descriptors are allocated as-needed, but low-numbered ones are
+ * reserved by the generic x86 code.  So we ignore irq_alloc_desc_at if it
+ * tells us the irq is already used: other errors (ie. ENOMEM) we take
+ * seriously.
  */
-void lguest_setup_irq(unsigned int irq)
+int lguest_setup_irq(unsigned int irq)
 {
-	irq_alloc_desc_at(irq, 0);
+	int err;
+
+	/* Returns -ve error or vector number. */
+	err = irq_alloc_desc_at(irq, 0);
+	if (err < 0 && err != -EEXIST)
+		return err;
+
 	irq_set_chip_and_handler_name(irq, &lguest_irq_controller,
 				      handle_level_irq, "level");
+	return 0;
 }
 
 /*

diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 020cd2e..19d3fa0 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c

@@ -110,7 +110,7 @@
  * Allocate node_to_cpumask_map based on number of available nodes
  * Requires node_possible_map to be valid.
  *
- * Note: node_to_cpumask() is not valid until after this is done.
+ * Note: cpumask_of_node() is not valid until after this is done.
  * (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.)
  */
 void __init setup_node_to_cpumask_map(void)

diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig
index 1d97bd8..b2b54d2 100644
--- a/arch/x86/um/Kconfig
+++ b/arch/x86/um/Kconfig

@@ -6,14 +6,6 @@
 
 menu "Host processor type and features"
 
-config CMPXCHG_LOCAL
-	bool
-	default n
-
-config CMPXCHG_DOUBLE
-	bool
-	default n
-
 source "arch/x86/Kconfig.cpu"
 
 endmenu

diff --git a/drivers/Makefile b/drivers/Makefile
index 1b31421..c07be02 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile

@@ -97,7 +97,7 @@
 obj-y				+= lguest/
 obj-$(CONFIG_CPU_FREQ)		+= cpufreq/
 obj-$(CONFIG_CPU_IDLE)		+= cpuidle/
-obj-$(CONFIG_MMC)		+= mmc/
+obj-y				+= mmc/
 obj-$(CONFIG_MEMSTICK)		+= memstick/
 obj-y				+= leds/
 obj-$(CONFIG_INFINIBAND)	+= infiniband/

diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
index fcbec8a..7be9f79 100644
--- a/drivers/base/Kconfig
+++ b/drivers/base/Kconfig

@@ -179,7 +179,7 @@
 source "drivers/base/regmap/Kconfig"
 
 config DMA_SHARED_BUFFER
-	bool "Buffer framework to be shared between drivers"
+	bool
 	default n
 	select ANON_INODES
 	depends on EXPERIMENTAL

diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index f17e3ea..ed5de58 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c

@@ -295,11 +295,22 @@
 
 	ret = memory_block_action(mem->start_section_nr, to_state);
 
-	if (ret)
+	if (ret) {
 		mem->state = from_state_req;
-	else
-		mem->state = to_state;
+		goto out;
+	}
 
+	mem->state = to_state;
+	switch (mem->state) {
+	case MEM_OFFLINE:
+		kobject_uevent(&mem->dev.kobj, KOBJ_OFFLINE);
+		break;
+	case MEM_ONLINE:
+		kobject_uevent(&mem->dev.kobj, KOBJ_ONLINE);
+		break;
+	default:
+		break;
+	}
 out:
 	mutex_unlock(&mem->state_mutex);
 	return ret;

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 148ab94..3fd31de 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c

@@ -2184,6 +2184,8 @@
 	INIT_LIST_HEAD(&rbd_dev->node);
 	INIT_LIST_HEAD(&rbd_dev->snaps);
 
+	init_rwsem(&rbd_dev->header.snap_rwsem);
+
 	/* generate unique id: find highest unique id, add one */
 	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
 

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 4d0b70a..ffd5ca9 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c

@@ -4,6 +4,7 @@
 #include <linux/blkdev.h>
 #include <linux/hdreg.h>
 #include <linux/module.h>
+#include <linux/mutex.h>
 #include <linux/virtio.h>
 #include <linux/virtio_blk.h>
 #include <linux/scatterlist.h>
@@ -36,6 +37,12 @@
 	/* Process context for config space updates */
 	struct work_struct config_work;
 
+	/* Lock for config space updates */
+	struct mutex config_lock;
+
+	/* enable config space updates */
+	bool config_enable;
+
 	/* What host tells us, plus 2 for header & tailer. */
 	unsigned int sg_elems;
 
@@ -172,7 +179,7 @@
 		}
 	}
 
-	if (virtqueue_add_buf(vblk->vq, vblk->sg, out, in, vbr) < 0) {
+	if (virtqueue_add_buf(vblk->vq, vblk->sg, out, in, vbr, GFP_ATOMIC)<0) {
 		mempool_free(vbr, vblk->pool);
 		return false;
 	}
@@ -318,6 +325,10 @@
 	char cap_str_2[10], cap_str_10[10];
 	u64 capacity, size;
 
+	mutex_lock(&vblk->config_lock);
+	if (!vblk->config_enable)
+		goto done;
+
 	/* Host must always specify the capacity. */
 	vdev->config->get(vdev, offsetof(struct virtio_blk_config, capacity),
 			  &capacity, sizeof(capacity));
@@ -340,6 +351,8 @@
 		  cap_str_10, cap_str_2);
 
 	set_capacity(vblk->disk, capacity);
+done:
+	mutex_unlock(&vblk->config_lock);
 }
 
 static void virtblk_config_changed(struct virtio_device *vdev)
@@ -349,6 +362,18 @@
 	queue_work(virtblk_wq, &vblk->config_work);
 }
 
+static int init_vq(struct virtio_blk *vblk)
+{
+	int err = 0;
+
+	/* We expect one virtqueue, for output. */
+	vblk->vq = virtio_find_single_vq(vblk->vdev, blk_done, "requests");
+	if (IS_ERR(vblk->vq))
+		err = PTR_ERR(vblk->vq);
+
+	return err;
+}
+
 static int __devinit virtblk_probe(struct virtio_device *vdev)
 {
 	struct virtio_blk *vblk;
@@ -388,14 +413,13 @@
 	vblk->vdev = vdev;
 	vblk->sg_elems = sg_elems;
 	sg_init_table(vblk->sg, vblk->sg_elems);
+	mutex_init(&vblk->config_lock);
 	INIT_WORK(&vblk->config_work, virtblk_config_changed_work);
+	vblk->config_enable = true;
 
-	/* We expect one virtqueue, for output. */
-	vblk->vq = virtio_find_single_vq(vdev, blk_done, "requests");
-	if (IS_ERR(vblk->vq)) {
-		err = PTR_ERR(vblk->vq);
+	err = init_vq(vblk);
+	if (err)
 		goto out_free_vblk;
-	}
 
 	vblk->pool = mempool_create_kmalloc_pool(1,sizeof(struct virtblk_req));
 	if (!vblk->pool) {
@@ -542,7 +566,10 @@
 	struct virtio_blk *vblk = vdev->priv;
 	int index = vblk->index;
 
-	flush_work(&vblk->config_work);
+	/* Prevent config work handler from accessing the device. */
+	mutex_lock(&vblk->config_lock);
+	vblk->config_enable = false;
+	mutex_unlock(&vblk->config_lock);
 
 	/* Nothing should be pending. */
 	BUG_ON(!list_empty(&vblk->reqs));
@@ -550,6 +577,8 @@
 	/* Stop all the virtqueues. */
 	vdev->config->reset(vdev);
 
+	flush_work(&vblk->config_work);
+
 	del_gendisk(vblk->disk);
 	blk_cleanup_queue(vblk->disk->queue);
 	put_disk(vblk->disk);
@@ -559,6 +588,46 @@
 	ida_simple_remove(&vd_index_ida, index);
 }
 
+#ifdef CONFIG_PM
+static int virtblk_freeze(struct virtio_device *vdev)
+{
+	struct virtio_blk *vblk = vdev->priv;
+
+	/* Ensure we don't receive any more interrupts */
+	vdev->config->reset(vdev);
+
+	/* Prevent config work handler from accessing the device. */
+	mutex_lock(&vblk->config_lock);
+	vblk->config_enable = false;
+	mutex_unlock(&vblk->config_lock);
+
+	flush_work(&vblk->config_work);
+
+	spin_lock_irq(vblk->disk->queue->queue_lock);
+	blk_stop_queue(vblk->disk->queue);
+	spin_unlock_irq(vblk->disk->queue->queue_lock);
+	blk_sync_queue(vblk->disk->queue);
+
+	vdev->config->del_vqs(vdev);
+	return 0;
+}
+
+static int virtblk_restore(struct virtio_device *vdev)
+{
+	struct virtio_blk *vblk = vdev->priv;
+	int ret;
+
+	vblk->config_enable = true;
+	ret = init_vq(vdev->priv);
+	if (!ret) {
+		spin_lock_irq(vblk->disk->queue->queue_lock);
+		blk_start_queue(vblk->disk->queue);
+		spin_unlock_irq(vblk->disk->queue->queue_lock);
+	}
+	return ret;
+}
+#endif
+
 static const struct virtio_device_id id_table[] = {
 	{ VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
 	{ 0 },
@@ -584,6 +653,10 @@
 	.probe			= virtblk_probe,
 	.remove			= __devexit_p(virtblk_remove),
 	.config_changed		= virtblk_config_changed,
+#ifdef CONFIG_PM
+	.freeze			= virtblk_freeze,
+	.restore		= virtblk_restore,
+#endif
 };
 
 static int __init init(void)

diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c
index fd699cc..723725b 100644
--- a/drivers/char/hw_random/virtio-rng.c
+++ b/drivers/char/hw_random/virtio-rng.c

@@ -47,7 +47,7 @@
 	sg_init_one(&sg, buf, size);
 
 	/* There should always be room for one buffer. */
-	if (virtqueue_add_buf(vq, &sg, 0, 1, buf) < 0)
+	if (virtqueue_add_buf(vq, &sg, 0, 1, buf, GFP_KERNEL) < 0)
 		BUG();
 
 	virtqueue_kick(vq);

diff --git a/drivers/char/ramoops.c b/drivers/char/ramoops.c
index 7c7f42a1f8..9fec323 100644
--- a/drivers/char/ramoops.c
+++ b/drivers/char/ramoops.c

@@ -83,8 +83,7 @@
 	struct timeval timestamp;
 
 	if (reason != KMSG_DUMP_OOPS &&
-	    reason != KMSG_DUMP_PANIC &&
-	    reason != KMSG_DUMP_KEXEC)
+	    reason != KMSG_DUMP_PANIC)
 		return;
 
 	/* Only dump oopses if dump_oops is set */
@@ -126,8 +125,8 @@
 		goto fail3;
 	}
 
-	rounddown_pow_of_two(pdata->mem_size);
-	rounddown_pow_of_two(pdata->record_size);
+	pdata->mem_size = rounddown_pow_of_two(pdata->mem_size);
+	pdata->record_size = rounddown_pow_of_two(pdata->record_size);
 
 	/* Check for the minimum memory size */
 	if (pdata->mem_size < MIN_MEM_SIZE &&
@@ -148,14 +147,6 @@
 	cxt->phys_addr = pdata->mem_address;
 	cxt->record_size = pdata->record_size;
 	cxt->dump_oops = pdata->dump_oops;
-	/*
-	 * Update the module parameter variables as well so they are visible
-	 * through /sys/module/ramoops/parameters/
-	 */
-	mem_size = pdata->mem_size;
-	mem_address = pdata->mem_address;
-	record_size = pdata->record_size;
-	dump_oops = pdata->dump_oops;
 
 	if (!request_mem_region(cxt->phys_addr, cxt->size, "ramoops")) {
 		pr_err("request mem region failed\n");
@@ -176,6 +167,15 @@
 		goto fail1;
 	}
 
+	/*
+	 * Update the module parameter variables as well so they are visible
+	 * through /sys/module/ramoops/parameters/
+	 */
+	mem_size = pdata->mem_size;
+	mem_address = pdata->mem_address;
+	record_size = pdata->record_size;
+	dump_oops = pdata->dump_oops;
+
 	return 0;
 
 fail1:

diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index 8e3c46d..b58b561 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c

@@ -392,7 +392,7 @@
 
 	sg_init_one(sg, buf->buf, buf->size);
 
-	ret = virtqueue_add_buf(vq, sg, 0, 1, buf);
+	ret = virtqueue_add_buf(vq, sg, 0, 1, buf, GFP_ATOMIC);
 	virtqueue_kick(vq);
 	return ret;
 }
@@ -457,7 +457,7 @@
 	vq = portdev->c_ovq;
 
 	sg_init_one(sg, &cpkt, sizeof(cpkt));
-	if (virtqueue_add_buf(vq, sg, 1, 0, &cpkt) >= 0) {
+	if (virtqueue_add_buf(vq, sg, 1, 0, &cpkt, GFP_ATOMIC) >= 0) {
 		virtqueue_kick(vq);
 		while (!virtqueue_get_buf(vq, &len))
 			cpu_relax();
@@ -506,7 +506,7 @@
 	reclaim_consumed_buffers(port);
 
 	sg_init_one(sg, in_buf, in_count);
-	ret = virtqueue_add_buf(out_vq, sg, 1, 0, in_buf);
+	ret = virtqueue_add_buf(out_vq, sg, 1, 0, in_buf, GFP_ATOMIC);
 
 	/* Tell Host to go! */
 	virtqueue_kick(out_vq);
@@ -1271,6 +1271,20 @@
 	kfree(port);
 }
 
+static void remove_port_data(struct port *port)
+{
+	struct port_buffer *buf;
+
+	/* Remove unused data this port might have received. */
+	discard_port_data(port);
+
+	reclaim_consumed_buffers(port);
+
+	/* Remove buffers we queued up for the Host to send us data in. */
+	while ((buf = virtqueue_detach_unused_buf(port->in_vq)))
+		free_buf(buf);
+}
+
 /*
  * Port got unplugged.  Remove port from portdev's list and drop the
  * kref reference.  If no userspace has this port opened, it will
@@ -1278,8 +1292,6 @@
  */
 static void unplug_port(struct port *port)
 {
-	struct port_buffer *buf;
-
 	spin_lock_irq(&port->portdev->ports_lock);
 	list_del(&port->list);
 	spin_unlock_irq(&port->portdev->ports_lock);
@@ -1300,14 +1312,7 @@
 		hvc_remove(port->cons.hvc);
 	}
 
-	/* Remove unused data this port might have received. */
-	discard_port_data(port);
-
-	reclaim_consumed_buffers(port);
-
-	/* Remove buffers we queued up for the Host to send us data in. */
-	while ((buf = virtqueue_detach_unused_buf(port->in_vq)))
-		free_buf(buf);
+	remove_port_data(port);
 
 	/*
 	 * We should just assume the device itself has gone off --
@@ -1659,6 +1664,28 @@
 	.owner = THIS_MODULE,
 };
 
+static void remove_vqs(struct ports_device *portdev)
+{
+	portdev->vdev->config->del_vqs(portdev->vdev);
+	kfree(portdev->in_vqs);
+	kfree(portdev->out_vqs);
+}
+
+static void remove_controlq_data(struct ports_device *portdev)
+{
+	struct port_buffer *buf;
+	unsigned int len;
+
+	if (!use_multiport(portdev))
+		return;
+
+	while ((buf = virtqueue_get_buf(portdev->c_ivq, &len)))
+		free_buf(buf);
+
+	while ((buf = virtqueue_detach_unused_buf(portdev->c_ivq)))
+		free_buf(buf);
+}
+
 /*
  * Once we're further in boot, we get probed like any other virtio
  * device.
@@ -1764,9 +1791,7 @@
 	/* The host might want to notify mgmt sw about device add failure */
 	__send_control_msg(portdev, VIRTIO_CONSOLE_BAD_ID,
 			   VIRTIO_CONSOLE_DEVICE_READY, 0);
-	vdev->config->del_vqs(vdev);
-	kfree(portdev->in_vqs);
-	kfree(portdev->out_vqs);
+	remove_vqs(portdev);
 free_chrdev:
 	unregister_chrdev(portdev->chr_major, "virtio-portsdev");
 free:
@@ -1804,21 +1829,8 @@
 	 * have to just stop using the port, as the vqs are going
 	 * away.
 	 */
-	if (use_multiport(portdev)) {
-		struct port_buffer *buf;
-		unsigned int len;
-
-		while ((buf = virtqueue_get_buf(portdev->c_ivq, &len)))
-			free_buf(buf);
-
-		while ((buf = virtqueue_detach_unused_buf(portdev->c_ivq)))
-			free_buf(buf);
-	}
-
-	vdev->config->del_vqs(vdev);
-	kfree(portdev->in_vqs);
-	kfree(portdev->out_vqs);
-
+	remove_controlq_data(portdev);
+	remove_vqs(portdev);
 	kfree(portdev);
 }
 
@@ -1832,6 +1844,68 @@
 	VIRTIO_CONSOLE_F_MULTIPORT,
 };
 
+#ifdef CONFIG_PM
+static int virtcons_freeze(struct virtio_device *vdev)
+{
+	struct ports_device *portdev;
+	struct port *port;
+
+	portdev = vdev->priv;
+
+	vdev->config->reset(vdev);
+
+	virtqueue_disable_cb(portdev->c_ivq);
+	cancel_work_sync(&portdev->control_work);
+	/*
+	 * Once more: if control_work_handler() was running, it would
+	 * enable the cb as the last step.
+	 */
+	virtqueue_disable_cb(portdev->c_ivq);
+	remove_controlq_data(portdev);
+
+	list_for_each_entry(port, &portdev->ports, list) {
+		virtqueue_disable_cb(port->in_vq);
+		virtqueue_disable_cb(port->out_vq);
+		/*
+		 * We'll ask the host later if the new invocation has
+		 * the port opened or closed.
+		 */
+		port->host_connected = false;
+		remove_port_data(port);
+	}
+	remove_vqs(portdev);
+
+	return 0;
+}
+
+static int virtcons_restore(struct virtio_device *vdev)
+{
+	struct ports_device *portdev;
+	struct port *port;
+	int ret;
+
+	portdev = vdev->priv;
+
+	ret = init_vqs(portdev);
+	if (ret)
+		return ret;
+
+	if (use_multiport(portdev))
+		fill_queue(portdev->c_ivq, &portdev->cvq_lock);
+
+	list_for_each_entry(port, &portdev->ports, list) {
+		port->in_vq = portdev->in_vqs[port->id];
+		port->out_vq = portdev->out_vqs[port->id];
+
+		fill_queue(port->in_vq, &port->inbuf_lock);
+
+		/* Get port open/close status on the host */
+		send_control_msg(port, VIRTIO_CONSOLE_PORT_READY, 1);
+	}
+	return 0;
+}
+#endif
+
 static struct virtio_driver virtio_console = {
 	.feature_table = features,
 	.feature_table_size = ARRAY_SIZE(features),
@@ -1841,6 +1915,10 @@
 	.probe =	virtcons_probe,
 	.remove =	virtcons_remove,
 	.config_changed = config_intr,
+#ifdef CONFIG_PM
+	.freeze =	virtcons_freeze,
+	.restore =	virtcons_restore,
+#endif
 };
 
 static int __init init(void)

diff --git a/drivers/gpio/gpio-stmpe.c b/drivers/gpio/gpio-stmpe.c
index 4c980b5..87a68a8 100644
--- a/drivers/gpio/gpio-stmpe.c
+++ b/drivers/gpio/gpio-stmpe.c

@@ -65,7 +65,14 @@
 	u8 reg = stmpe->regs[which] - (offset / 8);
 	u8 mask = 1 << (offset % 8);
 
-	stmpe_reg_write(stmpe, reg, mask);
+	/*
+	 * Some variants have single register for gpio set/clear functionality.
+	 * For them we need to write 0 to clear and 1 to set.
+	 */
+	if (stmpe->regs[STMPE_IDX_GPSR_LSB] == stmpe->regs[STMPE_IDX_GPCR_LSB])
+		stmpe_set_bits(stmpe, reg, mask, val ? mask : 0);
+	else
+		stmpe_reg_write(stmpe, reg, mask);
 }
 
 static int stmpe_gpio_direction_output(struct gpio_chip *chip,
@@ -132,6 +139,10 @@
 	if (type == IRQ_TYPE_LEVEL_LOW || type == IRQ_TYPE_LEVEL_HIGH)
 		return -EINVAL;
 
+	/* STMPE801 doesn't have RE and FE registers */
+	if (stmpe_gpio->stmpe->partnum == STMPE801)
+		return 0;
+
 	if (type == IRQ_TYPE_EDGE_RISING)
 		stmpe_gpio->regs[REG_RE][regoffset] |= mask;
 	else
@@ -165,6 +176,11 @@
 	int i, j;
 
 	for (i = 0; i < CACHE_NR_REGS; i++) {
+		/* STMPE801 doesn't have RE and FE registers */
+		if ((stmpe->partnum == STMPE801) &&
+				(i != REG_IE))
+			continue;
+
 		for (j = 0; j < num_banks; j++) {
 			u8 old = stmpe_gpio->oldregs[i][j];
 			u8 new = stmpe_gpio->regs[i][j];
@@ -241,8 +257,11 @@
 		}
 
 		stmpe_reg_write(stmpe, statmsbreg + i, status[i]);
-		stmpe_reg_write(stmpe, stmpe->regs[STMPE_IDX_GPEDR_MSB] + i,
-				status[i]);
+
+		/* Edge detect register is not present on 801 */
+		if (stmpe->partnum != STMPE801)
+			stmpe_reg_write(stmpe, stmpe->regs[STMPE_IDX_GPEDR_MSB]
+					+ i, status[i]);
 	}
 
 	return IRQ_HANDLED;

diff --git a/drivers/gpu/drm/gma500/cdv_intel_crt.c b/drivers/gpu/drm/gma500/cdv_intel_crt.c
index 6d0f10b..c100f3e9 100644
--- a/drivers/gpu/drm/gma500/cdv_intel_crt.c
+++ b/drivers/gpu/drm/gma500/cdv_intel_crt.c

@@ -66,6 +66,7 @@
 static int cdv_intel_crt_mode_valid(struct drm_connector *connector,
 				struct drm_display_mode *mode)
 {
+	struct drm_psb_private *dev_priv = connector->dev->dev_private;
 	int max_clock = 0;
 	if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
 		return MODE_NO_DBLESCAN;
@@ -82,6 +83,11 @@
 	if (mode->hdisplay > 1680 || mode->vdisplay > 1050)
 		return MODE_PANEL;
 
+	/* We assume worst case scenario of 32 bpp here, since we don't know */
+	if ((ALIGN(mode->hdisplay * 4, 64) * mode->vdisplay) >
+	    dev_priv->vram_stolen_size)
+		return MODE_MEM;
+
 	return MODE_OK;
 }
 

diff --git a/drivers/gpu/drm/gma500/cdv_intel_hdmi.c b/drivers/gpu/drm/gma500/cdv_intel_hdmi.c
index 50d7cfb..de25560 100644
--- a/drivers/gpu/drm/gma500/cdv_intel_hdmi.c
+++ b/drivers/gpu/drm/gma500/cdv_intel_hdmi.c

@@ -241,6 +241,7 @@
 static int cdv_hdmi_mode_valid(struct drm_connector *connector,
 				 struct drm_display_mode *mode)
 {
+	struct drm_psb_private *dev_priv = connector->dev->dev_private;
 
 	if (mode->clock > 165000)
 		return MODE_CLOCK_HIGH;
@@ -255,14 +256,11 @@
 	if (mode->flags & DRM_MODE_FLAG_INTERLACE)
 		return MODE_NO_INTERLACE;
 
-	/*
-	 * FIXME: for now we limit the size to 1680x1050 on CDV, otherwise it
-	 * will go beyond the stolen memory size allocated to the framebuffer
-	 */
-	if (mode->hdisplay > 1680)
-		return MODE_PANEL;
-	if (mode->vdisplay > 1050)
-		return MODE_PANEL;
+	/* We assume worst case scenario of 32 bpp here, since we don't know */
+	if ((ALIGN(mode->hdisplay * 4, 64) * mode->vdisplay) >
+	    dev_priv->vram_stolen_size)
+		return MODE_MEM;
+
 	return MODE_OK;
 }
 

diff --git a/drivers/gpu/drm/gma500/oaktrail_hdmi.c b/drivers/gpu/drm/gma500/oaktrail_hdmi.c
index 36878a6..025d309 100644
--- a/drivers/gpu/drm/gma500/oaktrail_hdmi.c
+++ b/drivers/gpu/drm/gma500/oaktrail_hdmi.c

@@ -506,6 +506,7 @@
 static int oaktrail_hdmi_mode_valid(struct drm_connector *connector,
 				struct drm_display_mode *mode)
 {
+	struct drm_psb_private *dev_priv = connector->dev->dev_private;
 	if (mode->clock > 165000)
 		return MODE_CLOCK_HIGH;
 	if (mode->clock < 20000)
@@ -514,6 +515,11 @@
 	if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
 		return MODE_NO_DBLESCAN;
 
+	/* We assume worst case scenario of 32 bpp here, since we don't know */
+	if ((ALIGN(mode->hdisplay * 4, 64) * mode->vdisplay) >
+	    dev_priv->vram_stolen_size)
+		return MODE_MEM;
+
 	return MODE_OK;
 }
 

diff --git a/drivers/gpu/drm/gma500/psb_intel_sdvo.c b/drivers/gpu/drm/gma500/psb_intel_sdvo.c
index 4882b29..88b4297 100644
--- a/drivers/gpu/drm/gma500/psb_intel_sdvo.c
+++ b/drivers/gpu/drm/gma500/psb_intel_sdvo.c

@@ -1141,6 +1141,7 @@
 static int psb_intel_sdvo_mode_valid(struct drm_connector *connector,
 				 struct drm_display_mode *mode)
 {
+	struct drm_psb_private *dev_priv = connector->dev->dev_private;
 	struct psb_intel_sdvo *psb_intel_sdvo = intel_attached_sdvo(connector);
 
 	if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
@@ -1160,6 +1161,11 @@
 			return MODE_PANEL;
 	}
 
+	/* We assume worst case scenario of 32 bpp here, since we don't know */
+	if ((ALIGN(mode->hdisplay * 4, 64) * mode->vdisplay) >
+	    dev_priv->vram_stolen_size)
+		return MODE_MEM;
+
 	return MODE_OK;
 }
 

diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.c b/drivers/gpu/drm/nouveau/nouveau_acpi.c
index 525744d..7814a76 100644
--- a/drivers/gpu/drm/nouveau/nouveau_acpi.c
+++ b/drivers/gpu/drm/nouveau/nouveau_acpi.c

@@ -18,12 +18,6 @@
 
 #include <linux/vga_switcheroo.h>
 
-#define NOUVEAU_DSM_SUPPORTED 0x00
-#define NOUVEAU_DSM_SUPPORTED_FUNCTIONS 0x00
-
-#define NOUVEAU_DSM_ACTIVE 0x01
-#define NOUVEAU_DSM_ACTIVE_QUERY 0x00
-
 #define NOUVEAU_DSM_LED 0x02
 #define NOUVEAU_DSM_LED_STATE 0x00
 #define NOUVEAU_DSM_LED_OFF 0x10
@@ -35,6 +29,9 @@
 #define NOUVEAU_DSM_POWER_SPEED 0x01
 #define NOUVEAU_DSM_POWER_STAMINA 0x02
 
+#define NOUVEAU_DSM_OPTIMUS_FN 0x1A
+#define NOUVEAU_DSM_OPTIMUS_ARGS 0x03000001
+
 static struct nouveau_dsm_priv {
 	bool dsm_detected;
 	bool optimus_detected;
@@ -61,7 +58,8 @@
 	struct acpi_object_list input;
 	union acpi_object params[4];
 	union acpi_object *obj;
-	int err;
+	int i, err;
+	char args_buff[4];
 
 	input.count = 4;
 	input.pointer = params;
@@ -73,7 +71,11 @@
 	params[2].type = ACPI_TYPE_INTEGER;
 	params[2].integer.value = func;
 	params[3].type = ACPI_TYPE_BUFFER;
-	params[3].buffer.length = 0;
+	params[3].buffer.length = 4;
+	/* ACPI is little endian, AABBCCDD becomes {DD,CC,BB,AA} */
+	for (i = 0; i < 4; i++)
+		args_buff[i] = (arg >> i * 8) & 0xFF;
+	params[3].buffer.pointer = args_buff;
 
 	err = acpi_evaluate_object(handle, "_DSM", &input, &output);
 	if (err) {
@@ -148,6 +150,23 @@
 	return 0;
 }
 
+/* Returns 1 if a DSM function is usable and 0 otherwise */
+static int nouveau_test_dsm(acpi_handle test_handle,
+	int (*dsm_func)(acpi_handle, int, int, uint32_t *),
+	int sfnc)
+{
+	u32 result = 0;
+
+	/* Function 0 returns a Buffer containing available functions. The args
+	 * parameter is ignored for function 0, so just put 0 in it */
+	if (dsm_func(test_handle, 0, 0, &result))
+		return 0;
+
+	/* ACPI Spec v4 9.14.1: if bit 0 is zero, no function is supported. If
+	 * the n-th bit is enabled, function n is supported */
+	return result & 1 && result & (1 << sfnc);
+}
+
 static int nouveau_dsm_switch_mux(acpi_handle handle, int mux_id)
 {
 	mxm_wmi_call_mxmx(mux_id == NOUVEAU_DSM_LED_STAMINA ? MXM_MXDS_ADAPTER_IGD : MXM_MXDS_ADAPTER_0);
@@ -168,6 +187,10 @@
 
 static int nouveau_dsm_switchto(enum vga_switcheroo_client_id id)
 {
+	/* perhaps the _DSM functions are mutually exclusive, but prepare for
+	 * the future */
+	if (!nouveau_dsm_priv.dsm_detected && nouveau_dsm_priv.optimus_detected)
+		return 0;
 	if (id == VGA_SWITCHEROO_IGD)
 		return nouveau_dsm_switch_mux(nouveau_dsm_priv.dhandle, NOUVEAU_DSM_LED_STAMINA);
 	else
@@ -180,6 +203,11 @@
 	if (id == VGA_SWITCHEROO_IGD)
 		return 0;
 
+	/* Optimus laptops have the card already disabled in
+	 * nouveau_switcheroo_set_state */
+	if (!nouveau_dsm_priv.dsm_detected && nouveau_dsm_priv.optimus_detected)
+		return 0;
+
 	return nouveau_dsm_set_discrete_state(nouveau_dsm_priv.dhandle, state);
 }
 
@@ -212,8 +240,7 @@
 {
 	acpi_handle dhandle, nvidia_handle;
 	acpi_status status;
-	int ret, retval = 0;
-	uint32_t result;
+	int retval = 0;
 
 	dhandle = DEVICE_ACPI_HANDLE(&pdev->dev);
 	if (!dhandle)
@@ -224,13 +251,11 @@
 		return false;
 	}
 
-	ret = nouveau_dsm(dhandle, NOUVEAU_DSM_SUPPORTED,
-			  NOUVEAU_DSM_SUPPORTED_FUNCTIONS, &result);
-	if (ret == 0)
+	if (nouveau_test_dsm(dhandle, nouveau_dsm, NOUVEAU_DSM_POWER))
 		retval |= NOUVEAU_DSM_HAS_MUX;
 
-	ret = nouveau_optimus_dsm(dhandle, 0, 0, &result);
-	if (ret == 0)
+	if (nouveau_test_dsm(dhandle, nouveau_optimus_dsm,
+		NOUVEAU_DSM_OPTIMUS_FN))
 		retval |= NOUVEAU_DSM_HAS_OPT;
 
 	if (retval)
@@ -269,15 +294,22 @@
 	}
 
 	if (vga_count == 2 && has_dsm && guid_valid) {
-		acpi_get_name(nouveau_dsm_priv.dhandle, ACPI_FULL_PATHNAME, &buffer);
+		acpi_get_name(nouveau_dsm_priv.dhandle, ACPI_FULL_PATHNAME,
+			&buffer);
 		printk(KERN_INFO "VGA switcheroo: detected DSM switching method %s handle\n",
-		       acpi_method_name);
+			acpi_method_name);
 		nouveau_dsm_priv.dsm_detected = true;
 		ret = true;
 	}
 
-	if (has_optimus == 1)
+	if (has_optimus == 1) {
+		acpi_get_name(nouveau_dsm_priv.dhandle, ACPI_FULL_PATHNAME,
+			&buffer);
+		printk(KERN_INFO "VGA switcheroo: detected Optimus DSM method %s handle\n",
+			acpi_method_name);
 		nouveau_dsm_priv.optimus_detected = true;
+		ret = true;
+	}
 
 	return ret;
 }
@@ -293,6 +325,17 @@
 	vga_switcheroo_register_handler(&nouveau_dsm_handler);
 }
 
+/* Must be called for Optimus models before the card can be turned off */
+void nouveau_switcheroo_optimus_dsm(void)
+{
+	u32 result = 0;
+	if (!nouveau_dsm_priv.optimus_detected)
+		return;
+
+	nouveau_optimus_dsm(nouveau_dsm_priv.dhandle, NOUVEAU_DSM_OPTIMUS_FN,
+		NOUVEAU_DSM_OPTIMUS_ARGS, &result);
+}
+
 void nouveau_unregister_dsm_handler(void)
 {
 	vga_switcheroo_unregister_handler();

diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index 38134a9..b827098 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h

@@ -1055,12 +1055,14 @@
 #if defined(CONFIG_ACPI)
 void nouveau_register_dsm_handler(void);
 void nouveau_unregister_dsm_handler(void);
+void nouveau_switcheroo_optimus_dsm(void);
 int nouveau_acpi_get_bios_chunk(uint8_t *bios, int offset, int len);
 bool nouveau_acpi_rom_supported(struct pci_dev *pdev);
 int nouveau_acpi_edid(struct drm_device *, struct drm_connector *);
 #else
 static inline void nouveau_register_dsm_handler(void) {}
 static inline void nouveau_unregister_dsm_handler(void) {}
+static inline void nouveau_switcheroo_optimus_dsm(void) {}
 static inline bool nouveau_acpi_rom_supported(struct pci_dev *pdev) { return false; }
 static inline int nouveau_acpi_get_bios_chunk(uint8_t *bios, int offset, int len) { return -EINVAL; }
 static inline int nouveau_acpi_edid(struct drm_device *dev, struct drm_connector *connector) { return -EINVAL; }

diff --git a/drivers/gpu/drm/nouveau/nouveau_state.c b/drivers/gpu/drm/nouveau/nouveau_state.c
index f5e9891..f80c5e0 100644
--- a/drivers/gpu/drm/nouveau/nouveau_state.c
+++ b/drivers/gpu/drm/nouveau/nouveau_state.c

@@ -525,6 +525,7 @@
 		printk(KERN_ERR "VGA switcheroo: switched nouveau off\n");
 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
 		drm_kms_helper_poll_disable(dev);
+		nouveau_switcheroo_optimus_dsm();
 		nouveau_pci_suspend(pdev, pmm);
 		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
 	}

diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c
index f7442e6..8e8cd85 100644
--- a/drivers/gpu/drm/radeon/evergreen_cs.c
+++ b/drivers/gpu/drm/radeon/evergreen_cs.c

@@ -1793,10 +1793,12 @@
 			ret = -EINVAL;
 			break;
 		case PACKET_TYPE2:
+			idx += 1;
 			break;
 		case PACKET_TYPE3:
 			pkt.opcode = CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
 			ret = evergreen_vm_packet3_check(rdev, ib->ptr, &pkt);
+			idx += pkt.count + 2;
 			break;
 		default:
 			dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
@@ -1805,7 +1807,6 @@
 		}
 		if (ret)
 			break;
-		idx += pkt.count + 2;
 	} while (idx < ib->length_dw);
 
 	return ret;

diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 3ec81c3..bfd36ab 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c

@@ -2186,7 +2186,6 @@
 void r100_bm_disable(struct radeon_device *rdev)
 {
 	u32 tmp;
-	u16 tmp16;
 
 	/* disable bus mastering */
 	tmp = RREG32(R_000030_BUS_CNTL);
@@ -2197,8 +2196,7 @@
 	WREG32(R_000030_BUS_CNTL, (tmp & 0xFFFFFFFF) | 0x00000040);
 	tmp = RREG32(RADEON_BUS_CNTL);
 	mdelay(1);
-	pci_read_config_word(rdev->pdev, 0x4, &tmp16);
-	pci_write_config_word(rdev->pdev, 0x4, tmp16 & 0xFFFB);
+	pci_clear_master(rdev->pdev);
 	mdelay(1);
 }
 

diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index 31da622..8032f1f 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c

@@ -145,7 +145,7 @@
 MODULE_PARM_DESC(agpmode, "AGP Mode (-1 == PCI)");
 module_param_named(agpmode, radeon_agpmode, int, 0444);
 
-MODULE_PARM_DESC(gartsize, "Size of PCIE/IGP gart to setup in megabytes (32,64, etc)\n");
+MODULE_PARM_DESC(gartsize, "Size of PCIE/IGP gart to setup in megabytes (32, 64, etc)");
 module_param_named(gartsize, radeon_gart_size, int, 0600);
 
 MODULE_PARM_DESC(benchmark, "Run benchmark");

diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c
index 803e0d3..ec46eb4 100644
--- a/drivers/gpu/drm/radeon/rs600.c
+++ b/drivers/gpu/drm/radeon/rs600.c

@@ -322,16 +322,6 @@
 	}
 }
 
-void rs600_bm_disable(struct radeon_device *rdev)
-{
-	u16 tmp;
-
-	/* disable bus mastering */
-	pci_read_config_word(rdev->pdev, 0x4, &tmp);
-	pci_write_config_word(rdev->pdev, 0x4, tmp & 0xFFFB);
-	mdelay(1);
-}
-
 int rs600_asic_reset(struct radeon_device *rdev)
 {
 	struct rv515_mc_save save;
@@ -355,7 +345,8 @@
 	WREG32(RADEON_CP_RB_CNTL, tmp);
 	pci_save_state(rdev->pdev);
 	/* disable bus mastering */
-	rs600_bm_disable(rdev);
+	pci_clear_master(rdev->pdev);
+	mdelay(1);
 	/* reset GA+VAP */
 	WREG32(R_0000F0_RBBM_SOFT_RESET, S_0000F0_SOFT_RESET_VAP(1) |
 					S_0000F0_SOFT_RESET_GA(1));

diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
index 37ead69..0c46d8c 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c

@@ -952,10 +952,9 @@
 
 	type = ttm_to_type(ttm->page_flags, ttm->caching_state);
 	pool = ttm_dma_find_pool(dev, type);
-	if (!pool) {
-		WARN_ON(!pool);
+	if (!pool)
 		return;
-	}
+
 	is_cached = (ttm_dma_find_pool(pool->dev,
 		     ttm_to_type(ttm->page_flags, tt_cached)) == pool);
 

diff --git a/drivers/input/keyboard/amikbd.c b/drivers/input/keyboard/amikbd.c
index 6df5f6a..79172af 100644
--- a/drivers/input/keyboard/amikbd.c
+++ b/drivers/input/keyboard/amikbd.c

@@ -259,6 +259,19 @@
 		.owner	= THIS_MODULE,
 	},
 };
-module_platform_driver(amikbd_driver);
+
+/*
+ * Module entry point: registers amikbd_driver through
+ * platform_driver_probe(), passing amikbd_probe as the probe routine.
+ */
+static int __init amikbd_init(void)
+{
+	return platform_driver_probe(&amikbd_driver, amikbd_probe);
+}
+
+module_init(amikbd_init);
+
+/* Module exit point: unregisters amikbd_driver. */
+static void __exit amikbd_exit(void)
+{
+	platform_driver_unregister(&amikbd_driver);
+}
+
+module_exit(amikbd_exit);
 
 MODULE_ALIAS("platform:amiga-keyboard");

diff --git a/drivers/input/keyboard/davinci_keyscan.c b/drivers/input/keyboard/davinci_keyscan.c
index 4698252..9d82b3a 100644
--- a/drivers/input/keyboard/davinci_keyscan.c
+++ b/drivers/input/keyboard/davinci_keyscan.c

@@ -328,7 +328,18 @@
 	},
 	.remove	= __devexit_p(davinci_ks_remove),
 };
-module_platform_driver(davinci_ks_driver);
+
+/*
+ * Module entry point: registers davinci_ks_driver through
+ * platform_driver_probe(), passing davinci_ks_probe as the probe routine.
+ */
+static int __init davinci_ks_init(void)
+{
+	return platform_driver_probe(&davinci_ks_driver, davinci_ks_probe);
+}
+module_init(davinci_ks_init);
+
+/* Module exit point: unregisters davinci_ks_driver. */
+static void __exit davinci_ks_exit(void)
+{
+	platform_driver_unregister(&davinci_ks_driver);
+}
+module_exit(davinci_ks_exit);
 
 MODULE_AUTHOR("Miguel Aguilar");
 MODULE_DESCRIPTION("Texas Instruments DaVinci Key Scan Driver");

diff --git a/drivers/input/keyboard/nomadik-ske-keypad.c b/drivers/input/keyboard/nomadik-ske-keypad.c
index 5a71e55..e35566a 100644
--- a/drivers/input/keyboard/nomadik-ske-keypad.c
+++ b/drivers/input/keyboard/nomadik-ske-keypad.c

@@ -390,7 +390,18 @@
 	.probe = ske_keypad_probe,
 	.remove = __devexit_p(ske_keypad_remove),
 };
-module_platform_driver(ske_keypad_driver);
+
+/*
+ * Module entry point: registers ske_keypad_driver through
+ * platform_driver_probe(), passing ske_keypad_probe as the probe routine.
+ */
+static int __init ske_keypad_init(void)
+{
+	return platform_driver_probe(&ske_keypad_driver, ske_keypad_probe);
+}
+module_init(ske_keypad_init);
+
+/* Module exit point: unregisters ske_keypad_driver. */
+static void __exit ske_keypad_exit(void)
+{
+	platform_driver_unregister(&ske_keypad_driver);
+}
+module_exit(ske_keypad_exit);
 
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Naveen Kumar <naveen.gaddipati@stericsson.com> / Sundar Iyer <sundar.iyer@stericsson.com>");

diff --git a/drivers/input/misc/ab8500-ponkey.c b/drivers/input/misc/ab8500-ponkey.c
index 79d9016..350fd0c 100644
--- a/drivers/input/misc/ab8500-ponkey.c
+++ b/drivers/input/misc/ab8500-ponkey.c

@@ -12,7 +12,7 @@
 #include <linux/platform_device.h>
 #include <linux/input.h>
 #include <linux/interrupt.h>
-#include <linux/mfd/ab8500.h>
+#include <linux/mfd/abx500/ab8500.h>
 #include <linux/slab.h>
 
 /**

diff --git a/drivers/input/misc/twl4030-pwrbutton.c b/drivers/input/misc/twl4030-pwrbutton.c
index 19a6882..38e4b50 100644
--- a/drivers/input/misc/twl4030-pwrbutton.c
+++ b/drivers/input/misc/twl4030-pwrbutton.c

@@ -107,14 +107,25 @@
 }
 
 static struct platform_driver twl4030_pwrbutton_driver = {
-	.probe		= twl4030_pwrbutton_probe,
 	.remove		= __exit_p(twl4030_pwrbutton_remove),
 	.driver		= {
 		.name	= "twl4030_pwrbutton",
 		.owner	= THIS_MODULE,
 	},
 };
-module_platform_driver(twl4030_pwrbutton_driver);
+
+/*
+ * Module entry point: registers twl4030_pwrbutton_driver through
+ * platform_driver_probe(), passing twl4030_pwrbutton_probe as the probe
+ * routine (the driver structure itself carries no .probe member).
+ */
+static int __init twl4030_pwrbutton_init(void)
+{
+	return platform_driver_probe(&twl4030_pwrbutton_driver,
+			twl4030_pwrbutton_probe);
+}
+module_init(twl4030_pwrbutton_init);
+
+/* Module exit point: unregisters twl4030_pwrbutton_driver. */
+static void __exit twl4030_pwrbutton_exit(void)
+{
+	platform_driver_unregister(&twl4030_pwrbutton_driver);
+}
+module_exit(twl4030_pwrbutton_exit);
 
 MODULE_ALIAS("platform:twl4030_pwrbutton");
 MODULE_DESCRIPTION("Triton2 Power Button");

diff --git a/drivers/input/mouse/amimouse.c b/drivers/input/mouse/amimouse.c
index 39be7b8..ff5f61a 100644
--- a/drivers/input/mouse/amimouse.c
+++ b/drivers/input/mouse/amimouse.c

@@ -140,13 +140,25 @@
 }
 
 static struct platform_driver amimouse_driver = {
-	.probe = amimouse_probe,
 	.remove = __exit_p(amimouse_remove),
 	.driver   = {
 		.name	= "amiga-mouse",
 		.owner	= THIS_MODULE,
 	},
 };
-module_platform_driver(amimouse_driver);
+
+/*
+ * Module entry point: registers amimouse_driver through
+ * platform_driver_probe(), passing amimouse_probe as the probe routine
+ * (the driver structure itself carries no .probe member).
+ */
+static int __init amimouse_init(void)
+{
+	return platform_driver_probe(&amimouse_driver, amimouse_probe);
+}
+
+module_init(amimouse_init);
+
+/* Module exit point: unregisters amimouse_driver. */
+static void __exit amimouse_exit(void)
+{
+	platform_driver_unregister(&amimouse_driver);
+}
+
+module_exit(amimouse_exit);
 
 MODULE_ALIAS("platform:amiga-mouse");

diff --git a/drivers/input/mouse/bcm5974.c b/drivers/input/mouse/bcm5974.c
index cf87f8b..927e479 100644
--- a/drivers/input/mouse/bcm5974.c
+++ b/drivers/input/mouse/bcm5974.c

@@ -433,6 +433,9 @@
 	__set_bit(BTN_TOOL_QUADTAP, input_dev->keybit);
 	__set_bit(BTN_LEFT, input_dev->keybit);
 
+	if (cfg->caps & HAS_INTEGRATED_BUTTON)
+		__set_bit(INPUT_PROP_BUTTONPAD, input_dev->propbit);
+
 	input_set_events_per_packet(input_dev, 60);
 }
 

diff --git a/drivers/input/serio/at32psif.c b/drivers/input/serio/at32psif.c
index 421a744..95280f9 100644
--- a/drivers/input/serio/at32psif.c
+++ b/drivers/input/serio/at32psif.c

@@ -358,7 +358,19 @@
 	.suspend	= psif_suspend,
 	.resume		= psif_resume,
 };
-module_platform_driver(psif_driver);
+
+/*
+ * Module entry point: registers psif_driver through
+ * platform_driver_probe(), passing psif_probe as the probe routine.
+ */
+static int __init psif_init(void)
+{
+	return platform_driver_probe(&psif_driver, psif_probe);
+}
+
+/* Module exit point: unregisters psif_driver. */
+static void __exit psif_exit(void)
+{
+	platform_driver_unregister(&psif_driver);
+}
+
+module_init(psif_init);
+module_exit(psif_exit);
 
 MODULE_AUTHOR("Hans-Christian Egtvedt <egtvedt@samfundet.no>");
 MODULE_DESCRIPTION("Atmel AVR32 PSIF PS/2 driver");

diff --git a/drivers/input/serio/serio_raw.c b/drivers/input/serio/serio_raw.c
index 4d4cd14..8250299 100644
--- a/drivers/input/serio/serio_raw.c
+++ b/drivers/input/serio/serio_raw.c

@@ -220,11 +220,11 @@
 			goto out;
 		}
 		written++;
-	};
+	}
 
 out:
 	mutex_unlock(&serio_raw_mutex);
-	return written;
+	return written ?: retval;
 }
 
 static unsigned int serio_raw_poll(struct file *file, poll_table *wait)
@@ -237,9 +237,9 @@
 
 	mask = serio_raw->dead ? POLLHUP | POLLERR : POLLOUT | POLLWRNORM;
 	if (serio_raw->head != serio_raw->tail)
-		return POLLIN | POLLRDNORM;
+		mask |= POLLIN | POLLRDNORM;
 
-	return 0;
+	return mask;
 }
 
 static const struct file_operations serio_raw_fops = {

diff --git a/drivers/input/touchscreen/atmel-wm97xx.c b/drivers/input/touchscreen/atmel-wm97xx.c
index d016cb2..8034cbb 100644
--- a/drivers/input/touchscreen/atmel-wm97xx.c
+++ b/drivers/input/touchscreen/atmel-wm97xx.c

@@ -429,7 +429,18 @@
 	.suspend	= atmel_wm97xx_suspend,
 	.resume		= atmel_wm97xx_resume,
 };
-module_platform_driver(atmel_wm97xx_driver);
+
+/*
+ * Module entry point: registers atmel_wm97xx_driver through
+ * platform_driver_probe(), passing atmel_wm97xx_probe as the probe routine.
+ */
+static int __init atmel_wm97xx_init(void)
+{
+	return platform_driver_probe(&atmel_wm97xx_driver, atmel_wm97xx_probe);
+}
+module_init(atmel_wm97xx_init);
+
+/* Module exit point: unregisters atmel_wm97xx_driver. */
+static void __exit atmel_wm97xx_exit(void)
+{
+	platform_driver_unregister(&atmel_wm97xx_driver);
+}
+module_exit(atmel_wm97xx_exit);
 
 MODULE_AUTHOR("Hans-Christian Egtvedt <egtvedt@samfundet.no>");
 MODULE_DESCRIPTION("wm97xx continuous touch driver for Atmel AT91 and AVR32");

diff --git a/drivers/input/touchscreen/mc13783_ts.c b/drivers/input/touchscreen/mc13783_ts.c
index 68f86f7..ede0274 100644
--- a/drivers/input/touchscreen/mc13783_ts.c
+++ b/drivers/input/touchscreen/mc13783_ts.c

@@ -240,7 +240,18 @@
 		.name	= MC13783_TS_NAME,
 	},
 };
-module_platform_driver(mc13783_ts_driver);
+
+/*
+ * Module entry point: registers mc13783_ts_driver through
+ * platform_driver_probe(), passing mc13783_ts_probe as the probe routine.
+ */
+static int __init mc13783_ts_init(void)
+{
+	return platform_driver_probe(&mc13783_ts_driver, &mc13783_ts_probe);
+}
+module_init(mc13783_ts_init);
+
+/* Module exit point: unregisters mc13783_ts_driver. */
+static void __exit mc13783_ts_exit(void)
+{
+	platform_driver_unregister(&mc13783_ts_driver);
+}
+module_exit(mc13783_ts_exit);
 
 MODULE_DESCRIPTION("MC13783 input touchscreen driver");
 MODULE_AUTHOR("Sascha Hauer <s.hauer@pengutronix.de>");

diff --git a/drivers/isdn/i4l/Kconfig b/drivers/isdn/i4l/Kconfig
index 9c6650e..2302fbe 100644
--- a/drivers/isdn/i4l/Kconfig
+++ b/drivers/isdn/i4l/Kconfig

@@ -6,7 +6,7 @@
 
 config ISDN_PPP
 	bool "Support synchronous PPP"
-	depends on INET
+	depends on INET && NETDEVICES
 	select SLHC
 	help
 	  Over digital connections such as ISDN, there is no need to

diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig
index 897a77d..c957c34 100644
--- a/drivers/leds/Kconfig
+++ b/drivers/leds/Kconfig

@@ -396,6 +396,13 @@
 	  LED driver chips accessed via the I2C bus.
 	  Driver support brightness control and hardware-assisted blinking.
 
+config LEDS_MAX8997
+	tristate "LED support for MAX8997 PMIC"
+	depends on LEDS_CLASS && MFD_MAX8997
+	help
+	  This option enables support for on-chip LED drivers on
+	  MAXIM MAX8997 PMIC.
+
 config LEDS_TRIGGERS
 	bool "LED Trigger support"
 	depends on LEDS_CLASS

diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile
index 5c9dc4b..b8a9723 100644
--- a/drivers/leds/Makefile
+++ b/drivers/leds/Makefile

@@ -44,6 +44,7 @@
 obj-$(CONFIG_LEDS_NETXBIG)		+= leds-netxbig.o
 obj-$(CONFIG_LEDS_ASIC3)		+= leds-asic3.o
 obj-$(CONFIG_LEDS_RENESAS_TPU)		+= leds-renesas-tpu.o
+obj-$(CONFIG_LEDS_MAX8997)		+= leds-max8997.o
 
 # LED SPI Drivers
 obj-$(CONFIG_LEDS_DAC124S085)		+= leds-dac124s085.o

diff --git a/drivers/leds/leds-max8997.c b/drivers/leds/leds-max8997.c
new file mode 100644
index 0000000..f4c0e37
--- /dev/null
+++ b/drivers/leds/leds-max8997.c

@@ -0,0 +1,372 @@
+/*
+ * leds-max8997.c - LED class driver for MAX8997 LEDs.
+ *
+ * Copyright (C) 2011 Samsung Electronics
+ * Donggeun Kim <dg77.kim@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+#include <linux/leds.h>
+#include <linux/mfd/max8997.h>
+#include <linux/mfd/max8997-private.h>
+#include <linux/platform_device.h>
+
+#define MAX8997_LED_FLASH_SHIFT			3
+#define MAX8997_LED_FLASH_CUR_MASK		0xf8
+#define MAX8997_LED_MOVIE_SHIFT			4
+#define MAX8997_LED_MOVIE_CUR_MASK		0xf0
+
+#define MAX8997_LED_FLASH_MAX_BRIGHTNESS	0x1f
+#define MAX8997_LED_MOVIE_MAX_BRIGHTNESS	0xf
+#define MAX8997_LED_NONE_MAX_BRIGHTNESS		0
+
+#define MAX8997_LED0_FLASH_MASK			0x1
+#define MAX8997_LED0_FLASH_PIN_MASK		0x5
+#define MAX8997_LED0_MOVIE_MASK			0x8
+#define MAX8997_LED0_MOVIE_PIN_MASK		0x28
+
+#define MAX8997_LED1_FLASH_MASK			0x2
+#define MAX8997_LED1_FLASH_PIN_MASK		0x6
+#define MAX8997_LED1_MOVIE_MASK			0x10
+#define MAX8997_LED1_MOVIE_PIN_MASK		0x30
+
+#define MAX8997_LED_BOOST_ENABLE_MASK		(1 << 6)
+
+/* Per-LED driver state; one instance is allocated by each probe call. */
+struct max8997_led {
+	struct max8997_dev *iodev;	/* parent MAX8997 data: I2C client and logging device */
+	struct led_classdev cdev;	/* LED class device registered for this LED */
+	bool enabled;	/* cached boost-enable state tracked by max8997_led_enable() */
+	int id;	/* platform device id; zero picks the LED0 masks, non-zero LED1 */
+	enum max8997_led_mode led_mode;	/* mode recorded by max8997_led_set_mode() */
+	struct mutex mutex;	/* taken by the sysfs "mode" show/store handlers */
+};
+
+/*
+ * Clear the MAX8997_REG_LEN_CNTL bits that belong to @mode for this LED.
+ *
+ * Modes that have no mask associated with them are a no-op.  A failing
+ * register update is logged and otherwise ignored.
+ */
+static void max8997_led_clear_mode(struct max8997_led *led,
+			enum max8997_led_mode mode)
+{
+	const bool second_led = led->id != 0;
+	u8 bits;
+	int err;
+
+	if (mode == MAX8997_FLASH_MODE)
+		bits = second_led ? MAX8997_LED1_FLASH_MASK :
+				    MAX8997_LED0_FLASH_MASK;
+	else if (mode == MAX8997_MOVIE_MODE)
+		bits = second_led ? MAX8997_LED1_MOVIE_MASK :
+				    MAX8997_LED0_MOVIE_MASK;
+	else if (mode == MAX8997_FLASH_PIN_CONTROL_MODE)
+		bits = second_led ? MAX8997_LED1_FLASH_PIN_MASK :
+				    MAX8997_LED0_FLASH_PIN_MASK;
+	else if (mode == MAX8997_MOVIE_PIN_CONTROL_MODE)
+		bits = second_led ? MAX8997_LED1_MOVIE_PIN_MASK :
+				    MAX8997_LED0_MOVIE_PIN_MASK;
+	else
+		return;	/* no enable bits are tied to this mode */
+
+	/* Writing zero under the mask switches the selected bits off. */
+	err = max8997_update_reg(led->iodev->i2c, MAX8997_REG_LEN_CNTL,
+				 0, bits);
+	if (err)
+		dev_err(led->iodev->dev,
+			"failed to update register(%d)\n", err);
+}
+
+/*
+ * Switch this LED to @mode.
+ *
+ * The bits of the previously recorded mode are cleared first, then the
+ * class device's max_brightness is set to the limit of the new mode and
+ * the new mode's bits are set in MAX8997_REG_LEN_CNTL.  A failing
+ * register update is only logged; led->led_mode is updated regardless.
+ */
+static void max8997_led_set_mode(struct max8997_led *led,
+			enum max8997_led_mode mode)
+{
+	const bool second_led = led->id != 0;
+	struct i2c_client *i2c = led->iodev->i2c;
+	u8 bits = 0;
+	int err;
+
+	/* Drop whatever the previous mode had switched on. */
+	max8997_led_clear_mode(led, led->led_mode);
+
+	if (mode == MAX8997_FLASH_MODE) {
+		bits = second_led ? MAX8997_LED1_FLASH_MASK :
+				    MAX8997_LED0_FLASH_MASK;
+		led->cdev.max_brightness = MAX8997_LED_FLASH_MAX_BRIGHTNESS;
+	} else if (mode == MAX8997_MOVIE_MODE) {
+		bits = second_led ? MAX8997_LED1_MOVIE_MASK :
+				    MAX8997_LED0_MOVIE_MASK;
+		led->cdev.max_brightness = MAX8997_LED_MOVIE_MAX_BRIGHTNESS;
+	} else if (mode == MAX8997_FLASH_PIN_CONTROL_MODE) {
+		bits = second_led ? MAX8997_LED1_FLASH_PIN_MASK :
+				    MAX8997_LED0_FLASH_PIN_MASK;
+		led->cdev.max_brightness = MAX8997_LED_FLASH_MAX_BRIGHTNESS;
+	} else if (mode == MAX8997_MOVIE_PIN_CONTROL_MODE) {
+		bits = second_led ? MAX8997_LED1_MOVIE_PIN_MASK :
+				    MAX8997_LED0_MOVIE_PIN_MASK;
+		led->cdev.max_brightness = MAX8997_LED_MOVIE_MAX_BRIGHTNESS;
+	} else {
+		/* Any other mode leaves the LED without usable brightness. */
+		led->cdev.max_brightness = MAX8997_LED_NONE_MAX_BRIGHTNESS;
+	}
+
+	if (bits) {
+		err = max8997_update_reg(i2c, MAX8997_REG_LEN_CNTL,
+					 bits, bits);
+		if (err)
+			dev_err(led->iodev->dev,
+				"failed to update register(%d)\n", err);
+	}
+
+	led->led_mode = mode;
+}
+
+/*
+ * Set or clear the boost enable bit in MAX8997_REG_BOOST_CNTL.
+ *
+ * The request is skipped when the cached state already matches @enable.
+ * The cache is only updated after the register write succeeded, so a
+ * failed write is retried on the next call instead of being masked by
+ * the early-return check.
+ */
+static void max8997_led_enable(struct max8997_led *led, bool enable)
+{
+	int ret;
+	struct i2c_client *client = led->iodev->i2c;
+	u8 val = 0, mask = MAX8997_LED_BOOST_ENABLE_MASK;
+
+	if (led->enabled == enable)
+		return;
+
+	val = enable ? MAX8997_LED_BOOST_ENABLE_MASK : 0;
+
+	ret = max8997_update_reg(client, MAX8997_REG_BOOST_CNTL, val, mask);
+	if (ret) {
+		dev_err(led->iodev->dev,
+			"failed to update register(%d)\n", ret);
+		/* Hardware state is unchanged/unknown: keep the old cache. */
+		return;
+	}
+
+	led->enabled = enable;
+}
+
+/*
+ * Program the current register that matches the LED's recorded mode.
+ *
+ * Flash modes use the per-LED FLASH1/FLASH2 current register, movie
+ * modes use the single MOVIE current register.  Any other mode does
+ * nothing.  A failing register update is logged and otherwise ignored.
+ */
+static void max8997_led_set_current(struct max8997_led *led,
+				enum led_brightness value)
+{
+	u8 reg, field, bits;
+	int err;
+
+	switch (led->led_mode) {
+	case MAX8997_FLASH_MODE:
+	case MAX8997_FLASH_PIN_CONTROL_MODE:
+		reg = led->id ? MAX8997_REG_FLASH2_CUR : MAX8997_REG_FLASH1_CUR;
+		field = MAX8997_LED_FLASH_CUR_MASK;
+		bits = value << MAX8997_LED_FLASH_SHIFT;
+		break;
+	case MAX8997_MOVIE_MODE:
+	case MAX8997_MOVIE_PIN_CONTROL_MODE:
+		reg = MAX8997_REG_MOVIE_CUR;
+		field = MAX8997_LED_MOVIE_CUR_MASK;
+		bits = value << MAX8997_LED_MOVIE_SHIFT;
+		break;
+	default:
+		/* no current register is associated with this mode */
+		return;
+	}
+
+	err = max8997_update_reg(led->iodev->i2c, reg, bits, field);
+	if (err)
+		dev_err(led->iodev->dev,
+			"failed to update register(%d)\n", err);
+}
+
+/*
+ * LED class brightness_set callback: program the requested current and
+ * then switch the boost on for a non-zero value, off for zero.
+ */
+static void max8997_led_brightness_set(struct led_classdev *led_cdev,
+				enum led_brightness value)
+{
+	struct max8997_led *priv =
+			container_of(led_cdev, struct max8997_led, cdev);
+
+	/* The current is written in either case, before the boost toggles. */
+	max8997_led_set_current(priv, value);
+	max8997_led_enable(priv, value != 0);
+}
+
+/*
+ * sysfs "mode" read handler: writes the name of the recorded LED mode,
+ * followed by a newline, into @buf and returns the number of characters
+ * written.  Unrecognised modes are reported as "NONE".
+ */
+static ssize_t max8997_led_show_mode(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct led_classdev *led_cdev = dev_get_drvdata(dev);
+	struct max8997_led *priv =
+			container_of(led_cdev, struct max8997_led, cdev);
+	const char *text;
+	ssize_t len;
+
+	mutex_lock(&priv->mutex);
+
+	if (priv->led_mode == MAX8997_FLASH_MODE)
+		text = "FLASH\n";
+	else if (priv->led_mode == MAX8997_MOVIE_MODE)
+		text = "MOVIE\n";
+	else if (priv->led_mode == MAX8997_FLASH_PIN_CONTROL_MODE)
+		text = "FLASH_PIN_CONTROL\n";
+	else if (priv->led_mode == MAX8997_MOVIE_PIN_CONTROL_MODE)
+		text = "MOVIE_PIN_CONTROL\n";
+	else
+		text = "NONE\n";
+
+	len = sprintf(buf, "%s", text);
+
+	mutex_unlock(&priv->mutex);
+
+	return len;
+}
+
+/*
+ * sysfs "mode" write handler: selects the LED mode from the prefix of
+ * @buf and applies it.  The longer *_PIN_CONTROL names are tested before
+ * their FLASH/MOVIE prefixes; anything unrecognised selects MAX8997_NONE.
+ * Always consumes the whole input and returns @size.
+ */
+static ssize_t max8997_led_store_mode(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t size)
+{
+	struct led_classdev *led_cdev = dev_get_drvdata(dev);
+	struct max8997_led *priv =
+			container_of(led_cdev, struct max8997_led, cdev);
+	enum max8997_led_mode wanted = MAX8997_NONE;
+
+	mutex_lock(&priv->mutex);
+
+	if (strncmp(buf, "FLASH_PIN_CONTROL", 17) == 0)
+		wanted = MAX8997_FLASH_PIN_CONTROL_MODE;
+	else if (strncmp(buf, "MOVIE_PIN_CONTROL", 17) == 0)
+		wanted = MAX8997_MOVIE_PIN_CONTROL_MODE;
+	else if (strncmp(buf, "FLASH", 5) == 0)
+		wanted = MAX8997_FLASH_MODE;
+	else if (strncmp(buf, "MOVIE", 5) == 0)
+		wanted = MAX8997_MOVIE_MODE;
+
+	max8997_led_set_mode(priv, wanted);
+
+	mutex_unlock(&priv->mutex);
+
+	return size;
+}
+
+static DEVICE_ATTR(mode, 0644, max8997_led_show_mode, max8997_led_store_mode);
+
+/*
+ * Probe one MAX8997 LED platform device.
+ *
+ * Allocates the per-LED state, applies the mode and brightness from the
+ * parent's platform data (or MAX8997_NONE / 0 when no LED platform data
+ * is given), registers the LED class device and creates its "mode"
+ * sysfs attribute.
+ *
+ * Returns 0 on success, -ENODEV without platform data, -ENOMEM on
+ * allocation failure, or the error from class registration / attribute
+ * creation.
+ */
+static int __devinit max8997_led_probe(struct platform_device *pdev)
+{
+	struct max8997_dev *iodev = dev_get_drvdata(pdev->dev.parent);
+	struct max8997_platform_data *pdata = dev_get_platdata(iodev->dev);
+	struct max8997_led *led;
+	const size_t name_len = 20;
+	char *name;
+	int ret = 0;
+
+	if (pdata == NULL) {
+		dev_err(&pdev->dev, "no platform data\n");
+		return -ENODEV;
+	}
+
+	led = kzalloc(sizeof(*led), GFP_KERNEL);
+	if (led == NULL) {
+		ret = -ENOMEM;
+		goto err_mem;
+	}
+
+	/*
+	 * led->cdev.name is kept for the lifetime of the class device, so
+	 * the string must not live on this function's stack.  A
+	 * device-managed buffer is released by the driver core after
+	 * remove() (or after a failed probe), i.e. after the class device
+	 * is gone.
+	 */
+	name = devm_kzalloc(&pdev->dev, name_len, GFP_KERNEL);
+	if (name == NULL) {
+		ret = -ENOMEM;
+		goto err_led;
+	}
+
+	led->id = pdev->id;
+	snprintf(name, name_len, "max8997-led%d", pdev->id);
+
+	led->cdev.name = name;
+	led->cdev.brightness_set = max8997_led_brightness_set;
+	led->cdev.flags |= LED_CORE_SUSPENDRESUME;
+	led->cdev.brightness = 0;
+	led->iodev = iodev;
+
+	/* initialize mode and brightness according to platform_data */
+	if (pdata->led_pdata) {
+		u8 brightness = pdata->led_pdata->brightness[led->id];
+
+		max8997_led_set_mode(led, pdata->led_pdata->mode[led->id]);
+
+		/* set_mode just updated max_brightness for the new mode */
+		if (brightness > led->cdev.max_brightness)
+			brightness = led->cdev.max_brightness;
+		max8997_led_set_current(led, brightness);
+		led->cdev.brightness = brightness;
+	} else {
+		max8997_led_set_mode(led, MAX8997_NONE);
+		max8997_led_set_current(led, 0);
+	}
+
+	mutex_init(&led->mutex);
+
+	platform_set_drvdata(pdev, led);
+
+	ret = led_classdev_register(&pdev->dev, &led->cdev);
+	if (ret < 0)
+		goto err_led;
+
+	ret = device_create_file(led->cdev.dev, &dev_attr_mode);
+	if (ret != 0) {
+		dev_err(&pdev->dev,
+			"failed to create file: %d\n", ret);
+		goto err_file;
+	}
+
+	return 0;
+
+err_file:
+	led_classdev_unregister(&led->cdev);
+err_led:
+	kfree(led);
+err_mem:
+	return ret;
+}
+
+/*
+ * Undo probe in reverse order: drop the "mode" attribute, unregister the
+ * LED class device and free the per-LED state.  Always returns 0.
+ */
+static int __devexit max8997_led_remove(struct platform_device *pdev)
+{
+	struct max8997_led *priv = platform_get_drvdata(pdev);
+	struct led_classdev *cdev = &priv->cdev;
+
+	device_remove_file(cdev->dev, &dev_attr_mode);
+	led_classdev_unregister(cdev);
+	kfree(priv);
+
+	return 0;
+}
+
+/* Platform driver bound to the "max8997-led" devices. */
+static struct platform_driver max8997_led_driver = {
+	.driver = {
+		.name  = "max8997-led",
+		.owner = THIS_MODULE,
+	},
+	.probe  = max8997_led_probe,
+	.remove = __devexit_p(max8997_led_remove),
+};
+
+/*
+ * Generates the module init/exit functions that register and unregister
+ * max8997_led_driver, replacing the hand-written boilerplate.
+ */
+module_platform_driver(max8997_led_driver);
+
+MODULE_AUTHOR("Donggeun Kim <dg77.kim@samsung.com>");
+MODULE_DESCRIPTION("MAX8997 LED driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:max8997-led");

diff --git a/drivers/lguest/Makefile b/drivers/lguest/Makefile
index 8ac947c..c419750 100644
--- a/drivers/lguest/Makefile
+++ b/drivers/lguest/Makefile

@@ -18,7 +18,7 @@
 Beer:
 	@for f in Preparation Guest Drivers Launcher Host Switcher Mastery; do echo "{==- $$f -==}"; make -s $$f; done; echo "{==-==}"
 Preparation Preparation! Guest Drivers Launcher Host Switcher Mastery:
-	@sh ../../Documentation/virtual/lguest/extract $(PREFIX) `find ../../* -name '*.[chS]' -wholename '*lguest*'`
+	@sh ../../tools/lguest/extract $(PREFIX) `find ../../* -name '*.[chS]' -wholename '*lguest*'`
 Puppy:
 	@clear
 	@printf "      __  \n (___()'\`;\n /,    /\`\n \\\\\\\"--\\\\\\   \n"

diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c
index 595d731..9e8388e 100644
--- a/drivers/lguest/lguest_device.c
+++ b/drivers/lguest/lguest_device.c

@@ -241,7 +241,7 @@
 }
 
 /* An extern declaration inside a C file is bad form.  Don't do it. */
-extern void lguest_setup_irq(unsigned int irq);
+extern int lguest_setup_irq(unsigned int irq);
 
 /*
  * This routine finds the Nth virtqueue described in the configuration of
@@ -292,17 +292,21 @@
 
 	/*
 	 * OK, tell virtio_ring.c to set up a virtqueue now we know its size
-	 * and we've got a pointer to its pages.
+	 * and we've got a pointer to its pages.  Note that we set weak_barriers
+	 * to 'true': the host is just a(nother) SMP CPU, so we only need inter-cpu
+	 * barriers.
 	 */
-	vq = vring_new_virtqueue(lvq->config.num, LGUEST_VRING_ALIGN,
-				 vdev, lvq->pages, lg_notify, callback, name);
+	vq = vring_new_virtqueue(lvq->config.num, LGUEST_VRING_ALIGN, vdev,
+				 true, lvq->pages, lg_notify, callback, name);
 	if (!vq) {
 		err = -ENOMEM;
 		goto unmap;
 	}
 
 	/* Make sure the interrupt is allocated. */
-	lguest_setup_irq(lvq->config.irq);
+	err = lguest_setup_irq(lvq->config.irq);
+	if (err)
+		goto destroy_vring;
 
 	/*
 	 * Tell the interrupt for this virtqueue to go to the virtio_ring
@@ -315,7 +319,7 @@
 	err = request_irq(lvq->config.irq, vring_interrupt, IRQF_SHARED,
 			  dev_name(&vdev->dev), vq);
 	if (err)
-		goto destroy_vring;
+		goto free_desc;
 
 	/*
 	 * Last of all we hook up our 'struct lguest_vq_info" to the
@@ -324,6 +328,8 @@
 	vq->priv = lvq;
 	return vq;
 
+free_desc:
+	irq_free_desc(lvq->config.irq);
 destroy_vring:
 	vring_del_virtqueue(vq);
 unmap:

diff --git a/drivers/lguest/segments.c b/drivers/lguest/segments.c
index ede4658..c4fb424 100644
--- a/drivers/lguest/segments.c
+++ b/drivers/lguest/segments.c

@@ -81,8 +81,8 @@
 		 * sometimes careless and leaves this as 0, even though it's
 		 * running at privilege level 1.  If so, we fix it here.
 		 */
-		if ((cpu->arch.gdt[i].b & 0x00006000) == 0)
-			cpu->arch.gdt[i].b |= (GUEST_PL << 13);
+		if (cpu->arch.gdt[i].dpl == 0)
+			cpu->arch.gdt[i].dpl |= GUEST_PL;
 
 		/*
 		 * Each descriptor has an "accessed" bit.  If we don't set it
@@ -90,7 +90,7 @@
 		 * that entry into a segment register.  But the GDT isn't
 		 * writable by the Guest, so bad things can happen.
 		 */
-		cpu->arch.gdt[i].b |= 0x00000100;
+		cpu->arch.gdt[i].type |= 0x1;
 	}
 }
 
@@ -114,13 +114,19 @@
 
 	/*
 	 * The TSS segment refers to the TSS entry for this particular CPU.
-	 * Forgive the magic flags: the 0x8900 means the entry is Present, it's
-	 * privilege level 0 Available 386 TSS system segment, and the 0x67
-	 * means Saturn is eclipsed by Mercury in the twelfth house.
 	 */
-	gdt[GDT_ENTRY_TSS].a = 0x00000067 | (tss << 16);
-	gdt[GDT_ENTRY_TSS].b = 0x00008900 | (tss & 0xFF000000)
-		| ((tss >> 16) & 0x000000FF);
+	gdt[GDT_ENTRY_TSS].a = 0;
+	gdt[GDT_ENTRY_TSS].b = 0;
+
+	gdt[GDT_ENTRY_TSS].limit0 = 0x67;
+	gdt[GDT_ENTRY_TSS].base0  = tss & 0xFFFF;
+	gdt[GDT_ENTRY_TSS].base1  = (tss >> 16) & 0xFF;
+	gdt[GDT_ENTRY_TSS].base2  = tss >> 24;
+	gdt[GDT_ENTRY_TSS].type   = 0x9; /* 32-bit TSS (available) */
+	gdt[GDT_ENTRY_TSS].p      = 0x1; /* Entry is present */
+	gdt[GDT_ENTRY_TSS].dpl    = 0x0; /* Privilege level 0 */
+	gdt[GDT_ENTRY_TSS].s      = 0x0; /* system segment */
+
 }
 
 /*
@@ -135,8 +141,8 @@
 	 */
 	cpu->arch.gdt[GDT_ENTRY_KERNEL_CS] = FULL_EXEC_SEGMENT;
 	cpu->arch.gdt[GDT_ENTRY_KERNEL_DS] = FULL_SEGMENT;
-	cpu->arch.gdt[GDT_ENTRY_KERNEL_CS].b |= (GUEST_PL << 13);
-	cpu->arch.gdt[GDT_ENTRY_KERNEL_DS].b |= (GUEST_PL << 13);
+	cpu->arch.gdt[GDT_ENTRY_KERNEL_CS].dpl |= GUEST_PL;
+	cpu->arch.gdt[GDT_ENTRY_KERNEL_DS].dpl |= GUEST_PL;
 }
 
 /*H:650

diff --git a/drivers/mfd/88pm860x-i2c.c b/drivers/mfd/88pm860x-i2c.c
index e017dc8..f93dd95 100644
--- a/drivers/mfd/88pm860x-i2c.c
+++ b/drivers/mfd/88pm860x-i2c.c

@@ -12,51 +12,20 @@
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/i2c.h>
+#include <linux/err.h>
+#include <linux/regmap.h>
 #include <linux/mfd/88pm860x.h>
 #include <linux/slab.h>
 
-static inline int pm860x_read_device(struct i2c_client *i2c,
-				     int reg, int bytes, void *dest)
-{
-	unsigned char data;
-	int ret;
-
-	data = (unsigned char)reg;
-	ret = i2c_master_send(i2c, &data, 1);
-	if (ret < 0)
-		return ret;
-
-	ret = i2c_master_recv(i2c, dest, bytes);
-	if (ret < 0)
-		return ret;
-	return 0;
-}
-
-static inline int pm860x_write_device(struct i2c_client *i2c,
-				      int reg, int bytes, void *src)
-{
-	unsigned char buf[bytes + 1];
-	int ret;
-
-	buf[0] = (unsigned char)reg;
-	memcpy(&buf[1], src, bytes);
-
-	ret = i2c_master_send(i2c, buf, bytes + 1);
-	if (ret < 0)
-		return ret;
-	return 0;
-}
-
 int pm860x_reg_read(struct i2c_client *i2c, int reg)
 {
 	struct pm860x_chip *chip = i2c_get_clientdata(i2c);
-	unsigned char data;
+	struct regmap *map = (i2c == chip->client) ? chip->regmap
+				: chip->regmap_companion;
+	unsigned int data;
 	int ret;
 
-	mutex_lock(&chip->io_lock);
-	ret = pm860x_read_device(i2c, reg, 1, &data);
-	mutex_unlock(&chip->io_lock);
-
+	ret = regmap_read(map, reg, &data);
 	if (ret < 0)
 		return ret;
 	else
@@ -68,12 +37,11 @@
 		     unsigned char data)
 {
 	struct pm860x_chip *chip = i2c_get_clientdata(i2c);
+	struct regmap *map = (i2c == chip->client) ? chip->regmap
+				: chip->regmap_companion;
 	int ret;
 
-	mutex_lock(&chip->io_lock);
-	ret = pm860x_write_device(i2c, reg, 1, &data);
-	mutex_unlock(&chip->io_lock);
-
+	ret = regmap_write(map, reg, data);
 	return ret;
 }
 EXPORT_SYMBOL(pm860x_reg_write);
@@ -82,12 +50,11 @@
 		     int count, unsigned char *buf)
 {
 	struct pm860x_chip *chip = i2c_get_clientdata(i2c);
+	struct regmap *map = (i2c == chip->client) ? chip->regmap
+				: chip->regmap_companion;
 	int ret;
 
-	mutex_lock(&chip->io_lock);
-	ret = pm860x_read_device(i2c, reg, count, buf);
-	mutex_unlock(&chip->io_lock);
-
+	ret = regmap_raw_read(map, reg, buf, count);
 	return ret;
 }
 EXPORT_SYMBOL(pm860x_bulk_read);
@@ -96,12 +63,11 @@
 		      int count, unsigned char *buf)
 {
 	struct pm860x_chip *chip = i2c_get_clientdata(i2c);
+	struct regmap *map = (i2c == chip->client) ? chip->regmap
+				: chip->regmap_companion;
 	int ret;
 
-	mutex_lock(&chip->io_lock);
-	ret = pm860x_write_device(i2c, reg, count, buf);
-	mutex_unlock(&chip->io_lock);
-
+	ret = regmap_raw_write(map, reg, buf, count);
 	return ret;
 }
 EXPORT_SYMBOL(pm860x_bulk_write);
@@ -110,39 +76,78 @@
 		    unsigned char mask, unsigned char data)
 {
 	struct pm860x_chip *chip = i2c_get_clientdata(i2c);
-	unsigned char value;
+	struct regmap *map = (i2c == chip->client) ? chip->regmap
+				: chip->regmap_companion;
 	int ret;
 
-	mutex_lock(&chip->io_lock);
-	ret = pm860x_read_device(i2c, reg, 1, &value);
-	if (ret < 0)
-		goto out;
-	value &= ~mask;
-	value |= data;
-	ret = pm860x_write_device(i2c, reg, 1, &value);
-out:
-	mutex_unlock(&chip->io_lock);
+	ret = regmap_update_bits(map, reg, mask, data);
 	return ret;
 }
 EXPORT_SYMBOL(pm860x_set_bits);
 
+/*
+ * Raw register read that talks to the adapter's master_xfer directly.
+ *
+ * The page access helpers in this file call it between
+ * i2c_lock_adapter() and i2c_unlock_adapter(), i.e. with the adapter
+ * already held by the caller.
+ *
+ * @i2c:   client whose address and adapter are used
+ * @reg:   register/command byte sent first
+ * @bytes: number of bytes to read back; 0 sends only the command byte
+ * @dest:  destination buffer, must not be NULL
+ *
+ * Returns 0 on success, -EINVAL for a NULL @dest or a @bytes value that
+ * is negative or does not fit the on-stack receive buffer, or the
+ * negative error reported by the adapter.
+ */
+static int read_device(struct i2c_client *i2c, int reg,
+		       int bytes, void *dest)
+{
+	unsigned char msgbuf0[I2C_SMBUS_BLOCK_MAX + 3];
+	unsigned char msgbuf1[I2C_SMBUS_BLOCK_MAX + 2];
+	struct i2c_adapter *adap = i2c->adapter;
+	struct i2c_msg msg[2] = {{i2c->addr, 0, 1, msgbuf0},
+				 {i2c->addr, I2C_M_RD, 0, msgbuf1},
+				};
+	int num = 1, ret = 0;
+
+	if (dest == NULL)
+		return -EINVAL;
+	/* The reply lands in msgbuf1 first; never let it overrun the stack. */
+	if (bytes < 0 || bytes > (int)sizeof(msgbuf1))
+		return -EINVAL;
+	msgbuf0[0] = (unsigned char)reg;	/* command */
+	msg[1].len = bytes;
+
+	/* if data needs to read back, num should be 2 */
+	if (bytes > 0)
+		num = 2;
+	ret = adap->algo->master_xfer(adap, msg, num);
+	if (ret < 0)
+		return ret;
+	/* Only hand data to the caller once the transfer succeeded. */
+	memcpy(dest, msgbuf1, bytes);
+	return 0;
+}
+
+/*
+ * Raw register write that talks to the adapter's master_xfer directly:
+ * sends @reg followed by @bytes bytes taken from @src as one message.
+ *
+ * Returns 0 on success or the negative error reported by the adapter.
+ */
+static int write_device(struct i2c_client *i2c, int reg,
+			int bytes, void *src)
+{
+	unsigned char frame[bytes + 1];
+	struct i2c_adapter *adap = i2c->adapter;
+	struct i2c_msg xfer = {
+		.addr	= i2c->addr,
+		.flags	= 0,
+		.len	= bytes + 1,
+		.buf	= frame,
+	};
+	int rc;
+
+	/* register byte first, payload right behind it */
+	frame[0] = (unsigned char)reg;
+	memcpy(&frame[1], src, bytes);
+
+	rc = adap->algo->master_xfer(adap, &xfer, 1);
+	return rc < 0 ? rc : 0;
+}
+
 int pm860x_page_reg_read(struct i2c_client *i2c, int reg)
 {
-	struct pm860x_chip *chip = i2c_get_clientdata(i2c);
 	unsigned char zero = 0;
 	unsigned char data;
 	int ret;
 
-	mutex_lock(&chip->io_lock);
-	pm860x_write_device(i2c, 0xFA, 0, &zero);
-	pm860x_write_device(i2c, 0xFB, 0, &zero);
-	pm860x_write_device(i2c, 0xFF, 0, &zero);
-	ret = pm860x_read_device(i2c, reg, 1, &data);
+	i2c_lock_adapter(i2c->adapter);
+	read_device(i2c, 0xFA, 0, &zero);
+	read_device(i2c, 0xFB, 0, &zero);
+	read_device(i2c, 0xFF, 0, &zero);
+	ret = read_device(i2c, reg, 1, &data);
 	if (ret >= 0)
 		ret = (int)data;
-	pm860x_write_device(i2c, 0xFE, 0, &zero);
-	pm860x_write_device(i2c, 0xFC, 0, &zero);
-	mutex_unlock(&chip->io_lock);
+	read_device(i2c, 0xFE, 0, &zero);
+	read_device(i2c, 0xFC, 0, &zero);
+	i2c_unlock_adapter(i2c->adapter);
 	return ret;
 }
 EXPORT_SYMBOL(pm860x_page_reg_read);
@@ -150,18 +155,17 @@
 int pm860x_page_reg_write(struct i2c_client *i2c, int reg,
 			  unsigned char data)
 {
-	struct pm860x_chip *chip = i2c_get_clientdata(i2c);
 	unsigned char zero;
 	int ret;
 
-	mutex_lock(&chip->io_lock);
-	pm860x_write_device(i2c, 0xFA, 0, &zero);
-	pm860x_write_device(i2c, 0xFB, 0, &zero);
-	pm860x_write_device(i2c, 0xFF, 0, &zero);
-	ret = pm860x_write_device(i2c, reg, 1, &data);
-	pm860x_write_device(i2c, 0xFE, 0, &zero);
-	pm860x_write_device(i2c, 0xFC, 0, &zero);
-	mutex_unlock(&chip->io_lock);
+	i2c_lock_adapter(i2c->adapter);
+	read_device(i2c, 0xFA, 0, &zero);
+	read_device(i2c, 0xFB, 0, &zero);
+	read_device(i2c, 0xFF, 0, &zero);
+	ret = write_device(i2c, reg, 1, &data);
+	read_device(i2c, 0xFE, 0, &zero);
+	read_device(i2c, 0xFC, 0, &zero);
+	i2c_unlock_adapter(i2c->adapter);
 	return ret;
 }
 EXPORT_SYMBOL(pm860x_page_reg_write);
@@ -169,18 +173,17 @@
 int pm860x_page_bulk_read(struct i2c_client *i2c, int reg,
 			  int count, unsigned char *buf)
 {
-	struct pm860x_chip *chip = i2c_get_clientdata(i2c);
 	unsigned char zero = 0;
 	int ret;
 
-	mutex_lock(&chip->io_lock);
-	pm860x_write_device(i2c, 0xFA, 0, &zero);
-	pm860x_write_device(i2c, 0xFB, 0, &zero);
-	pm860x_write_device(i2c, 0xFF, 0, &zero);
-	ret = pm860x_read_device(i2c, reg, count, buf);
-	pm860x_write_device(i2c, 0xFE, 0, &zero);
-	pm860x_write_device(i2c, 0xFC, 0, &zero);
-	mutex_unlock(&chip->io_lock);
+	i2c_lock_adapter(i2c->adapter);
+	read_device(i2c, 0xfa, 0, &zero);
+	read_device(i2c, 0xfb, 0, &zero);
+	read_device(i2c, 0xff, 0, &zero);
+	ret = read_device(i2c, reg, count, buf);
+	read_device(i2c, 0xFE, 0, &zero);
+	read_device(i2c, 0xFC, 0, &zero);
+	i2c_unlock_adapter(i2c->adapter);
 	return ret;
 }
 EXPORT_SYMBOL(pm860x_page_bulk_read);
@@ -188,18 +191,17 @@
 int pm860x_page_bulk_write(struct i2c_client *i2c, int reg,
 			   int count, unsigned char *buf)
 {
-	struct pm860x_chip *chip = i2c_get_clientdata(i2c);
 	unsigned char zero = 0;
 	int ret;
 
-	mutex_lock(&chip->io_lock);
-	pm860x_write_device(i2c, 0xFA, 0, &zero);
-	pm860x_write_device(i2c, 0xFB, 0, &zero);
-	pm860x_write_device(i2c, 0xFF, 0, &zero);
-	ret = pm860x_write_device(i2c, reg, count, buf);
-	pm860x_write_device(i2c, 0xFE, 0, &zero);
-	pm860x_write_device(i2c, 0xFC, 0, &zero);
-	mutex_unlock(&chip->io_lock);
+	i2c_lock_adapter(i2c->adapter);
+	read_device(i2c, 0xFA, 0, &zero);
+	read_device(i2c, 0xFB, 0, &zero);
+	read_device(i2c, 0xFF, 0, &zero);
+	ret = write_device(i2c, reg, count, buf);
+	read_device(i2c, 0xFE, 0, &zero);
+	read_device(i2c, 0xFC, 0, &zero);
+	i2c_unlock_adapter(i2c->adapter);
 	return ret;
 }
 EXPORT_SYMBOL(pm860x_page_bulk_write);
@@ -207,25 +210,24 @@
 int pm860x_page_set_bits(struct i2c_client *i2c, int reg,
 			 unsigned char mask, unsigned char data)
 {
-	struct pm860x_chip *chip = i2c_get_clientdata(i2c);
 	unsigned char zero;
 	unsigned char value;
 	int ret;
 
-	mutex_lock(&chip->io_lock);
-	pm860x_write_device(i2c, 0xFA, 0, &zero);
-	pm860x_write_device(i2c, 0xFB, 0, &zero);
-	pm860x_write_device(i2c, 0xFF, 0, &zero);
-	ret = pm860x_read_device(i2c, reg, 1, &value);
+	i2c_lock_adapter(i2c->adapter);
+	read_device(i2c, 0xFA, 0, &zero);
+	read_device(i2c, 0xFB, 0, &zero);
+	read_device(i2c, 0xFF, 0, &zero);
+	ret = read_device(i2c, reg, 1, &value);
 	if (ret < 0)
 		goto out;
 	value &= ~mask;
 	value |= data;
-	ret = pm860x_write_device(i2c, reg, 1, &value);
+	ret = write_device(i2c, reg, 1, &value);
 out:
-	pm860x_write_device(i2c, 0xFE, 0, &zero);
-	pm860x_write_device(i2c, 0xFC, 0, &zero);
-	mutex_unlock(&chip->io_lock);
+	read_device(i2c, 0xFE, 0, &zero);
+	read_device(i2c, 0xFC, 0, &zero);
+	i2c_unlock_adapter(i2c->adapter);
 	return ret;
 }
 EXPORT_SYMBOL(pm860x_page_set_bits);
@@ -257,11 +259,17 @@
 	return 0;
 }
 
+static struct regmap_config pm860x_regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 8,
+};
+
 static int __devinit pm860x_probe(struct i2c_client *client,
 				  const struct i2c_device_id *id)
 {
 	struct pm860x_platform_data *pdata = client->dev.platform_data;
 	struct pm860x_chip *chip;
+	int ret;
 
 	if (!pdata) {
 		pr_info("No platform data in %s!\n", __func__);
@@ -273,10 +281,17 @@
 		return -ENOMEM;
 
 	chip->id = verify_addr(client);
+	chip->regmap = regmap_init_i2c(client, &pm860x_regmap_config);
+	if (IS_ERR(chip->regmap)) {
+		ret = PTR_ERR(chip->regmap);
+		dev_err(&client->dev, "Failed to allocate register map: %d\n",
+				ret);
+		kfree(chip);
+		return ret;
+	}
 	chip->client = client;
 	i2c_set_clientdata(client, chip);
 	chip->dev = &client->dev;
-	mutex_init(&chip->io_lock);
 	dev_set_drvdata(chip->dev, chip);
 
 	/*
@@ -290,6 +305,18 @@
 		chip->companion_addr = pdata->companion_addr;
 		chip->companion = i2c_new_dummy(chip->client->adapter,
 						chip->companion_addr);
+		chip->regmap_companion = regmap_init_i2c(chip->companion,
+							&pm860x_regmap_config);
+		if (IS_ERR(chip->regmap_companion)) {
+			ret = PTR_ERR(chip->regmap_companion);
+			dev_err(&chip->companion->dev,
+				"Failed to allocate register map: %d\n", ret);
+			/* unwind what probe has set up so far */
+			i2c_unregister_device(chip->companion);
+			regmap_exit(chip->regmap);
+			kfree(chip);
+			return ret;
+		}
 		i2c_set_clientdata(chip->companion, chip);
 	}
 
@@ -302,7 +325,11 @@
 	struct pm860x_chip *chip = i2c_get_clientdata(client);
 
 	pm860x_device_exit(chip);
-	i2c_unregister_device(chip->companion);
+	if (chip->companion) {
+		regmap_exit(chip->regmap_companion);
+		i2c_unregister_device(chip->companion);
+	}
+	regmap_exit(chip->regmap);
 	kfree(chip);
 	return 0;
 }

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 053208d..cd13e9f 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig

@@ -12,6 +12,7 @@
 config MFD_88PM860X
 	bool "Support Marvell 88PM8606/88PM8607"
 	depends on I2C=y && GENERIC_HARDIRQS
+	select REGMAP_I2C
 	select MFD_CORE
 	help
 	  This supports for Marvell 88PM8606/88PM8607 Power Management IC.
@@ -199,7 +200,7 @@
 
 config TWL4030_CORE
 	bool "Texas Instruments TWL4030/TWL5030/TWL6030/TPS659x0 Support"
-	depends on I2C=y && GENERIC_HARDIRQS
+	depends on I2C=y && GENERIC_HARDIRQS && IRQ_DOMAIN
 	help
 	  Say yes here if you have TWL4030 / TWL6030 family chip on your board.
 	  This core driver provides register access and IRQ handling
@@ -257,7 +258,7 @@
 
 config MFD_STMPE
 	bool "Support STMicroelectronics STMPE"
-	depends on I2C=y && GENERIC_HARDIRQS
+	depends on (I2C=y || SPI_MASTER=y) && GENERIC_HARDIRQS
 	select MFD_CORE
 	help
 	  Support for the STMPE family of I/O Expanders from
@@ -278,6 +279,23 @@
 		Keypad: stmpe-keypad
 		Touchscreen: stmpe-ts
 
+menu "STMPE Interface Drivers"
+depends on MFD_STMPE
+
+config STMPE_I2C
+	bool "STMPE I2C Interface"
+	depends on I2C=y
+	default y
+	help
+	  This is used to enable I2C interface of STMPE
+
+config STMPE_SPI
+	bool "STMPE SPI Interface"
+	depends on SPI_MASTER
+	help
+	  This is used to enable SPI interface of STMPE
+endmenu
+
 config MFD_TC3589X
 	bool "Support Toshiba TC35892 and variants"
 	depends on I2C=y && GENERIC_HARDIRQS
@@ -311,7 +329,7 @@
 
 config MFD_TC6393XB
 	bool "Support Toshiba TC6393XB"
-	depends on GPIOLIB && ARM
+	depends on GPIOLIB && ARM && HAVE_CLK
 	select MFD_CORE
 	select MFD_TMIO
 	help
@@ -399,6 +417,17 @@
 	  additional drivers must be enabled in order to use the functionality
 	  of the device.
 
+config MFD_S5M_CORE
+	bool "SAMSUNG S5M Series Support"
+	depends on I2C=y && GENERIC_HARDIRQS
+	select MFD_CORE
+	select REGMAP_I2C
+	help
+	 Support for the Samsung Electronics S5M MFD series.
+	 This driver provides common support for accessing the device;
+	 additional drivers must be enabled in order to use the functionality
+	 of the device.
+
 config MFD_WM8400
 	tristate "Support Wolfson Microelectronics WM8400"
 	select MFD_CORE

diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 47591fc..b953bab 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile

@@ -16,6 +16,8 @@
 obj-$(CONFIG_MFD_TI_SSP)	+= ti-ssp.o
 
 obj-$(CONFIG_MFD_STMPE)		+= stmpe.o
+obj-$(CONFIG_STMPE_I2C)		+= stmpe-i2c.o
+obj-$(CONFIG_STMPE_SPI)		+= stmpe-spi.o
 obj-$(CONFIG_MFD_TC3589X)	+= tc3589x.o
 obj-$(CONFIG_MFD_T7L66XB)	+= t7l66xb.o tmio_core.o
 obj-$(CONFIG_MFD_TC6387XB)	+= tc6387xb.o tmio_core.o
@@ -109,3 +111,4 @@
 obj-$(CONFIG_TPS65911_COMPARATOR)	+= tps65911-comparator.o
 obj-$(CONFIG_MFD_AAT2870_CORE)	+= aat2870-core.o
 obj-$(CONFIG_MFD_INTEL_MSIC)	+= intel_msic.o
+obj-$(CONFIG_MFD_S5M_CORE)	+= s5m-core.o s5m-irq.o

diff --git a/drivers/mfd/aat2870-core.c b/drivers/mfd/aat2870-core.c
index 02c4201..3aa36eb 100644
--- a/drivers/mfd/aat2870-core.c
+++ b/drivers/mfd/aat2870-core.c

@@ -407,13 +407,13 @@
 		aat2870->init(aat2870);
 
 	if (aat2870->en_pin >= 0) {
-		ret = gpio_request(aat2870->en_pin, "aat2870-en");
+		ret = gpio_request_one(aat2870->en_pin, GPIOF_OUT_INIT_HIGH,
+				       "aat2870-en");
 		if (ret < 0) {
 			dev_err(&client->dev,
 				"Failed to request GPIO %d\n", aat2870->en_pin);
 			goto out_kfree;
 		}
-		gpio_direction_output(aat2870->en_pin, 1);
 	}
 
 	aat2870_enable(aat2870);
@@ -468,9 +468,10 @@
 	return 0;
 }
 
-#ifdef CONFIG_PM
-static int aat2870_i2c_suspend(struct i2c_client *client, pm_message_t state)
+#ifdef CONFIG_PM_SLEEP
+static int aat2870_i2c_suspend(struct device *dev)
 {
+	struct i2c_client *client = to_i2c_client(dev);
 	struct aat2870_data *aat2870 = i2c_get_clientdata(client);
 
 	aat2870_disable(aat2870);
@@ -478,8 +479,9 @@
 	return 0;
 }
 
-static int aat2870_i2c_resume(struct i2c_client *client)
+static int aat2870_i2c_resume(struct device *dev)
 {
+	struct i2c_client *client = to_i2c_client(dev);
 	struct aat2870_data *aat2870 = i2c_get_clientdata(client);
 	struct aat2870_register *reg = NULL;
 	int i;
@@ -495,12 +497,12 @@
 
 	return 0;
 }
-#else
-#define aat2870_i2c_suspend	NULL
-#define aat2870_i2c_resume	NULL
-#endif /* CONFIG_PM */
+#endif /* CONFIG_PM_SLEEP */
 
-static struct i2c_device_id aat2870_i2c_id_table[] = {
+static SIMPLE_DEV_PM_OPS(aat2870_pm_ops, aat2870_i2c_suspend,
+			 aat2870_i2c_resume);
+
+static const struct i2c_device_id aat2870_i2c_id_table[] = {
 	{ "aat2870", 0 },
 	{ }
 };
@@ -510,11 +512,10 @@
 	.driver = {
 		.name	= "aat2870",
 		.owner	= THIS_MODULE,
+		.pm	= &aat2870_pm_ops,
 	},
 	.probe		= aat2870_i2c_probe,
 	.remove		= aat2870_i2c_remove,
-	.suspend	= aat2870_i2c_suspend,
-	.resume		= aat2870_i2c_resume,
 	.id_table	= aat2870_i2c_id_table,
 };
 

diff --git a/drivers/mfd/ab5500-core.c b/drivers/mfd/ab5500-core.c
index ec10629..bd56a76 100644
--- a/drivers/mfd/ab5500-core.c
+++ b/drivers/mfd/ab5500-core.c

@@ -22,8 +22,8 @@
 #include <linux/irq.h>
 #include <linux/interrupt.h>
 #include <linux/random.h>
-#include <linux/mfd/ab5500/ab5500.h>
 #include <linux/mfd/abx500.h>
+#include <linux/mfd/abx500/ab5500.h>
 #include <linux/list.h>
 #include <linux/bitops.h>
 #include <linux/spinlock.h>

diff --git a/drivers/mfd/ab5500-debugfs.c b/drivers/mfd/ab5500-debugfs.c
index b7b2d348..7200694 100644
--- a/drivers/mfd/ab5500-debugfs.c
+++ b/drivers/mfd/ab5500-debugfs.c

@@ -7,8 +7,8 @@
 #include <linux/module.h>
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
-#include <linux/mfd/ab5500/ab5500.h>
 #include <linux/mfd/abx500.h>
+#include <linux/mfd/abx500/ab5500.h>
 #include <linux/uaccess.h>
 
 #include "ab5500-core.h"

diff --git a/drivers/mfd/ab8500-core.c b/drivers/mfd/ab8500-core.c
index d3d572b..53e2a80 100644
--- a/drivers/mfd/ab8500-core.c
+++ b/drivers/mfd/ab8500-core.c

@@ -17,7 +17,7 @@
 #include <linux/platform_device.h>
 #include <linux/mfd/core.h>
 #include <linux/mfd/abx500.h>
-#include <linux/mfd/ab8500.h>
+#include <linux/mfd/abx500/ab8500.h>
 #include <linux/regulator/ab8500.h>
 
 /*

diff --git a/drivers/mfd/ab8500-debugfs.c b/drivers/mfd/ab8500-debugfs.c
index dedb7f6..9a0211a 100644
--- a/drivers/mfd/ab8500-debugfs.c
+++ b/drivers/mfd/ab8500-debugfs.c

@@ -13,7 +13,7 @@
 #include <linux/platform_device.h>
 
 #include <linux/mfd/abx500.h>
-#include <linux/mfd/ab8500.h>
+#include <linux/mfd/abx500/ab8500.h>
 
 static u32 debug_bank;
 static u32 debug_address;

diff --git a/drivers/mfd/ab8500-gpadc.c b/drivers/mfd/ab8500-gpadc.c
index e985d17..c39fc71 100644
--- a/drivers/mfd/ab8500-gpadc.c
+++ b/drivers/mfd/ab8500-gpadc.c

@@ -18,9 +18,9 @@
 #include <linux/err.h>
 #include <linux/slab.h>
 #include <linux/list.h>
-#include <linux/mfd/ab8500.h>
 #include <linux/mfd/abx500.h>
-#include <linux/mfd/ab8500/gpadc.h>
+#include <linux/mfd/abx500/ab8500.h>
+#include <linux/mfd/abx500/ab8500-gpadc.h>
 
 /*
  * GPADC register offsets

diff --git a/drivers/mfd/ab8500-i2c.c b/drivers/mfd/ab8500-i2c.c
index 9be541c..087fecd 100644
--- a/drivers/mfd/ab8500-i2c.c
+++ b/drivers/mfd/ab8500-i2c.c

@@ -10,7 +10,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
-#include <linux/mfd/ab8500.h>
+#include <linux/mfd/abx500/ab8500.h>
 #include <linux/mfd/db8500-prcmu.h>
 
 static int ab8500_i2c_write(struct ab8500 *ab8500, u16 addr, u8 data)

diff --git a/drivers/mfd/ab8500-sysctrl.c b/drivers/mfd/ab8500-sysctrl.c
index f20feef..c28d4eb 100644
--- a/drivers/mfd/ab8500-sysctrl.c
+++ b/drivers/mfd/ab8500-sysctrl.c

@@ -7,9 +7,9 @@
 #include <linux/err.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
-#include <linux/mfd/ab8500.h>
 #include <linux/mfd/abx500.h>
-#include <linux/mfd/ab8500/sysctrl.h>
+#include <linux/mfd/abx500/ab8500.h>
+#include <linux/mfd/abx500/ab8500-sysctrl.h>
 
 static struct device *sysctrl_dev;
 

diff --git a/drivers/mfd/cs5535-mfd.c b/drivers/mfd/cs5535-mfd.c
index 155fa04..315fef5 100644
--- a/drivers/mfd/cs5535-mfd.c
+++ b/drivers/mfd/cs5535-mfd.c

@@ -172,14 +172,14 @@
 	pci_disable_device(pdev);
 }
 
-static struct pci_device_id cs5535_mfd_pci_tbl[] = {
+static DEFINE_PCI_DEVICE_TABLE(cs5535_mfd_pci_tbl) = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_CS5535_ISA) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA) },
 	{ 0, }
 };
 MODULE_DEVICE_TABLE(pci, cs5535_mfd_pci_tbl);
 
-static struct pci_driver cs5535_mfd_drv = {
+static struct pci_driver cs5535_mfd_driver = {
 	.name = DRV_NAME,
 	.id_table = cs5535_mfd_pci_tbl,
 	.probe = cs5535_mfd_probe,
@@ -188,12 +188,12 @@
 
 static int __init cs5535_mfd_init(void)
 {
-	return pci_register_driver(&cs5535_mfd_drv);
+	return pci_register_driver(&cs5535_mfd_driver);
 }
 
 static void __exit cs5535_mfd_exit(void)
 {
-	pci_unregister_driver(&cs5535_mfd_drv);
+	pci_unregister_driver(&cs5535_mfd_driver);
 }
 
 module_init(cs5535_mfd_init);

diff --git a/drivers/mfd/dm355evm_msp.c b/drivers/mfd/dm355evm_msp.c
index 8ad88da..7710227 100644
--- a/drivers/mfd/dm355evm_msp.c
+++ b/drivers/mfd/dm355evm_msp.c

@@ -308,8 +308,7 @@
 	for (i = 0; i < ARRAY_SIZE(config_inputs); i++) {
 		int gpio = dm355evm_msp_gpio.base + config_inputs[i].offset;
 
-		gpio_request(gpio, config_inputs[i].label);
-		gpio_direction_input(gpio);
+		gpio_request_one(gpio, GPIOF_IN, config_inputs[i].label);
 
 		/* make it easy for userspace to see these */
 		gpio_export(gpio, false);

diff --git a/drivers/mfd/intel_msic.c b/drivers/mfd/intel_msic.c
index 97c2776..b76657e 100644
--- a/drivers/mfd/intel_msic.c
+++ b/drivers/mfd/intel_msic.c

@@ -485,17 +485,7 @@
 	},
 };
 
-static int __init intel_msic_init(void)
-{
-	return platform_driver_register(&intel_msic_driver);
-}
-module_init(intel_msic_init);
-
-static void __exit intel_msic_exit(void)
-{
-	platform_driver_unregister(&intel_msic_driver);
-}
-module_exit(intel_msic_exit);
+module_platform_driver(intel_msic_driver);
 
 MODULE_DESCRIPTION("Driver for Intel MSIC");
 MODULE_AUTHOR("Mika Westerberg <mika.westerberg@linux.intel.com>");

diff --git a/drivers/mfd/jz4740-adc.c b/drivers/mfd/jz4740-adc.c
index ef39528..87662a1 100644
--- a/drivers/mfd/jz4740-adc.c
+++ b/drivers/mfd/jz4740-adc.c

@@ -181,7 +181,7 @@
 	},
 };
 
-const struct mfd_cell jz4740_adc_cells[] = {
+static struct mfd_cell jz4740_adc_cells[] = {
 	{
 		.id = 0,
 		.name = "jz4740-hwmon",
@@ -338,17 +338,7 @@
 	},
 };
 
-static int __init jz4740_adc_init(void)
-{
-	return platform_driver_register(&jz4740_adc_driver);
-}
-module_init(jz4740_adc_init);
-
-static void __exit jz4740_adc_exit(void)
-{
-	platform_driver_unregister(&jz4740_adc_driver);
-}
-module_exit(jz4740_adc_exit);
+module_platform_driver(jz4740_adc_driver);
 
 MODULE_DESCRIPTION("JZ4740 SoC ADC driver");
 MODULE_AUTHOR("Lars-Peter Clausen <lars@metafoo.de>");

diff --git a/drivers/mfd/lpc_sch.c b/drivers/mfd/lpc_sch.c
index ea1169b..abc4213 100644
--- a/drivers/mfd/lpc_sch.c
+++ b/drivers/mfd/lpc_sch.c

@@ -74,7 +74,7 @@
 	},
 };
 
-static struct pci_device_id lpc_sch_ids[] = {
+static DEFINE_PCI_DEVICE_TABLE(lpc_sch_ids) = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SCH_LPC) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ITC_LPC) },
 	{ 0, }

diff --git a/drivers/mfd/max8925-i2c.c b/drivers/mfd/max8925-i2c.c
index 0219115..d9e4b36 100644
--- a/drivers/mfd/max8925-i2c.c
+++ b/drivers/mfd/max8925-i2c.c

@@ -161,6 +161,8 @@
 	chip->adc = i2c_new_dummy(chip->i2c->adapter, ADC_I2C_ADDR);
 	i2c_set_clientdata(chip->adc, chip);
 
+	device_init_wakeup(&client->dev, 1);
+
 	max8925_device_init(chip, pdata);
 
 	return 0;
@@ -177,10 +179,35 @@
 	return 0;
 }
 
+#ifdef CONFIG_PM_SLEEP
+static int max8925_suspend(struct device *dev)
+{
+	struct i2c_client *client = container_of(dev, struct i2c_client, dev);
+	struct max8925_chip *chip = i2c_get_clientdata(client);
+
+	if (device_may_wakeup(dev) && chip->wakeup_flag)
+		enable_irq_wake(chip->core_irq);
+	return 0;
+}
+
+static int max8925_resume(struct device *dev)
+{
+	struct i2c_client *client = container_of(dev, struct i2c_client, dev);
+	struct max8925_chip *chip = i2c_get_clientdata(client);
+
+	if (device_may_wakeup(dev) && chip->wakeup_flag)
+		disable_irq_wake(chip->core_irq);
+	return 0;
+}
+#endif
+
+static SIMPLE_DEV_PM_OPS(max8925_pm_ops, max8925_suspend, max8925_resume);
+
 static struct i2c_driver max8925_driver = {
 	.driver	= {
 		.name	= "max8925",
 		.owner	= THIS_MODULE,
+		.pm     = &max8925_pm_ops,
 	},
 	.probe		= max8925_probe,
 	.remove		= __devexit_p(max8925_remove),

diff --git a/drivers/mfd/max8997.c b/drivers/mfd/max8997.c
index 5be53ae..cb83a7a 100644
--- a/drivers/mfd/max8997.c
+++ b/drivers/mfd/max8997.c

@@ -43,7 +43,8 @@
 	{ .name = "max8997-battery", },
 	{ .name = "max8997-haptic", },
 	{ .name = "max8997-muic", },
-	{ .name = "max8997-flash", },
+	{ .name = "max8997-led", .id = 1 },
+	{ .name = "max8997-led", .id = 2 },
 };
 
 int max8997_read_reg(struct i2c_client *i2c, u8 reg, u8 *dest)

diff --git a/drivers/mfd/max8998.c b/drivers/mfd/max8998.c
index de4096a..6ef56d2 100644
--- a/drivers/mfd/max8998.c
+++ b/drivers/mfd/max8998.c

@@ -176,6 +176,8 @@
 	if (ret < 0)
 		goto err;
 
+	device_init_wakeup(max8998->dev, max8998->wakeup);
+
 	return ret;
 
 err:
@@ -210,7 +212,7 @@
 	struct i2c_client *i2c = container_of(dev, struct i2c_client, dev);
 	struct max8998_dev *max8998 = i2c_get_clientdata(i2c);
 
-	if (max8998->wakeup)
+	if (device_may_wakeup(dev))
 		irq_set_irq_wake(max8998->irq, 1);
 	return 0;
 }
@@ -220,7 +222,7 @@
 	struct i2c_client *i2c = container_of(dev, struct i2c_client, dev);
 	struct max8998_dev *max8998 = i2c_get_clientdata(i2c);
 
-	if (max8998->wakeup)
+	if (device_may_wakeup(dev))
 		irq_set_irq_wake(max8998->irq, 0);
 	/*
 	 * In LP3974, if IRQ registers are not "read & clear"

diff --git a/drivers/mfd/mc13xxx-core.c b/drivers/mfd/mc13xxx-core.c
index e9619ac..7122386 100644
--- a/drivers/mfd/mc13xxx-core.c
+++ b/drivers/mfd/mc13xxx-core.c

@@ -18,11 +18,15 @@
 #include <linux/spi/spi.h>
 #include <linux/mfd/core.h>
 #include <linux/mfd/mc13xxx.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_gpio.h>
 
 struct mc13xxx {
 	struct spi_device *spidev;
 	struct mutex lock;
 	int irq;
+	int flags;
 
 	irq_handler_t irqhandler[MC13XXX_NUM_IRQ];
 	void *irqdata[MC13XXX_NUM_IRQ];
@@ -550,10 +554,7 @@
 
 int mc13xxx_get_flags(struct mc13xxx *mc13xxx)
 {
-	struct mc13xxx_platform_data *pdata =
-		dev_get_platdata(&mc13xxx->spidev->dev);
-
-	return pdata->flags;
+	return mc13xxx->flags;
 }
 EXPORT_SYMBOL(mc13xxx_get_flags);
 
@@ -615,13 +616,13 @@
 		break;
 
 	case MC13XXX_ADC_MODE_SINGLE_CHAN:
-		adc0 |= old_adc0 & MC13XXX_ADC0_TSMOD_MASK;
+		adc0 |= old_adc0 & MC13XXX_ADC0_CONFIG_MASK;
 		adc1 |= (channel & 0x7) << MC13XXX_ADC1_CHAN0_SHIFT;
 		adc1 |= MC13XXX_ADC1_RAND;
 		break;
 
 	case MC13XXX_ADC_MODE_MULT_CHAN:
-		adc0 |= old_adc0 & MC13XXX_ADC0_TSMOD_MASK;
+		adc0 |= old_adc0 & MC13XXX_ADC0_CONFIG_MASK;
 		adc1 |= 4 << MC13XXX_ADC1_CHAN1_SHIFT;
 		break;
 
@@ -696,17 +697,67 @@
 	return mc13xxx_add_subdevice_pdata(mc13xxx, format, NULL, 0);
 }
 
+#ifdef CONFIG_OF
+static int mc13xxx_probe_flags_dt(struct mc13xxx *mc13xxx)
+{
+	struct device_node *np = mc13xxx->spidev->dev.of_node;
+
+	if (!np)
+		return -ENODEV;
+
+	if (of_get_property(np, "fsl,mc13xxx-uses-adc", NULL))
+		mc13xxx->flags |= MC13XXX_USE_ADC;
+
+	if (of_get_property(np, "fsl,mc13xxx-uses-codec", NULL))
+		mc13xxx->flags |= MC13XXX_USE_CODEC;
+
+	if (of_get_property(np, "fsl,mc13xxx-uses-rtc", NULL))
+		mc13xxx->flags |= MC13XXX_USE_RTC;
+
+	if (of_get_property(np, "fsl,mc13xxx-uses-touch", NULL))
+		mc13xxx->flags |= MC13XXX_USE_TOUCHSCREEN;
+
+	return 0;
+}
+#else
+static inline int mc13xxx_probe_flags_dt(struct mc13xxx *mc13xxx)
+{
+	return -ENODEV;
+}
+#endif
+
+static const struct spi_device_id mc13xxx_device_id[] = {
+	{
+		.name = "mc13783",
+		.driver_data = MC13XXX_ID_MC13783,
+	}, {
+		.name = "mc13892",
+		.driver_data = MC13XXX_ID_MC13892,
+	}, {
+		/* sentinel */
+	}
+};
+MODULE_DEVICE_TABLE(spi, mc13xxx_device_id);
+
+static const struct of_device_id mc13xxx_dt_ids[] = {
+	{ .compatible = "fsl,mc13783", .data = (void *) MC13XXX_ID_MC13783, },
+	{ .compatible = "fsl,mc13892", .data = (void *) MC13XXX_ID_MC13892, },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, mc13xxx_dt_ids);
+
 static int mc13xxx_probe(struct spi_device *spi)
 {
+	const struct of_device_id *of_id;
+	struct spi_driver *sdrv = to_spi_driver(spi->dev.driver);
 	struct mc13xxx *mc13xxx;
 	struct mc13xxx_platform_data *pdata = dev_get_platdata(&spi->dev);
 	enum mc13xxx_id id;
 	int ret;
 
-	if (!pdata) {
-		dev_err(&spi->dev, "invalid platform data\n");
-		return -EINVAL;
-	}
+	of_id = of_match_device(mc13xxx_dt_ids, &spi->dev);
+	if (of_id)
+		sdrv->id_table = &mc13xxx_device_id[(enum mc13xxx_id) of_id->data];
 
 	mc13xxx = kzalloc(sizeof(*mc13xxx), GFP_KERNEL);
 	if (!mc13xxx)
@@ -749,28 +800,33 @@
 
 	mc13xxx_unlock(mc13xxx);
 
-	if (pdata->flags & MC13XXX_USE_ADC)
+	if (mc13xxx_probe_flags_dt(mc13xxx) < 0 && pdata)
+		mc13xxx->flags = pdata->flags;
+
+	if (mc13xxx->flags & MC13XXX_USE_ADC)
 		mc13xxx_add_subdevice(mc13xxx, "%s-adc");
 
-	if (pdata->flags & MC13XXX_USE_CODEC)
+	if (mc13xxx->flags & MC13XXX_USE_CODEC)
 		mc13xxx_add_subdevice(mc13xxx, "%s-codec");
 
-	mc13xxx_add_subdevice_pdata(mc13xxx, "%s-regulator",
-		&pdata->regulators, sizeof(pdata->regulators));
-
-	if (pdata->flags & MC13XXX_USE_RTC)
+	if (mc13xxx->flags & MC13XXX_USE_RTC)
 		mc13xxx_add_subdevice(mc13xxx, "%s-rtc");
 
-	if (pdata->flags & MC13XXX_USE_TOUCHSCREEN)
+	if (mc13xxx->flags & MC13XXX_USE_TOUCHSCREEN)
 		mc13xxx_add_subdevice(mc13xxx, "%s-ts");
 
-	if (pdata->leds)
+	if (pdata) {
+		mc13xxx_add_subdevice_pdata(mc13xxx, "%s-regulator",
+			&pdata->regulators, sizeof(pdata->regulators));
 		mc13xxx_add_subdevice_pdata(mc13xxx, "%s-led",
 				pdata->leds, sizeof(*pdata->leds));
-
-	if (pdata->buttons)
 		mc13xxx_add_subdevice_pdata(mc13xxx, "%s-pwrbutton",
 				pdata->buttons, sizeof(*pdata->buttons));
+	} else {
+		mc13xxx_add_subdevice(mc13xxx, "%s-regulator");
+		mc13xxx_add_subdevice(mc13xxx, "%s-led");
+		mc13xxx_add_subdevice(mc13xxx, "%s-pwrbutton");
+	}
 
 	return 0;
 }
@@ -788,25 +844,12 @@
 	return 0;
 }
 
-static const struct spi_device_id mc13xxx_device_id[] = {
-	{
-		.name = "mc13783",
-		.driver_data = MC13XXX_ID_MC13783,
-	}, {
-		.name = "mc13892",
-		.driver_data = MC13XXX_ID_MC13892,
-	}, {
-		/* sentinel */
-	}
-};
-MODULE_DEVICE_TABLE(spi, mc13xxx_device_id);
-
 static struct spi_driver mc13xxx_driver = {
 	.id_table = mc13xxx_device_id,
 	.driver = {
 		.name = "mc13xxx",
-		.bus = &spi_bus_type,
 		.owner = THIS_MODULE,
+		.of_match_table = mc13xxx_dt_ids,
 	},
 	.probe = mc13xxx_probe,
 	.remove = __devexit_p(mc13xxx_remove),

diff --git a/drivers/mfd/mcp-core.c b/drivers/mfd/mcp-core.c
index 84815f9..63be60b 100644
--- a/drivers/mfd/mcp-core.c
+++ b/drivers/mfd/mcp-core.c

@@ -26,9 +26,35 @@
 #define to_mcp(d)		container_of(d, struct mcp, attached_device)
 #define to_mcp_driver(d)	container_of(d, struct mcp_driver, drv)
 
+static const struct mcp_device_id *mcp_match_id(const struct mcp_device_id *id,
+						const char *codec)
+{
+	while (id->name[0]) {
+		if (strcmp(codec, id->name) == 0)
+			return id;
+		id++;
+	}
+	return NULL;
+}
+
+const struct mcp_device_id *mcp_get_device_id(const struct mcp *mcp)
+{
+	const struct mcp_driver *driver =
+		to_mcp_driver(mcp->attached_device.driver);
+
+	return mcp_match_id(driver->id_table, mcp->codec);
+}
+EXPORT_SYMBOL(mcp_get_device_id);
+
 static int mcp_bus_match(struct device *dev, struct device_driver *drv)
 {
-	return 1;
+	const struct mcp *mcp = to_mcp(dev);
+	const struct mcp_driver *driver = to_mcp_driver(drv);
+
+	if (driver->id_table)
+		return !!mcp_match_id(driver->id_table, mcp->codec);
+
+	return 0;
 }
 
 static int mcp_bus_probe(struct device *dev)
@@ -74,9 +100,18 @@
 	return ret;
 }
 
+static int mcp_bus_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+	struct mcp *mcp = to_mcp(dev);
+
+	return add_uevent_var(env, "MODALIAS=%s%s", MCP_MODULE_PREFIX,
+			      mcp->codec);
+}
+
 static struct bus_type mcp_bus_type = {
 	.name		= "mcp",
 	.match		= mcp_bus_match,
+	.uevent		= mcp_bus_uevent,
 	.probe		= mcp_bus_probe,
 	.remove		= mcp_bus_remove,
 	.suspend	= mcp_bus_suspend,
@@ -212,9 +247,14 @@
 }
 EXPORT_SYMBOL(mcp_host_alloc);
 
-int mcp_host_register(struct mcp *mcp)
+int mcp_host_register(struct mcp *mcp, void *pdata)
 {
+	if (!mcp->codec)
+		return -EINVAL;
+
+	mcp->attached_device.platform_data = pdata;
 	dev_set_name(&mcp->attached_device, "mcp0");
+	request_module("%s%s", MCP_MODULE_PREFIX, mcp->codec);
 	return device_register(&mcp->attached_device);
 }
 EXPORT_SYMBOL(mcp_host_register);

diff --git a/drivers/mfd/mcp-sa11x0.c b/drivers/mfd/mcp-sa11x0.c
index 2dab02d..9adc2eb 100644
--- a/drivers/mfd/mcp-sa11x0.c
+++ b/drivers/mfd/mcp-sa11x0.c

@@ -19,6 +19,7 @@
 #include <linux/spinlock.h>
 #include <linux/platform_device.h>
 #include <linux/mfd/mcp.h>
+#include <linux/io.h>
 
 #include <mach/dma.h>
 #include <mach/hardware.h>
@@ -26,12 +27,19 @@
 #include <asm/system.h>
 #include <mach/mcp.h>
 
-#include <mach/assabet.h>
-
+/* Register offsets */
+#define MCCR0	0x00
+#define MCDR0	0x08
+#define MCDR1	0x0C
+#define MCDR2	0x10
+#define MCSR	0x18
+#define MCCR1	0x00
 
 struct mcp_sa11x0 {
-	u32	mccr0;
-	u32	mccr1;
+	u32		mccr0;
+	u32		mccr1;
+	unsigned char	*mccr0_base;
+	unsigned char	*mccr1_base;
 };
 
 #define priv(mcp)	((struct mcp_sa11x0 *)mcp_priv(mcp))
@@ -39,25 +47,25 @@
 static void
 mcp_sa11x0_set_telecom_divisor(struct mcp *mcp, unsigned int divisor)
 {
-	unsigned int mccr0;
+	struct mcp_sa11x0 *priv = priv(mcp);
 
 	divisor /= 32;
 
-	mccr0 = Ser4MCCR0 & ~0x00007f00;
-	mccr0 |= divisor << 8;
-	Ser4MCCR0 = mccr0;
+	priv->mccr0 &= ~0x00007f00;
+	priv->mccr0 |= divisor << 8;
+	__raw_writel(priv->mccr0, priv->mccr0_base + MCCR0);
 }
 
 static void
 mcp_sa11x0_set_audio_divisor(struct mcp *mcp, unsigned int divisor)
 {
-	unsigned int mccr0;
+	struct mcp_sa11x0 *priv = priv(mcp);
 
 	divisor /= 32;
 
-	mccr0 = Ser4MCCR0 & ~0x0000007f;
-	mccr0 |= divisor;
-	Ser4MCCR0 = mccr0;
+	priv->mccr0 &= ~0x0000007f;
+	priv->mccr0 |= divisor;
+	__raw_writel(priv->mccr0, priv->mccr0_base + MCCR0);
 }
 
 /*
@@ -71,12 +79,16 @@
 {
 	int ret = -ETIME;
 	int i;
+	u32 mcpreg;
+	struct mcp_sa11x0 *priv = priv(mcp);
 
-	Ser4MCDR2 = reg << 17 | MCDR2_Wr | (val & 0xffff);
+	mcpreg = reg << 17 | MCDR2_Wr | (val & 0xffff);
+	__raw_writel(mcpreg, priv->mccr0_base + MCDR2);
 
 	for (i = 0; i < 2; i++) {
 		udelay(mcp->rw_timeout);
-		if (Ser4MCSR & MCSR_CWC) {
+		mcpreg = __raw_readl(priv->mccr0_base + MCSR);
+		if (mcpreg & MCSR_CWC) {
 			ret = 0;
 			break;
 		}
@@ -97,13 +109,18 @@
 {
 	int ret = -ETIME;
 	int i;
+	u32 mcpreg;
+	struct mcp_sa11x0 *priv = priv(mcp);
 
-	Ser4MCDR2 = reg << 17 | MCDR2_Rd;
+	mcpreg = reg << 17 | MCDR2_Rd;
+	__raw_writel(mcpreg, priv->mccr0_base + MCDR2);
 
 	for (i = 0; i < 2; i++) {
 		udelay(mcp->rw_timeout);
-		if (Ser4MCSR & MCSR_CRC) {
-			ret = Ser4MCDR2 & 0xffff;
+		mcpreg = __raw_readl(priv->mccr0_base + MCSR);
+		if (mcpreg & MCSR_CRC) {
+			ret = __raw_readl(priv->mccr0_base + MCDR2)
+				& 0xffff;
 			break;
 		}
 	}
@@ -116,13 +133,19 @@
 
 static void mcp_sa11x0_enable(struct mcp *mcp)
 {
-	Ser4MCSR = -1;
-	Ser4MCCR0 |= MCCR0_MCE;
+	struct mcp_sa11x0 *priv = priv(mcp);
+
+	__raw_writel(-1, priv->mccr0_base + MCSR);
+	priv->mccr0 |= MCCR0_MCE;
+	__raw_writel(priv->mccr0, priv->mccr0_base + MCCR0);
 }
 
 static void mcp_sa11x0_disable(struct mcp *mcp)
 {
-	Ser4MCCR0 &= ~MCCR0_MCE;
+	struct mcp_sa11x0 *priv = priv(mcp);
+
+	priv->mccr0 &= ~MCCR0_MCE;
+	__raw_writel(priv->mccr0, priv->mccr0_base + MCCR0);
 }
 
 /*
@@ -142,50 +165,69 @@
 	struct mcp_plat_data *data = pdev->dev.platform_data;
 	struct mcp *mcp;
 	int ret;
+	struct mcp_sa11x0 *priv;
+	struct resource *res_mem0, *res_mem1;
+	u32 size0, size1;
 
 	if (!data)
 		return -ENODEV;
 
-	if (!request_mem_region(0x80060000, 0x60, "sa11x0-mcp"))
+	if (!data->codec)
+		return -ENODEV;
+
+	res_mem0 = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res_mem0)
+		return -ENODEV;
+	size0 = res_mem0->end - res_mem0->start + 1;
+
+	res_mem1 = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (!res_mem1)
+		return -ENODEV;
+	size1 = res_mem1->end - res_mem1->start + 1;
+
+	if (!request_mem_region(res_mem0->start, size0, "sa11x0-mcp"))
 		return -EBUSY;
 
+	if (!request_mem_region(res_mem1->start, size1, "sa11x0-mcp")) {
+		ret = -EBUSY;
+		goto release;
+	}
+
 	mcp = mcp_host_alloc(&pdev->dev, sizeof(struct mcp_sa11x0));
 	if (!mcp) {
 		ret = -ENOMEM;
-		goto release;
+		goto release2;
 	}
 
+	priv = priv(mcp);
+
 	mcp->owner		= THIS_MODULE;
 	mcp->ops		= &mcp_sa11x0;
 	mcp->sclk_rate		= data->sclk_rate;
-	mcp->dma_audio_rd	= DMA_Ser4MCP0Rd;
-	mcp->dma_audio_wr	= DMA_Ser4MCP0Wr;
-	mcp->dma_telco_rd	= DMA_Ser4MCP1Rd;
-	mcp->dma_telco_wr	= DMA_Ser4MCP1Wr;
-	mcp->gpio_base		= data->gpio_base;
+	mcp->dma_audio_rd	= DDAR_DevAdd(res_mem0->start + MCDR0)
+				+ DDAR_DevRd + DDAR_Brst4 + DDAR_8BitDev;
+	mcp->dma_audio_wr	= DDAR_DevAdd(res_mem0->start + MCDR0)
+				+ DDAR_DevWr + DDAR_Brst4 + DDAR_8BitDev;
+	mcp->dma_telco_rd	= DDAR_DevAdd(res_mem0->start + MCDR1)
+				+ DDAR_DevRd + DDAR_Brst4 + DDAR_8BitDev;
+	mcp->dma_telco_wr	= DDAR_DevAdd(res_mem0->start + MCDR1)
+				+ DDAR_DevWr + DDAR_Brst4 + DDAR_8BitDev;
+	mcp->codec		= data->codec;
 
 	platform_set_drvdata(pdev, mcp);
 
-	if (machine_is_assabet()) {
-		ASSABET_BCR_set(ASSABET_BCR_CODEC_RST);
-	}
-
-	/*
-	 * Setup the PPC unit correctly.
-	 */
-	PPDR &= ~PPC_RXD4;
-	PPDR |= PPC_TXD4 | PPC_SCLK | PPC_SFRM;
-	PSDR |= PPC_RXD4;
-	PSDR &= ~(PPC_TXD4 | PPC_SCLK | PPC_SFRM);
-	PPSR &= ~(PPC_TXD4 | PPC_SCLK | PPC_SFRM);
-
 	/*
 	 * Initialise device.  Note that we initially
 	 * set the sampling rate to minimum.
 	 */
-	Ser4MCSR = -1;
-	Ser4MCCR1 = data->mccr1;
-	Ser4MCCR0 = data->mccr0 | 0x7f7f;
+	priv->mccr0_base = ioremap(res_mem0->start, size0);
+	priv->mccr1_base = ioremap(res_mem1->start, size1);
+
+	__raw_writel(-1, priv->mccr0_base + MCSR);
+	priv->mccr1 = data->mccr1;
+	priv->mccr0 = data->mccr0 | 0x7f7f;
+	__raw_writel(priv->mccr0, priv->mccr0_base + MCCR0);
+	__raw_writel(priv->mccr1, priv->mccr1_base + MCCR1);
 
 	/*
 	 * Calculate the read/write timeout (us) from the bit clock
@@ -195,36 +237,53 @@
 	mcp->rw_timeout = (64 * 3 * 1000000 + mcp->sclk_rate - 1) /
 			  mcp->sclk_rate;
 
-	ret = mcp_host_register(mcp);
+	ret = mcp_host_register(mcp, data->codec_pdata);
 	if (ret == 0)
 		goto out;
 
+ release2:
+	release_mem_region(res_mem1->start, size1);
  release:
-	release_mem_region(0x80060000, 0x60);
+	release_mem_region(res_mem0->start, size0);
 	platform_set_drvdata(pdev, NULL);
 
  out:
 	return ret;
 }
 
-static int mcp_sa11x0_remove(struct platform_device *dev)
+static int mcp_sa11x0_remove(struct platform_device *pdev)
 {
-	struct mcp *mcp = platform_get_drvdata(dev);
+	struct mcp *mcp = platform_get_drvdata(pdev);
+	struct mcp_sa11x0 *priv = priv(mcp);
+	struct resource *res_mem;
+	u32 size;
 
-	platform_set_drvdata(dev, NULL);
+	platform_set_drvdata(pdev, NULL);
 	mcp_host_unregister(mcp);
-	release_mem_region(0x80060000, 0x60);
 
+	res_mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (res_mem) {
+		size = res_mem->end - res_mem->start + 1;
+		release_mem_region(res_mem->start, size);
+	}
+	res_mem = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (res_mem) {
+		size = res_mem->end - res_mem->start + 1;
+		release_mem_region(res_mem->start, size);
+	}
+	iounmap(priv->mccr0_base);
+	iounmap(priv->mccr1_base);
 	return 0;
 }
 
 static int mcp_sa11x0_suspend(struct platform_device *dev, pm_message_t state)
 {
 	struct mcp *mcp = platform_get_drvdata(dev);
+	struct mcp_sa11x0 *priv = priv(mcp);
+	u32 mccr0;
 
-	priv(mcp)->mccr0 = Ser4MCCR0;
-	priv(mcp)->mccr1 = Ser4MCCR1;
-	Ser4MCCR0 &= ~MCCR0_MCE;
+	mccr0 = priv->mccr0 & ~MCCR0_MCE;
+	__raw_writel(mccr0, priv->mccr0_base + MCCR0);
 
 	return 0;
 }
@@ -232,9 +291,10 @@
 static int mcp_sa11x0_resume(struct platform_device *dev)
 {
 	struct mcp *mcp = platform_get_drvdata(dev);
+	struct mcp_sa11x0 *priv = priv(mcp);
 
-	Ser4MCCR1 = priv(mcp)->mccr1;
-	Ser4MCCR0 = priv(mcp)->mccr0;
+	__raw_writel(priv->mccr0, priv->mccr0_base + MCCR0);
+	__raw_writel(priv->mccr1, priv->mccr1_base + MCCR1);
 
 	return 0;
 }
@@ -251,24 +311,14 @@
 	.resume		= mcp_sa11x0_resume,
 	.driver		= {
 		.name	= "sa11x0-mcp",
+		.owner  = THIS_MODULE,
 	},
 };
 
 /*
  * This needs re-working
  */
-static int __init mcp_sa11x0_init(void)
-{
-	return platform_driver_register(&mcp_sa11x0_driver);
-}
-
-static void __exit mcp_sa11x0_exit(void)
-{
-	platform_driver_unregister(&mcp_sa11x0_driver);
-}
-
-module_init(mcp_sa11x0_init);
-module_exit(mcp_sa11x0_exit);
+module_platform_driver(mcp_sa11x0_driver);
 
 MODULE_AUTHOR("Russell King <rmk@arm.linux.org.uk>");
 MODULE_DESCRIPTION("SA11x0 multimedia communications port driver");

diff --git a/drivers/mfd/omap-usb-host.c b/drivers/mfd/omap-usb-host.c
index 3f565ef..68ac2c5 100644
--- a/drivers/mfd/omap-usb-host.c
+++ b/drivers/mfd/omap-usb-host.c

@@ -503,19 +503,13 @@
 	spin_lock_irqsave(&omap->lock, flags);
 
 	if (pdata->ehci_data->phy_reset) {
-		if (gpio_is_valid(pdata->ehci_data->reset_gpio_port[0])) {
-			gpio_request(pdata->ehci_data->reset_gpio_port[0],
-						"USB1 PHY reset");
-			gpio_direction_output
-				(pdata->ehci_data->reset_gpio_port[0], 0);
-		}
+		if (gpio_is_valid(pdata->ehci_data->reset_gpio_port[0]))
+			gpio_request_one(pdata->ehci_data->reset_gpio_port[0],
+					 GPIOF_OUT_INIT_LOW, "USB1 PHY reset");
 
-		if (gpio_is_valid(pdata->ehci_data->reset_gpio_port[1])) {
-			gpio_request(pdata->ehci_data->reset_gpio_port[1],
-						"USB2 PHY reset");
-			gpio_direction_output
-				(pdata->ehci_data->reset_gpio_port[1], 0);
-		}
+		if (gpio_is_valid(pdata->ehci_data->reset_gpio_port[1]))
+			gpio_request_one(pdata->ehci_data->reset_gpio_port[1],
+					 GPIOF_OUT_INIT_LOW, "USB2 PHY reset");
 
 		/* Hold the PHY in RESET for enough time till DIR is high */
 		udelay(10);

diff --git a/drivers/mfd/pcf50633-adc.c b/drivers/mfd/pcf50633-adc.c
index aed0d2a..3927c17 100644
--- a/drivers/mfd/pcf50633-adc.c
+++ b/drivers/mfd/pcf50633-adc.c

@@ -249,17 +249,7 @@
 	.remove = __devexit_p(pcf50633_adc_remove),
 };
 
-static int __init pcf50633_adc_init(void)
-{
-	return platform_driver_register(&pcf50633_adc_driver);
-}
-module_init(pcf50633_adc_init);
-
-static void __exit pcf50633_adc_exit(void)
-{
-	platform_driver_unregister(&pcf50633_adc_driver);
-}
-module_exit(pcf50633_adc_exit);
+module_platform_driver(pcf50633_adc_driver);
 
 MODULE_AUTHOR("Balaji Rao <balajirrao@openmoko.org>");
 MODULE_DESCRIPTION("PCF50633 adc driver");

diff --git a/drivers/mfd/s5m-core.c b/drivers/mfd/s5m-core.c
new file mode 100644
index 0000000..e075c11
--- /dev/null
+++ b/drivers/mfd/s5m-core.c

@@ -0,0 +1,176 @@
+/*
+ * s5m-core.c
+ *
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd
+ *              http://www.samsung.com
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the  License, or (at your
+ *  option) any later version.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/init.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/pm_runtime.h>
+#include <linux/mutex.h>
+#include <linux/mfd/core.h>
+#include <linux/mfd/s5m87xx/s5m-core.h>
+#include <linux/mfd/s5m87xx/s5m-pmic.h>
+#include <linux/mfd/s5m87xx/s5m-rtc.h>
+#include <linux/regmap.h>
+
+/* MFD child cells handed to mfd_add_devices() by s5m87xx_i2c_probe(). */
+static struct mfd_cell s5m87xx_devs[] = {
+	{
+		.name = "s5m8767-pmic",
+	}, {
+		.name = "s5m-rtc",
+	},
+};
+
+/*
+ * Read register @reg through the device regmap and store the value at
+ * @dest.  Returns the regmap_read() result.
+ * NOTE(review): @dest is a void * that is passed straight to
+ * regmap_read(); callers presumably must supply a buffer of the size
+ * regmap_read() writes - confirm against the callers.
+ */
+int s5m_reg_read(struct s5m87xx_dev *s5m87xx, u8 reg, void *dest)
+{
+	return regmap_read(s5m87xx->regmap, reg, dest);
+}
+EXPORT_SYMBOL_GPL(s5m_reg_read);
+
+/*
+ * Read @count consecutive registers starting at @reg into @buf.
+ * Returns the regmap_bulk_read() result.
+ */
+int s5m_bulk_read(struct s5m87xx_dev *s5m87xx, u8 reg, int count, u8 *buf)
+{
+	return regmap_bulk_read(s5m87xx->regmap, reg, buf, count);
+}
+EXPORT_SYMBOL_GPL(s5m_bulk_read);
+
+/*
+ * Write @value to register @reg through the device regmap.
+ * Returns the regmap_write() result.
+ */
+int s5m_reg_write(struct s5m87xx_dev *s5m87xx, u8 reg, u8 value)
+{
+	return regmap_write(s5m87xx->regmap, reg, value);
+}
+EXPORT_SYMBOL_GPL(s5m_reg_write);
+
+/*
+ * Write @count consecutive registers starting at @reg from @buf.
+ * Returns the regmap_raw_write() result.
+ *
+ * The regmap is configured with 8-bit values and @buf is a byte array,
+ * so the raw length in bytes is exactly @count; scaling it by
+ * sizeof(u16) would read past the end of the caller's buffer.
+ */
+int s5m_bulk_write(struct s5m87xx_dev *s5m87xx, u8 reg, int count, u8 *buf)
+{
+	return regmap_raw_write(s5m87xx->regmap, reg, buf, count);
+}
+EXPORT_SYMBOL_GPL(s5m_bulk_write);
+
+/*
+ * Update the bits selected by @mask in register @reg to @val.
+ * Note the argument order: this wrapper takes (val, mask) while
+ * regmap_update_bits() takes (mask, val).
+ * Returns the regmap_update_bits() result.
+ */
+int s5m_reg_update(struct s5m87xx_dev *s5m87xx, u8 reg, u8 val, u8 mask)
+{
+	return regmap_update_bits(s5m87xx->regmap, reg, mask, val);
+}
+EXPORT_SYMBOL_GPL(s5m_reg_update);
+
+/* Register map layout used for regmap_init_i2c(): 8-bit addresses, 8-bit values. */
+static struct regmap_config s5m_regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 8,
+};
+
+/*
+ * I2C probe: allocate the core device state, copy the optional platform
+ * data, create the regmap and the dummy RTC I2C client, set up
+ * interrupts and register the MFD child cells.
+ *
+ * Returns 0 on success or a negative errno.  Each failure unwinds only
+ * what has been set up so far and always reports a real error code.
+ */
+static int s5m87xx_i2c_probe(struct i2c_client *i2c,
+			    const struct i2c_device_id *id)
+{
+	struct s5m_platform_data *pdata = i2c->dev.platform_data;
+	struct s5m87xx_dev *s5m87xx;
+	int ret;
+
+	s5m87xx = kzalloc(sizeof(struct s5m87xx_dev), GFP_KERNEL);
+	if (s5m87xx == NULL)
+		return -ENOMEM;
+
+	i2c_set_clientdata(i2c, s5m87xx);
+	s5m87xx->dev = &i2c->dev;
+	s5m87xx->i2c = i2c;
+	s5m87xx->irq = i2c->irq;
+	s5m87xx->type = id->driver_data;
+
+	/* Platform data is optional; the fields stay zeroed without it. */
+	if (pdata) {
+		s5m87xx->device_type = pdata->device_type;
+		s5m87xx->ono = pdata->ono;
+		s5m87xx->irq_base = pdata->irq_base;
+		s5m87xx->wakeup = pdata->wakeup;
+	}
+
+	s5m87xx->regmap = regmap_init_i2c(i2c, &s5m_regmap_config);
+	if (IS_ERR(s5m87xx->regmap)) {
+		ret = PTR_ERR(s5m87xx->regmap);
+		dev_err(&i2c->dev, "Failed to allocate register map: %d\n",
+			ret);
+		goto err_free;
+	}
+
+	s5m87xx->rtc = i2c_new_dummy(i2c->adapter, RTC_I2C_ADDR);
+	if (!s5m87xx->rtc) {
+		dev_err(&i2c->dev, "Failed to register RTC I2C device\n");
+		ret = -ENODEV;
+		goto err_regmap;
+	}
+	i2c_set_clientdata(s5m87xx->rtc, s5m87xx);
+
+	/* pdata may be NULL (see above), so guard the optional hook. */
+	if (pdata && pdata->cfg_pmic_irq)
+		pdata->cfg_pmic_irq();
+
+	s5m_irq_init(s5m87xx);
+
+	pm_runtime_set_active(s5m87xx->dev);
+
+	ret = mfd_add_devices(s5m87xx->dev, -1,
+				s5m87xx_devs, ARRAY_SIZE(s5m87xx_devs),
+				NULL, 0);
+	if (ret < 0)
+		goto err_mfd;
+
+	return ret;
+
+err_mfd:
+	mfd_remove_devices(s5m87xx->dev);
+	s5m_irq_exit(s5m87xx);
+	i2c_unregister_device(s5m87xx->rtc);
+err_regmap:
+	regmap_exit(s5m87xx->regmap);
+err_free:
+	kfree(s5m87xx);
+	return ret;
+}
+
+/*
+ * I2C remove: undo probe in reverse order - drop the MFD children,
+ * release the interrupts, unregister the dummy RTC client, tear down
+ * the regmap and free the core state.  Always returns 0.
+ */
+static int s5m87xx_i2c_remove(struct i2c_client *i2c)
+{
+	struct s5m87xx_dev *s5m87xx = i2c_get_clientdata(i2c);
+
+	mfd_remove_devices(s5m87xx->dev);
+	s5m_irq_exit(s5m87xx);
+	i2c_unregister_device(s5m87xx->rtc);
+	regmap_exit(s5m87xx->regmap);
+	kfree(s5m87xx);
+	return 0;
+}
+
+/* I2C device names this driver binds to; driver_data is copied to ->type in probe. */
+static const struct i2c_device_id s5m87xx_i2c_id[] = {
+	{ "s5m87xx", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, s5m87xx_i2c_id);
+
+/* I2C driver glue tying the probe/remove callbacks to the id table above. */
+static struct i2c_driver s5m87xx_i2c_driver = {
+	.driver = {
+		   .name = "s5m87xx",
+		   .owner = THIS_MODULE,
+	},
+	.probe = s5m87xx_i2c_probe,
+	.remove = s5m87xx_i2c_remove,
+	.id_table = s5m87xx_i2c_id,
+};
+
+/* Register the I2C driver; hooked up via subsys_initcall() rather than module_init(). */
+static int __init s5m87xx_i2c_init(void)
+{
+	return i2c_add_driver(&s5m87xx_i2c_driver);
+}
+
+subsys_initcall(s5m87xx_i2c_init);
+
+/* Module exit: unregister the I2C driver. */
+static void __exit s5m87xx_i2c_exit(void)
+{
+	i2c_del_driver(&s5m87xx_i2c_driver);
+}
+module_exit(s5m87xx_i2c_exit);
+
+MODULE_AUTHOR("Sangbeom Kim <sbkim73@samsung.com>");
+MODULE_DESCRIPTION("Core support for the S5M MFD");
+MODULE_LICENSE("GPL");

diff --git a/drivers/mfd/s5m-irq.c b/drivers/mfd/s5m-irq.c
new file mode 100644
index 0000000..de76dfb
--- /dev/null
+++ b/drivers/mfd/s5m-irq.c

@@ -0,0 +1,487 @@
+/*
+ * s5m-irq.c
+ *
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd
+ *              http://www.samsung.com
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the  License, or (at your
+ *  option) any later version.
+ *
+ */
+
+#include <linux/device.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/mfd/s5m87xx/s5m-core.h>
+
+/*
+ * Location of one interrupt source: @reg is the 1-based interrupt
+ * register number (users index arrays with reg - 1) and @mask is the
+ * bit(s) belonging to the source inside that register.
+ */
+struct s5m_irq_data {
+	int reg;
+	int mask;
+};
+
+/* S5M8767 interrupt sources, indexed by S5M8767_IRQ_*; spread over registers 1-3. */
+static struct s5m_irq_data s5m8767_irqs[] = {
+	[S5M8767_IRQ_PWRR] = {
+		.reg = 1,
+		.mask = S5M8767_IRQ_PWRR_MASK,
+	},
+	[S5M8767_IRQ_PWRF] = {
+		.reg = 1,
+		.mask = S5M8767_IRQ_PWRF_MASK,
+	},
+	[S5M8767_IRQ_PWR1S] = {
+		.reg = 1,
+		.mask = S5M8767_IRQ_PWR1S_MASK,
+	},
+	[S5M8767_IRQ_JIGR] = {
+		.reg = 1,
+		.mask = S5M8767_IRQ_JIGR_MASK,
+	},
+	[S5M8767_IRQ_JIGF] = {
+		.reg = 1,
+		.mask = S5M8767_IRQ_JIGF_MASK,
+	},
+	[S5M8767_IRQ_LOWBAT2] = {
+		.reg = 1,
+		.mask = S5M8767_IRQ_LOWBAT2_MASK,
+	},
+	[S5M8767_IRQ_LOWBAT1] = {
+		.reg = 1,
+		.mask = S5M8767_IRQ_LOWBAT1_MASK,
+	},
+	[S5M8767_IRQ_MRB] = {
+		.reg = 2,
+		.mask = S5M8767_IRQ_MRB_MASK,
+	},
+	[S5M8767_IRQ_DVSOK2] = {
+		.reg = 2,
+		.mask = S5M8767_IRQ_DVSOK2_MASK,
+	},
+	[S5M8767_IRQ_DVSOK3] = {
+		.reg = 2,
+		.mask = S5M8767_IRQ_DVSOK3_MASK,
+	},
+	[S5M8767_IRQ_DVSOK4] = {
+		.reg = 2,
+		.mask = S5M8767_IRQ_DVSOK4_MASK,
+	},
+	[S5M8767_IRQ_RTC60S] = {
+		.reg = 3,
+		.mask = S5M8767_IRQ_RTC60S_MASK,
+	},
+	[S5M8767_IRQ_RTCA1] = {
+		.reg = 3,
+		.mask = S5M8767_IRQ_RTCA1_MASK,
+	},
+	[S5M8767_IRQ_RTCA2] = {
+		.reg = 3,
+		.mask = S5M8767_IRQ_RTCA2_MASK,
+	},
+	[S5M8767_IRQ_SMPL] = {
+		.reg = 3,
+		.mask = S5M8767_IRQ_SMPL_MASK,
+	},
+	[S5M8767_IRQ_RTC1S] = {
+		.reg = 3,
+		.mask = S5M8767_IRQ_RTC1S_MASK,
+	},
+	[S5M8767_IRQ_WTSR] = {
+		.reg = 3,
+		.mask = S5M8767_IRQ_WTSR_MASK,
+	},
+};
+
+/* S5M8763 interrupt sources, indexed by S5M8763_IRQ_*; spread over registers 1-4. */
+static struct s5m_irq_data s5m8763_irqs[] = {
+	[S5M8763_IRQ_DCINF] = {
+		.reg = 1,
+		.mask = S5M8763_IRQ_DCINF_MASK,
+	},
+	[S5M8763_IRQ_DCINR] = {
+		.reg = 1,
+		.mask = S5M8763_IRQ_DCINR_MASK,
+	},
+	[S5M8763_IRQ_JIGF] = {
+		.reg = 1,
+		.mask = S5M8763_IRQ_JIGF_MASK,
+	},
+	[S5M8763_IRQ_JIGR] = {
+		.reg = 1,
+		.mask = S5M8763_IRQ_JIGR_MASK,
+	},
+	[S5M8763_IRQ_PWRONF] = {
+		.reg = 1,
+		.mask = S5M8763_IRQ_PWRONF_MASK,
+	},
+	[S5M8763_IRQ_PWRONR] = {
+		.reg = 1,
+		.mask = S5M8763_IRQ_PWRONR_MASK,
+	},
+	[S5M8763_IRQ_WTSREVNT] = {
+		.reg = 2,
+		.mask = S5M8763_IRQ_WTSREVNT_MASK,
+	},
+	[S5M8763_IRQ_SMPLEVNT] = {
+		.reg = 2,
+		.mask = S5M8763_IRQ_SMPLEVNT_MASK,
+	},
+	[S5M8763_IRQ_ALARM1] = {
+		.reg = 2,
+		.mask = S5M8763_IRQ_ALARM1_MASK,
+	},
+	[S5M8763_IRQ_ALARM0] = {
+		.reg = 2,
+		.mask = S5M8763_IRQ_ALARM0_MASK,
+	},
+	[S5M8763_IRQ_ONKEY1S] = {
+		.reg = 3,
+		.mask = S5M8763_IRQ_ONKEY1S_MASK,
+	},
+	[S5M8763_IRQ_TOPOFFR] = {
+		.reg = 3,
+		.mask = S5M8763_IRQ_TOPOFFR_MASK,
+	},
+	[S5M8763_IRQ_DCINOVPR] = {
+		.reg = 3,
+		.mask = S5M8763_IRQ_DCINOVPR_MASK,
+	},
+	[S5M8763_IRQ_CHGRSTF] = {
+		.reg = 3,
+		.mask = S5M8763_IRQ_CHGRSTF_MASK,
+	},
+	[S5M8763_IRQ_DONER] = {
+		.reg = 3,
+		.mask = S5M8763_IRQ_DONER_MASK,
+	},
+	[S5M8763_IRQ_CHGFAULT] = {
+		.reg = 3,
+		.mask = S5M8763_IRQ_CHGFAULT_MASK,
+	},
+	[S5M8763_IRQ_LOBAT1] = {
+		.reg = 4,
+		.mask = S5M8763_IRQ_LOBAT1_MASK,
+	},
+	[S5M8763_IRQ_LOBAT2] = {
+		.reg = 4,
+		.mask = S5M8763_IRQ_LOBAT2_MASK,
+	},
+};
+
+/* Map a Linux IRQ number to its s5m8767_irqs[] entry by subtracting irq_base. */
+static inline struct s5m_irq_data *
+irq_to_s5m8767_irq(struct s5m87xx_dev *s5m87xx, int irq)
+{
+	return &s5m8767_irqs[irq - s5m87xx->irq_base];
+}
+
+/* irq_bus_lock callback: take irqlock; released in s5m8767_irq_sync_unlock(). */
+static void s5m8767_irq_lock(struct irq_data *data)
+{
+	struct s5m87xx_dev *s5m87xx = irq_data_get_irq_chip_data(data);
+
+	mutex_lock(&s5m87xx->irqlock);
+}
+
+/*
+ * irq_bus_sync_unlock callback: write out every mask register whose
+ * pending value differs from the cached one, refresh the cache, then
+ * drop the lock taken in s5m8767_irq_lock().
+ */
+static void s5m8767_irq_sync_unlock(struct irq_data *data)
+{
+	struct s5m87xx_dev *chip = irq_data_get_irq_chip_data(data);
+	int idx;
+
+	for (idx = 0; idx < ARRAY_SIZE(chip->irq_masks_cur); idx++) {
+		if (chip->irq_masks_cur[idx] == chip->irq_masks_cache[idx])
+			continue;
+
+		chip->irq_masks_cache[idx] = chip->irq_masks_cur[idx];
+		s5m_reg_write(chip, S5M8767_REG_INT1M + idx,
+			      chip->irq_masks_cur[idx]);
+	}
+
+	mutex_unlock(&chip->irqlock);
+}
+
+/* irq_unmask callback: clear the source's bit in the pending mask copy. */
+static void s5m8767_irq_unmask(struct irq_data *data)
+{
+	struct s5m87xx_dev *chip = irq_data_get_irq_chip_data(data);
+	struct s5m_irq_data *entry = irq_to_s5m8767_irq(chip, data->irq);
+	int idx = entry->reg - 1;
+
+	chip->irq_masks_cur[idx] &= ~entry->mask;
+}
+
+/* irq_mask callback: set the source's bit in the pending mask copy. */
+static void s5m8767_irq_mask(struct irq_data *data)
+{
+	struct s5m87xx_dev *chip = irq_data_get_irq_chip_data(data);
+	struct s5m_irq_data *entry = irq_to_s5m8767_irq(chip, data->irq);
+	int idx = entry->reg - 1;
+
+	chip->irq_masks_cur[idx] |= entry->mask;
+}
+
+/* irq_chip for S5M8767: mask changes are staged and flushed in the bus sync_unlock hook. */
+static struct irq_chip s5m8767_irq_chip = {
+	.name = "s5m8767",
+	.irq_bus_lock = s5m8767_irq_lock,
+	.irq_bus_sync_unlock = s5m8767_irq_sync_unlock,
+	.irq_mask = s5m8767_irq_mask,
+	.irq_unmask = s5m8767_irq_unmask,
+};
+
+/* Map a Linux IRQ number to its s5m8763_irqs[] entry by subtracting irq_base. */
+static inline struct s5m_irq_data *
+irq_to_s5m8763_irq(struct s5m87xx_dev *s5m87xx, int irq)
+{
+	return &s5m8763_irqs[irq - s5m87xx->irq_base];
+}
+
+/* irq_bus_lock callback: take irqlock; released in s5m8763_irq_sync_unlock(). */
+static void s5m8763_irq_lock(struct irq_data *data)
+{
+	struct s5m87xx_dev *s5m87xx = irq_data_get_irq_chip_data(data);
+
+	mutex_lock(&s5m87xx->irqlock);
+}
+
+/*
+ * irq_bus_sync_unlock callback: write out every mask register whose
+ * pending value differs from the cached one, refresh the cache, then
+ * drop the lock taken in s5m8763_irq_lock().
+ */
+static void s5m8763_irq_sync_unlock(struct irq_data *data)
+{
+	struct s5m87xx_dev *chip = irq_data_get_irq_chip_data(data);
+	int idx;
+
+	for (idx = 0; idx < ARRAY_SIZE(chip->irq_masks_cur); idx++) {
+		if (chip->irq_masks_cur[idx] == chip->irq_masks_cache[idx])
+			continue;
+
+		chip->irq_masks_cache[idx] = chip->irq_masks_cur[idx];
+		s5m_reg_write(chip, S5M8763_REG_IRQM1 + idx,
+			      chip->irq_masks_cur[idx]);
+	}
+
+	mutex_unlock(&chip->irqlock);
+}
+
+/* irq_unmask callback: clear the source's bit in the pending mask copy. */
+static void s5m8763_irq_unmask(struct irq_data *data)
+{
+	struct s5m87xx_dev *chip = irq_data_get_irq_chip_data(data);
+	struct s5m_irq_data *entry = irq_to_s5m8763_irq(chip, data->irq);
+	int idx = entry->reg - 1;
+
+	chip->irq_masks_cur[idx] &= ~entry->mask;
+}
+
+/* irq_mask callback: set the source's bit in the pending mask copy. */
+static void s5m8763_irq_mask(struct irq_data *data)
+{
+	struct s5m87xx_dev *chip = irq_data_get_irq_chip_data(data);
+	struct s5m_irq_data *entry = irq_to_s5m8763_irq(chip, data->irq);
+	int idx = entry->reg - 1;
+
+	chip->irq_masks_cur[idx] |= entry->mask;
+}
+
+/* irq_chip for S5M8763: mask changes are staged and flushed in the bus sync_unlock hook. */
+static struct irq_chip s5m8763_irq_chip = {
+	.name = "s5m8763",
+	.irq_bus_lock = s5m8763_irq_lock,
+	.irq_bus_sync_unlock = s5m8763_irq_sync_unlock,
+	.irq_mask = s5m8763_irq_mask,
+	.irq_unmask = s5m8763_irq_unmask,
+};
+
+
+/*
+ * Threaded handler for the S5M8767 interrupt line: read the
+ * NUM_IRQ_REGS - 1 interrupt registers, ignore masked sources and
+ * dispatch a nested IRQ for every source that is set.
+ * Returns IRQ_NONE if the registers cannot be read, else IRQ_HANDLED.
+ */
+static irqreturn_t s5m8767_irq_thread(int irq, void *data)
+{
+	struct s5m87xx_dev *chip = data;
+	u8 status[NUM_IRQ_REGS - 1];
+	int err;
+	int n;
+
+	err = s5m_bulk_read(chip, S5M8767_REG_INT1, NUM_IRQ_REGS - 1, status);
+	if (err < 0) {
+		dev_err(chip->dev, "Failed to read interrupt register: %d\n",
+			err);
+		return IRQ_NONE;
+	}
+
+	/* Drop sources that are currently masked. */
+	for (n = 0; n < NUM_IRQ_REGS - 1; n++)
+		status[n] &= ~chip->irq_masks_cur[n];
+
+	for (n = 0; n < S5M8767_IRQ_NR; n++) {
+		const struct s5m_irq_data *src = &s5m8767_irqs[n];
+
+		if (status[src->reg - 1] & src->mask)
+			handle_nested_irq(chip->irq_base + n);
+	}
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Threaded handler for the S5M8763 interrupt line: read the
+ * NUM_IRQ_REGS interrupt registers, ignore masked sources and dispatch
+ * a nested IRQ for every source that is set.
+ * Returns IRQ_NONE if the registers cannot be read, else IRQ_HANDLED.
+ */
+static irqreturn_t s5m8763_irq_thread(int irq, void *data)
+{
+	struct s5m87xx_dev *chip = data;
+	u8 status[NUM_IRQ_REGS];
+	int err;
+	int n;
+
+	err = s5m_bulk_read(chip, S5M8763_REG_IRQ1, NUM_IRQ_REGS, status);
+	if (err < 0) {
+		dev_err(chip->dev, "Failed to read interrupt register: %d\n",
+			err);
+		return IRQ_NONE;
+	}
+
+	/* Drop sources that are currently masked. */
+	for (n = 0; n < NUM_IRQ_REGS; n++)
+		status[n] &= ~chip->irq_masks_cur[n];
+
+	for (n = 0; n < S5M8763_IRQ_NR; n++) {
+		const struct s5m_irq_data *src = &s5m8763_irqs[n];
+
+		if (status[src->reg - 1] & src->mask)
+			handle_nested_irq(chip->irq_base + n);
+	}
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Run the chip's interrupt thread handler once by hand, provided both
+ * an IRQ line and an IRQ base are configured.  Unknown device types are
+ * ignored.  Always returns 0.
+ */
+int s5m_irq_resume(struct s5m87xx_dev *s5m87xx)
+{
+	if (!s5m87xx->irq || !s5m87xx->irq_base)
+		return 0;
+
+	switch (s5m87xx->device_type) {
+	case S5M8763X:
+		s5m8763_irq_thread(s5m87xx->irq_base, s5m87xx);
+		break;
+	case S5M8767X:
+		s5m8767_irq_thread(s5m87xx->irq_base, s5m87xx);
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * Set up interrupt handling for the chip.
+ *
+ * Without an IRQ line or an IRQ base nothing is configured and 0 is
+ * returned.  Otherwise, depending on device_type, all sources are
+ * masked (cache and hardware), one nested-thread virtual IRQ per source
+ * is installed starting at irq_base, and the parent line is requested
+ * as a falling-edge, one-shot threaded IRQ.  If an "ono" IRQ is
+ * configured it is requested with the same thread handler on both
+ * edges.
+ *
+ * Returns a negative errno if a virtual IRQ cannot be set up (S5M8767X)
+ * or the parent line cannot be requested.  A failure to request the
+ * "ono" IRQ is only logged and 0 is still returned.
+ */
+int s5m_irq_init(struct s5m87xx_dev *s5m87xx)
+{
+	int i;
+	int cur_irq;
+	int ret = 0;
+	int type = s5m87xx->device_type;
+
+	if (!s5m87xx->irq) {
+		dev_warn(s5m87xx->dev,
+			 "No interrupt specified, no interrupts\n");
+		s5m87xx->irq_base = 0;
+		return 0;
+	}
+
+	if (!s5m87xx->irq_base) {
+		dev_err(s5m87xx->dev,
+			"No interrupt base specified, no interrupts\n");
+		return 0;
+	}
+
+	mutex_init(&s5m87xx->irqlock);
+
+	switch (type) {
+	case S5M8763X:
+		/* Start with every source masked, in the cache and on chip. */
+		for (i = 0; i < NUM_IRQ_REGS; i++) {
+			s5m87xx->irq_masks_cur[i] = 0xff;
+			s5m87xx->irq_masks_cache[i] = 0xff;
+			s5m_reg_write(s5m87xx, S5M8763_REG_IRQM1 + i,
+						0xff);
+		}
+
+		s5m_reg_write(s5m87xx, S5M8763_REG_STATUSM1, 0xff);
+		s5m_reg_write(s5m87xx, S5M8763_REG_STATUSM2, 0xff);
+
+		for (i = 0; i < S5M8763_IRQ_NR; i++) {
+			cur_irq = i + s5m87xx->irq_base;
+			irq_set_chip_data(cur_irq, s5m87xx);
+			irq_set_chip_and_handler(cur_irq, &s5m8763_irq_chip,
+						 handle_edge_irq);
+			irq_set_nested_thread(cur_irq, 1);
+#ifdef CONFIG_ARM
+			set_irq_flags(cur_irq, IRQF_VALID);
+#else
+			irq_set_noprobe(cur_irq);
+#endif
+		}
+
+		ret = request_threaded_irq(s5m87xx->irq, NULL,
+					s5m8763_irq_thread,
+					IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
+					"s5m87xx-irq", s5m87xx);
+		if (ret) {
+			dev_err(s5m87xx->dev, "Failed to request IRQ %d: %d\n",
+				s5m87xx->irq, ret);
+			return ret;
+		}
+		break;
+	case S5M8767X:
+		/* Start with every source masked, in the cache and on chip. */
+		for (i = 0; i < NUM_IRQ_REGS - 1; i++) {
+			s5m87xx->irq_masks_cur[i] = 0xff;
+			s5m87xx->irq_masks_cache[i] = 0xff;
+			s5m_reg_write(s5m87xx, S5M8767_REG_INT1M + i,
+						0xff);
+		}
+		for (i = 0; i < S5M8767_IRQ_NR; i++) {
+			cur_irq = i + s5m87xx->irq_base;
+			/* Capture the result so the check below is live. */
+			ret = irq_set_chip_data(cur_irq, s5m87xx);
+			if (ret) {
+				dev_err(s5m87xx->dev,
+					"Failed to irq_set_chip_data %d: %d\n",
+					cur_irq, ret);
+				return ret;
+			}
+
+			irq_set_chip_and_handler(cur_irq, &s5m8767_irq_chip,
+						 handle_edge_irq);
+			irq_set_nested_thread(cur_irq, 1);
+#ifdef CONFIG_ARM
+			set_irq_flags(cur_irq, IRQF_VALID);
+#else
+			irq_set_noprobe(cur_irq);
+#endif
+		}
+
+		ret = request_threaded_irq(s5m87xx->irq, NULL,
+					   s5m8767_irq_thread,
+					   IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
+					   "s5m87xx-irq", s5m87xx);
+		if (ret) {
+			dev_err(s5m87xx->dev, "Failed to request IRQ %d: %d\n",
+				s5m87xx->irq, ret);
+			return ret;
+		}
+		break;
+	default:
+		break;
+	}
+
+	if (!s5m87xx->ono)
+		return 0;
+
+	switch (type) {
+	case S5M8763X:
+		ret = request_threaded_irq(s5m87xx->ono, NULL,
+						s5m8763_irq_thread,
+						IRQF_TRIGGER_FALLING |
+						IRQF_TRIGGER_RISING |
+						IRQF_ONESHOT, "s5m87xx-ono",
+						s5m87xx);
+		break;
+	case S5M8767X:
+		ret = request_threaded_irq(s5m87xx->ono, NULL,
+					s5m8767_irq_thread,
+					IRQF_TRIGGER_FALLING |
+					IRQF_TRIGGER_RISING |
+					IRQF_ONESHOT, "s5m87xx-ono", s5m87xx);
+		break;
+	default:
+		break;
+	}
+
+	/* The "ono" IRQ is best effort: report the failure but carry on. */
+	if (ret)
+		dev_err(s5m87xx->dev, "Failed to request IRQ %d: %d\n",
+			s5m87xx->ono, ret);
+
+	return 0;
+}
+
+/*
+ * Release the IRQs requested by s5m_irq_init().
+ *
+ * s5m_irq_init() requests nothing when either the IRQ line or the IRQ
+ * base is missing, so in that case there is nothing to free; calling
+ * free_irq() on a line that was never requested must be avoided.
+ */
+void s5m_irq_exit(struct s5m87xx_dev *s5m87xx)
+{
+	if (!s5m87xx->irq || !s5m87xx->irq_base)
+		return;
+
+	if (s5m87xx->ono)
+		free_irq(s5m87xx->ono, s5m87xx);
+
+	free_irq(s5m87xx->irq, s5m87xx);
+}

diff --git a/drivers/mfd/sm501.c b/drivers/mfd/sm501.c
index df3702c..f4d8611 100644
--- a/drivers/mfd/sm501.c
+++ b/drivers/mfd/sm501.c

@@ -1720,7 +1720,7 @@
 	return 0;
 }
 
-static struct pci_device_id sm501_pci_tbl[] = {
+static DEFINE_PCI_DEVICE_TABLE(sm501_pci_tbl) = {
 	{ 0x126f, 0x0501, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 },
 	{ 0, },
 };

diff --git a/drivers/mfd/stmpe-i2c.c b/drivers/mfd/stmpe-i2c.c
new file mode 100644
index 0000000..373f423
--- /dev/null
+++ b/drivers/mfd/stmpe-i2c.c

@@ -0,0 +1,109 @@
+/*
+ * ST Microelectronics MFD: stmpe's i2c client specific driver
+ *
+ * Copyright (C) ST-Ericsson SA 2010
+ * Copyright (C) ST Microelectronics SA 2011
+ *
+ * License Terms: GNU General Public License, version 2
+ * Author: Rabin Vincent <rabin.vincent@stericsson.com> for ST-Ericsson
+ * Author: Viresh Kumar <viresh.kumar@st.com> for ST Microelectronics
+ */
+
+#include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include "stmpe.h"
+
+/* read_byte hook: SMBus byte-data read of @reg via the client stored in @stmpe. */
+static int i2c_reg_read(struct stmpe *stmpe, u8 reg)
+{
+	return i2c_smbus_read_byte_data(stmpe->client, reg);
+}
+
+/* write_byte hook: SMBus byte-data write of @val to @reg via the client stored in @stmpe. */
+static int i2c_reg_write(struct stmpe *stmpe, u8 reg, u8 val)
+{
+	return i2c_smbus_write_byte_data(stmpe->client, reg, val);
+}
+
+/* read_block hook: I2C block read of @length bytes starting at @reg into @values. */
+static int i2c_block_read(struct stmpe *stmpe, u8 reg, u8 length, u8 *values)
+{
+	return i2c_smbus_read_i2c_block_data(stmpe->client, reg, length,
+					     values);
+}
+
+/* write_block hook: I2C block write of @length bytes from @values starting at @reg. */
+static int i2c_block_write(struct stmpe *stmpe, u8 reg, u8 length,
+		const u8 *values)
+{
+	return i2c_smbus_write_i2c_block_data(stmpe->client, reg, length,
+					      values);
+}
+
+/*
+ * Bus access hooks passed to stmpe_probe() for I2C-attached devices.
+ * The per-device fields (data, irq, client, dev) are filled in by
+ * stmpe_i2c_probe().
+ */
+static struct stmpe_client_info i2c_ci = {
+	.read_byte = i2c_reg_read,
+	.write_byte = i2c_reg_write,
+	.read_block = i2c_block_read,
+	.write_block = i2c_block_write,
+};
+
+/*
+ * I2C probe: record this client's details in i2c_ci and hand over to
+ * the bus-independent stmpe_probe(), using id->driver_data as the part
+ * number.  Returns the stmpe_probe() result.
+ * NOTE(review): i2c_ci is a single file-scope object, so probing a
+ * second device would overwrite these fields - confirm that only one
+ * instance is expected.
+ */
+static int __devinit
+stmpe_i2c_probe(struct i2c_client *i2c, const struct i2c_device_id *id)
+{
+	i2c_ci.data = (void *)id;
+	i2c_ci.irq = i2c->irq;
+	i2c_ci.client = i2c;
+	i2c_ci.dev = &i2c->dev;
+
+	return stmpe_probe(&i2c_ci, id->driver_data);
+}
+
+/* I2C remove: pass the driver data stored on the device to stmpe_remove(). */
+static int __devexit stmpe_i2c_remove(struct i2c_client *i2c)
+{
+	return stmpe_remove(dev_get_drvdata(&i2c->dev));
+}
+
+/* I2C device names handled by this driver; driver_data is the part number. */
+static const struct i2c_device_id stmpe_i2c_id[] = {
+	{ "stmpe610", STMPE610 },
+	{ "stmpe801", STMPE801 },
+	{ "stmpe811", STMPE811 },
+	{ "stmpe1601", STMPE1601 },
+	{ "stmpe2401", STMPE2401 },
+	{ "stmpe2403", STMPE2403 },
+	{ }
+};
+/* Must name the table defined above, otherwise a modular build fails. */
+MODULE_DEVICE_TABLE(i2c, stmpe_i2c_id);
+
+/* I2C driver glue; the PM ops are only wired up when CONFIG_PM is set. */
+static struct i2c_driver stmpe_i2c_driver = {
+	.driver.name	= "stmpe-i2c",
+	.driver.owner	= THIS_MODULE,
+#ifdef CONFIG_PM
+	.driver.pm	= &stmpe_dev_pm_ops,
+#endif
+	.probe		= stmpe_i2c_probe,
+	.remove		= __devexit_p(stmpe_i2c_remove),
+	.id_table	= stmpe_i2c_id,
+};
+
+/* Register the I2C driver; hooked up via subsys_initcall() rather than module_init(). */
+static int __init stmpe_init(void)
+{
+	return i2c_add_driver(&stmpe_i2c_driver);
+}
+subsys_initcall(stmpe_init);
+
+/* Module exit: unregister the I2C driver. */
+static void __exit stmpe_exit(void)
+{
+	i2c_del_driver(&stmpe_i2c_driver);
+}
+module_exit(stmpe_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("STMPE MFD I2C Interface Driver");
+MODULE_AUTHOR("Rabin Vincent <rabin.vincent@stericsson.com>");

diff --git a/drivers/mfd/stmpe-spi.c b/drivers/mfd/stmpe-spi.c
new file mode 100644
index 0000000..b58c43c
--- /dev/null
+++ b/drivers/mfd/stmpe-spi.c

@@ -0,0 +1,150 @@
+/*
+ * ST Microelectronics MFD: stmpe's spi client specific driver
+ *
+ * Copyright (C) ST Microelectronics SA 2011
+ *
+ * License Terms: GNU General Public License, version 2
+ * Author: Viresh Kumar <viresh.kumar@st.com> for ST Microelectronics
+ */
+
+#include <linux/spi/spi.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include "stmpe.h"
+
+/* Bit 7 of the first byte; OR'ed into the register address by spi_reg_read(). */
+#define READ_CMD	(1 << 7)
+
+/*
+ * read_byte hook: send @reg with READ_CMD set and read back 16 bits.
+ * Returns the negative error from spi_w8r16(), or the result shifted
+ * right by 8 on success.
+ */
+static int spi_reg_read(struct stmpe *stmpe, u8 reg)
+{
+	struct spi_device *spi = stmpe->client;
+	int rx = spi_w8r16(spi, reg | READ_CMD);
+
+	if (rx < 0)
+		return rx;
+
+	return rx >> 8;
+}
+
+/*
+ * write_byte hook: pack @reg into the low byte and @val into the high
+ * byte of a u16 and send its two bytes with spi_write().
+ * NOTE(review): the u16 is sent through a u8 pointer, so the byte order
+ * on the wire follows the CPU's endianness - confirm this is intended
+ * for big-endian hosts.
+ */
+static int spi_reg_write(struct stmpe *stmpe, u8 reg, u8 val)
+{
+	struct spi_device *spi = stmpe->client;
+	u16 cmd = (val << 8) | reg;
+
+	return spi_write(spi, (const u8 *)&cmd, 2);
+}
+
+/*
+ * read_block hook: read @length registers starting at @reg one at a
+ * time with spi_reg_read() and store them in @values.
+ * Returns 0 on success or the first negative error encountered.
+ */
+static int spi_block_read(struct stmpe *stmpe, u8 reg, u8 length, u8 *values)
+{
+	int idx;
+
+	for (idx = 0; idx < length; idx++) {
+		int val = spi_reg_read(stmpe, reg + idx);
+
+		if (val < 0)
+			return val;
+		values[idx] = val;
+	}
+
+	return 0;
+}
+
+/*
+ * write_block hook: write @length registers starting at @reg, one
+ * spi_reg_write() per register.  Returns the first negative error, or
+ * the result of the last write (0 when @length is 0).
+ * NOTE(review): @values is consumed from its last element backwards
+ * while @reg counts upwards, i.e. values[length - 1] is written to
+ * @reg - confirm this ordering is intended.
+ */
+static int spi_block_write(struct stmpe *stmpe, u8 reg, u8 length,
+		const u8 *values)
+{
+	int ret = 0, i;
+
+	for (i = length; i > 0; i--, reg++) {
+		ret = spi_reg_write(stmpe, reg, *(values + i - 1));
+		if (ret < 0)
+			return ret;
+	}
+
+	return ret;
+}
+
+/*
+ * init hook: force 8 bits per word, program the SPI mode into the
+ * chip's SPI_CFG register on variants whose id_val is 0x0811, then
+ * apply the settings with spi_setup().  A spi_setup() failure is only
+ * reported at debug level.
+ */
+static void spi_init(struct stmpe *stmpe)
+{
+	struct spi_device *spi = stmpe->client;
+
+	spi->bits_per_word = 8;
+
+	/* This register is only present for stmpe811 */
+	if (stmpe->variant->id_val == 0x0811)
+		spi_reg_write(stmpe, STMPE811_REG_SPI_CFG, spi->mode);
+
+	if (spi_setup(spi) < 0)
+		dev_dbg(&spi->dev, "spi_setup failed\n");
+}
+
+/*
+ * Bus access hooks passed to stmpe_probe() for SPI-attached devices.
+ * The per-device fields (irq, client, dev) are filled in by
+ * stmpe_spi_probe().
+ */
+static struct stmpe_client_info spi_ci = {
+	.read_byte = spi_reg_read,
+	.write_byte = spi_reg_write,
+	.read_block = spi_block_read,
+	.write_block = spi_block_write,
+	.init = spi_init,
+};
+
+/*
+ * SPI probe: reject devices configured for more than 1 MHz with
+ * -EINVAL, otherwise record this device's details in spi_ci and hand
+ * over to the bus-independent stmpe_probe(), using the matched id's
+ * driver_data as the part number.
+ * NOTE(review): spi_ci is a single file-scope object, so probing a
+ * second device would overwrite these fields - confirm that only one
+ * instance is expected.
+ */
+static int __devinit
+stmpe_spi_probe(struct spi_device *spi)
+{
+	const struct spi_device_id *id = spi_get_device_id(spi);
+
+	/* don't exceed max specified rate - 1MHz - Limitation of STMPE */
+	if (spi->max_speed_hz > 1000000) {
+		dev_dbg(&spi->dev, "f(sample) %d KHz?\n",
+				(spi->max_speed_hz/1000));
+		return -EINVAL;
+	}
+
+	spi_ci.irq = spi->irq;
+	spi_ci.client = spi;
+	spi_ci.dev = &spi->dev;
+
+	return stmpe_probe(&spi_ci, id->driver_data);
+}
+
+/* SPI remove: pass the driver data stored on the device to stmpe_remove(). */
+static int __devexit stmpe_spi_remove(struct spi_device *spi)
+{
+	return stmpe_remove(dev_get_drvdata(&spi->dev));
+}
+
+/* SPI device names handled by this driver; driver_data is the part number. */
+static const struct spi_device_id stmpe_spi_id[] = {
+	{ "stmpe610", STMPE610 },
+	{ "stmpe801", STMPE801 },
+	{ "stmpe811", STMPE811 },
+	{ "stmpe1601", STMPE1601 },
+	{ "stmpe2401", STMPE2401 },
+	{ "stmpe2403", STMPE2403 },
+	{ }
+};
+/* Must name the table defined above, otherwise a modular build fails. */
+MODULE_DEVICE_TABLE(spi, stmpe_spi_id);
+
+/* SPI driver glue; the PM ops are only wired up when CONFIG_PM is set. */
+static struct spi_driver stmpe_spi_driver = {
+	.driver = {
+		.name	= "stmpe-spi",
+		.bus	= &spi_bus_type,
+		.owner	= THIS_MODULE,
+#ifdef CONFIG_PM
+		.pm	= &stmpe_dev_pm_ops,
+#endif
+	},
+	.probe		= stmpe_spi_probe,
+	.remove		= __devexit_p(stmpe_spi_remove),
+	.id_table	= stmpe_spi_id,
+};
+
+/* Register the SPI driver; hooked up via subsys_initcall() rather than module_init(). */
+static int __init stmpe_init(void)
+{
+	return spi_register_driver(&stmpe_spi_driver);
+}
+subsys_initcall(stmpe_init);
+
+/* Module exit: unregister the SPI driver. */
+static void __exit stmpe_exit(void)
+{
+	spi_unregister_driver(&stmpe_spi_driver);
+}
+module_exit(stmpe_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("STMPE MFD SPI Interface Driver");
+MODULE_AUTHOR("Viresh Kumar <viresh.kumar@st.com>");

diff --git a/drivers/mfd/stmpe.c b/drivers/mfd/stmpe.c
index 2963689c..e07947e 100644
--- a/drivers/mfd/stmpe.c
+++ b/drivers/mfd/stmpe.c

@@ -1,18 +1,20 @@
 /*
+ * ST Microelectronics MFD: stmpe's driver
+ *
  * Copyright (C) ST-Ericsson SA 2010
  *
  * License Terms: GNU General Public License, version 2
  * Author: Rabin Vincent <rabin.vincent@stericsson.com> for ST-Ericsson
  */
 
+#include <linux/gpio.h>
+#include <linux/export.h>
 #include <linux/kernel.h>
-#include <linux/module.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
+#include <linux/pm.h>
 #include <linux/slab.h>
-#include <linux/i2c.h>
 #include <linux/mfd/core.h>
-#include <linux/mfd/stmpe.h>
 #include "stmpe.h"
 
 static int __stmpe_enable(struct stmpe *stmpe, unsigned int blocks)
@@ -29,10 +31,9 @@
 {
 	int ret;
 
-	ret = i2c_smbus_read_byte_data(stmpe->i2c, reg);
+	ret = stmpe->ci->read_byte(stmpe, reg);
 	if (ret < 0)
-		dev_err(stmpe->dev, "failed to read reg %#x: %d\n",
-			reg, ret);
+		dev_err(stmpe->dev, "failed to read reg %#x: %d\n", reg, ret);
 
 	dev_vdbg(stmpe->dev, "rd: reg %#x => data %#x\n", reg, ret);
 
@@ -45,10 +46,9 @@
 
 	dev_vdbg(stmpe->dev, "wr: reg %#x <= %#x\n", reg, val);
 
-	ret = i2c_smbus_write_byte_data(stmpe->i2c, reg, val);
+	ret = stmpe->ci->write_byte(stmpe, reg, val);
 	if (ret < 0)
-		dev_err(stmpe->dev, "failed to write reg %#x: %d\n",
-			reg, ret);
+		dev_err(stmpe->dev, "failed to write reg %#x: %d\n", reg, ret);
 
 	return ret;
 }
@@ -72,10 +72,9 @@
 {
 	int ret;
 
-	ret = i2c_smbus_read_i2c_block_data(stmpe->i2c, reg, length, values);
+	ret = stmpe->ci->read_block(stmpe, reg, length, values);
 	if (ret < 0)
-		dev_err(stmpe->dev, "failed to read regs %#x: %d\n",
-			reg, ret);
+		dev_err(stmpe->dev, "failed to read regs %#x: %d\n", reg, ret);
 
 	dev_vdbg(stmpe->dev, "rd: reg %#x (%d) => ret %#x\n", reg, length, ret);
 	stmpe_dump_bytes("stmpe rd: ", values, length);
@@ -91,11 +90,9 @@
 	dev_vdbg(stmpe->dev, "wr: regs %#x (%d)\n", reg, length);
 	stmpe_dump_bytes("stmpe wr: ", values, length);
 
-	ret = i2c_smbus_write_i2c_block_data(stmpe->i2c, reg, length,
-					     values);
+	ret = stmpe->ci->write_block(stmpe, reg, length, values);
 	if (ret < 0)
-		dev_err(stmpe->dev, "failed to write regs %#x: %d\n",
-			reg, ret);
+		dev_err(stmpe->dev, "failed to write regs %#x: %d\n", reg, ret);
 
 	return ret;
 }
@@ -245,12 +242,14 @@
 	u8 regaddr = stmpe->regs[STMPE_IDX_GPAFR_U_MSB];
 	int af_bits = variant->af_bits;
 	int numregs = DIV_ROUND_UP(stmpe->num_gpios * af_bits, 8);
-	int afperreg = 8 / af_bits;
 	int mask = (1 << af_bits) - 1;
 	u8 regs[numregs];
-	int af;
-	int ret;
+	int af, afperreg, ret;
 
+	if (!variant->get_altfunc)
+		return 0;
+
+	afperreg = 8 / af_bits;
 	mutex_lock(&stmpe->lock);
 
 	ret = __stmpe_enable(stmpe, STMPE_BLOCK_GPIO);
@@ -325,7 +324,51 @@
 };
 
 /*
- * Touchscreen (STMPE811)
+ * STMPE801
+ */
+/*
+ * Register addresses for the STMPE801, indexed by STMPE_IDX_*.
+ * NOTE(review): the GPSR (set) and GPCR (clear) slots both map to
+ * STMPE801_REG_GPIO_SET_PIN - confirm this is intended.
+ */
+static const u8 stmpe801_regs[] = {
+	[STMPE_IDX_CHIP_ID]	= STMPE801_REG_CHIP_ID,
+	[STMPE_IDX_ICR_LSB]	= STMPE801_REG_SYS_CTRL,
+	[STMPE_IDX_GPMR_LSB]	= STMPE801_REG_GPIO_MP_STA,
+	[STMPE_IDX_GPSR_LSB]	= STMPE801_REG_GPIO_SET_PIN,
+	[STMPE_IDX_GPCR_LSB]	= STMPE801_REG_GPIO_SET_PIN,
+	[STMPE_IDX_GPDR_LSB]	= STMPE801_REG_GPIO_DIR,
+	[STMPE_IDX_IEGPIOR_LSB] = STMPE801_REG_GPIO_INT_EN,
+	[STMPE_IDX_ISGPIOR_MSB] = STMPE801_REG_GPIO_INT_STA,
+
+};
+
+/* Functional blocks of the STMPE801: only the GPIO block, on internal IRQ 0. */
+static struct stmpe_variant_block stmpe801_blocks[] = {
+	{
+		.cell	= &stmpe_gpio_cell,
+		.irq	= 0,
+		.block	= STMPE_BLOCK_GPIO,
+	},
+};
+
+/*
+ * enable hook for the STMPE801: nothing is written to the chip.
+ * Returns 0 when @blocks includes STMPE_BLOCK_GPIO and -EINVAL
+ * otherwise; @enable is not used.
+ */
+static int stmpe801_enable(struct stmpe *stmpe, unsigned int blocks,
+			   bool enable)
+{
+	if (!(blocks & STMPE_BLOCK_GPIO))
+		return -EINVAL;
+
+	return 0;
+}
+
+/* Variant description for the STMPE801: 8 GPIOs, no get_altfunc hook. */
+static struct stmpe_variant_info stmpe801 = {
+	.name		= "stmpe801",
+	.id_val		= STMPE801_ID,
+	.id_mask	= 0xffff,
+	.num_gpios	= 8,
+	.regs		= stmpe801_regs,
+	.blocks		= stmpe801_blocks,
+	.num_blocks	= ARRAY_SIZE(stmpe801_blocks),
+	.num_irqs	= STMPE801_NR_INTERNAL_IRQS,
+	.enable		= stmpe801_enable,
+};
+
+/*
+ * Touchscreen (STMPE811 or STMPE610)
  */
 
 static struct resource stmpe_ts_resources[] = {
@@ -350,7 +393,7 @@
 };
 
 /*
- * STMPE811
+ * STMPE811 or STMPE610
  */
 
 static const u8 stmpe811_regs[] = {
@@ -421,6 +464,21 @@
 	.get_altfunc	= stmpe811_get_altfunc,
 };
 
+/*
+ * Similar to 811, except number of gpios (6 here).  Reuses the stmpe811
+ * register table, block list and enable/get_altfunc hooks, and matches
+ * on id_val 0x0811.
+ */
+static struct stmpe_variant_info stmpe610 = {
+	.name		= "stmpe610",
+	.id_val		= 0x0811,
+	.id_mask	= 0xffff,
+	.num_gpios	= 6,
+	.af_bits	= 1,
+	.regs		= stmpe811_regs,
+	.blocks		= stmpe811_blocks,
+	.num_blocks	= ARRAY_SIZE(stmpe811_blocks),
+	.num_irqs	= STMPE811_NR_INTERNAL_IRQS,
+	.enable		= stmpe811_enable,
+	.get_altfunc	= stmpe811_get_altfunc,
+};
+
 /*
  * STMPE1601
  */
@@ -655,6 +713,8 @@
 };
 
 static struct stmpe_variant_info *stmpe_variant_info[] = {
+	[STMPE610]	= &stmpe610,
+	[STMPE801]	= &stmpe801,
 	[STMPE811]	= &stmpe811,
 	[STMPE1601]	= &stmpe1601,
 	[STMPE2401]	= &stmpe2401,
@@ -671,6 +731,11 @@
 	int ret;
 	int i;
 
+	if (variant->id_val == STMPE801_ID) {
+		handle_nested_irq(stmpe->irq_base);
+		return IRQ_HANDLED;
+	}
+
 	ret = stmpe_block_read(stmpe, israddr, num, isr);
 	if (ret < 0)
 		return IRQ_NONE;
@@ -757,14 +822,17 @@
 
 static int __devinit stmpe_irq_init(struct stmpe *stmpe)
 {
+	struct irq_chip *chip = NULL;
 	int num_irqs = stmpe->variant->num_irqs;
 	int base = stmpe->irq_base;
 	int irq;
 
+	if (stmpe->variant->id_val != STMPE801_ID)
+		chip = &stmpe_irq_chip;
+
 	for (irq = base; irq < base + num_irqs; irq++) {
 		irq_set_chip_data(irq, stmpe);
-		irq_set_chip_and_handler(irq, &stmpe_irq_chip,
-					 handle_edge_irq);
+		irq_set_chip_and_handler(irq, chip, handle_edge_irq);
 		irq_set_nested_thread(irq, 1);
 #ifdef CONFIG_ARM
 		set_irq_flags(irq, IRQF_VALID);
@@ -796,7 +864,7 @@
 	unsigned int irq_trigger = stmpe->pdata->irq_trigger;
 	int autosleep_timeout = stmpe->pdata->autosleep_timeout;
 	struct stmpe_variant_info *variant = stmpe->variant;
-	u8 icr = STMPE_ICR_LSB_GIM;
+	u8 icr;
 	unsigned int id;
 	u8 data[2];
 	int ret;
@@ -819,16 +887,32 @@
 	if (ret)
 		return ret;
 
-	if (irq_trigger == IRQF_TRIGGER_FALLING ||
-	    irq_trigger == IRQF_TRIGGER_RISING)
-		icr |= STMPE_ICR_LSB_EDGE;
+	if (id == STMPE801_ID)
+		icr = STMPE801_REG_SYS_CTRL_INT_EN;
+	else
+		icr = STMPE_ICR_LSB_GIM;
+
+	/* STMPE801 doesn't support Edge interrupts */
+	if (id != STMPE801_ID) {
+		if (irq_trigger == IRQF_TRIGGER_FALLING ||
+				irq_trigger == IRQF_TRIGGER_RISING)
+			icr |= STMPE_ICR_LSB_EDGE;
+	}
 
 	if (irq_trigger == IRQF_TRIGGER_RISING ||
-	    irq_trigger == IRQF_TRIGGER_HIGH)
-		icr |= STMPE_ICR_LSB_HIGH;
+			irq_trigger == IRQF_TRIGGER_HIGH) {
+		if (id == STMPE801_ID)
+			icr |= STMPE801_REG_SYS_CTRL_INT_HI;
+		else
+			icr |= STMPE_ICR_LSB_HIGH;
+	}
 
-	if (stmpe->pdata->irq_invert_polarity)
-		icr ^= STMPE_ICR_LSB_HIGH;
+	if (stmpe->pdata->irq_invert_polarity) {
+		if (id == STMPE801_ID)
+			icr ^= STMPE801_REG_SYS_CTRL_INT_HI;
+		else
+			icr ^= STMPE_ICR_LSB_HIGH;
+	}
 
 	if (stmpe->pdata->autosleep) {
 		ret = stmpe_autosleep(stmpe, autosleep_timeout);
@@ -873,32 +957,10 @@
 	return ret;
 }
 
-#ifdef CONFIG_PM
-static int stmpe_suspend(struct device *dev)
+/* Called from client specific probe routines */
+int __devinit stmpe_probe(struct stmpe_client_info *ci, int partnum)
 {
-	struct i2c_client *i2c = to_i2c_client(dev);
-
-	if (device_may_wakeup(&i2c->dev))
-		enable_irq_wake(i2c->irq);
-
-	return 0;
-}
-
-static int stmpe_resume(struct device *dev)
-{
-	struct i2c_client *i2c = to_i2c_client(dev);
-
-	if (device_may_wakeup(&i2c->dev))
-		disable_irq_wake(i2c->irq);
-
-	return 0;
-}
-#endif
-
-static int __devinit stmpe_probe(struct i2c_client *i2c,
-				 const struct i2c_device_id *id)
-{
-	struct stmpe_platform_data *pdata = i2c->dev.platform_data;
+	struct stmpe_platform_data *pdata = dev_get_platdata(ci->dev);
 	struct stmpe *stmpe;
 	int ret;
 
@@ -912,30 +974,43 @@
 	mutex_init(&stmpe->irq_lock);
 	mutex_init(&stmpe->lock);
 
-	stmpe->dev = &i2c->dev;
-	stmpe->i2c = i2c;
-
+	stmpe->dev = ci->dev;
+	stmpe->client = ci->client;
 	stmpe->pdata = pdata;
 	stmpe->irq_base = pdata->irq_base;
-
-	stmpe->partnum = id->driver_data;
-	stmpe->variant = stmpe_variant_info[stmpe->partnum];
+	stmpe->ci = ci;
+	stmpe->partnum = partnum;
+	stmpe->variant = stmpe_variant_info[partnum];
 	stmpe->regs = stmpe->variant->regs;
 	stmpe->num_gpios = stmpe->variant->num_gpios;
+	dev_set_drvdata(stmpe->dev, stmpe);
 
-	i2c_set_clientdata(i2c, stmpe);
+	if (ci->init)
+		ci->init(stmpe);
+
+	if (pdata->irq_over_gpio) {
+		ret = gpio_request_one(pdata->irq_gpio, GPIOF_DIR_IN, "stmpe");
+		if (ret) {
+			dev_err(stmpe->dev, "failed to request IRQ GPIO: %d\n",
+					ret);
+			goto out_free;
+		}
+
+		stmpe->irq = gpio_to_irq(pdata->irq_gpio);
+	} else {
+		stmpe->irq = ci->irq;
+	}
 
 	ret = stmpe_chip_init(stmpe);
 	if (ret)
-		goto out_free;
+		goto free_gpio;
 
 	ret = stmpe_irq_init(stmpe);
 	if (ret)
-		goto out_free;
+		goto free_gpio;
 
-	ret = request_threaded_irq(stmpe->i2c->irq, NULL, stmpe_irq,
-				   pdata->irq_trigger | IRQF_ONESHOT,
-				   "stmpe", stmpe);
+	ret = request_threaded_irq(stmpe->irq, NULL, stmpe_irq,
+			pdata->irq_trigger | IRQF_ONESHOT, "stmpe", stmpe);
 	if (ret) {
 		dev_err(stmpe->dev, "failed to request IRQ: %d\n", ret);
 		goto out_removeirq;
@@ -951,67 +1026,55 @@
 
 out_removedevs:
 	mfd_remove_devices(stmpe->dev);
-	free_irq(stmpe->i2c->irq, stmpe);
+	free_irq(stmpe->irq, stmpe);
 out_removeirq:
 	stmpe_irq_remove(stmpe);
+free_gpio:
+	if (pdata->irq_over_gpio)
+		gpio_free(pdata->irq_gpio);
 out_free:
 	kfree(stmpe);
 	return ret;
 }
 
-static int __devexit stmpe_remove(struct i2c_client *client)
+int stmpe_remove(struct stmpe *stmpe)
 {
-	struct stmpe *stmpe = i2c_get_clientdata(client);
-
 	mfd_remove_devices(stmpe->dev);
 
-	free_irq(stmpe->i2c->irq, stmpe);
+	free_irq(stmpe->irq, stmpe);
 	stmpe_irq_remove(stmpe);
 
+	if (stmpe->pdata->irq_over_gpio)
+		gpio_free(stmpe->pdata->irq_gpio);
+
 	kfree(stmpe);
 
 	return 0;
 }
 
-static const struct i2c_device_id stmpe_id[] = {
-	{ "stmpe811", STMPE811 },
-	{ "stmpe1601", STMPE1601 },
-	{ "stmpe2401", STMPE2401 },
-	{ "stmpe2403", STMPE2403 },
-	{ }
-};
-MODULE_DEVICE_TABLE(i2c, stmpe_id);
-
 #ifdef CONFIG_PM
-static const struct dev_pm_ops stmpe_dev_pm_ops = {
+static int stmpe_suspend(struct device *dev)
+{
+	struct stmpe *stmpe = dev_get_drvdata(dev);
+
+	if (device_may_wakeup(dev))
+		enable_irq_wake(stmpe->irq);
+
+	return 0;
+}
+
+static int stmpe_resume(struct device *dev)
+{
+	struct stmpe *stmpe = dev_get_drvdata(dev);
+
+	if (device_may_wakeup(dev))
+		disable_irq_wake(stmpe->irq);
+
+	return 0;
+}
+
+const struct dev_pm_ops stmpe_dev_pm_ops = {
 	.suspend	= stmpe_suspend,
 	.resume		= stmpe_resume,
 };
 #endif
-
-static struct i2c_driver stmpe_driver = {
-	.driver.name	= "stmpe",
-	.driver.owner	= THIS_MODULE,
-#ifdef CONFIG_PM
-	.driver.pm	= &stmpe_dev_pm_ops,
-#endif
-	.probe		= stmpe_probe,
-	.remove		= __devexit_p(stmpe_remove),
-	.id_table	= stmpe_id,
-};
-
-static int __init stmpe_init(void)
-{
-	return i2c_add_driver(&stmpe_driver);
-}
-subsys_initcall(stmpe_init);
-
-static void __exit stmpe_exit(void)
-{
-	i2c_del_driver(&stmpe_driver);
-}
-module_exit(stmpe_exit);
-
-MODULE_LICENSE("GPL v2");
-MODULE_DESCRIPTION("STMPE MFD core driver");
-MODULE_AUTHOR("Rabin Vincent <rabin.vincent@stericsson.com>");

diff --git a/drivers/mfd/stmpe.h b/drivers/mfd/stmpe.h
index e4ee3895..7b8e13f 100644
--- a/drivers/mfd/stmpe.h
+++ b/drivers/mfd/stmpe.h

@@ -8,6 +8,14 @@
 #ifndef __STMPE_H
 #define __STMPE_H
 
+#include <linux/device.h>
+#include <linux/mfd/core.h>
+#include <linux/mfd/stmpe.h>
+#include <linux/printk.h>
+#include <linux/types.h>
+
+extern const struct dev_pm_ops stmpe_dev_pm_ops;
+
 #ifdef STMPE_DUMP_BYTES
 static inline void stmpe_dump_bytes(const char *str, const void *buf,
 				    size_t len)
@@ -67,11 +75,58 @@
 	int (*enable_autosleep)(struct stmpe *stmpe, int autosleep_timeout);
 };
 
+/**
+ * struct stmpe_client_info - i2c or spi specific routines/info
+ * @data: client specific data
+ * @irq: client interrupt, used as chip IRQ unless pdata->irq_over_gpio is set
+ * @client: bus specific client handle, copied to stmpe->client at probe
+ * @dev: client device; platform data and driver data are attached to it
+ * @read_byte: read single byte
+ * @write_byte: write single byte
+ * @read_block: read block or multiple bytes
+ * @write_block: write block or multiple bytes
+ * @init: client init routine, called during probe
+ */
+struct stmpe_client_info {
+	void *data;
+	int irq;
+	void *client;
+	struct device *dev;
+	int (*read_byte)(struct stmpe *stmpe, u8 reg);
+	int (*write_byte)(struct stmpe *stmpe, u8 reg, u8 val);
+	int (*read_block)(struct stmpe *stmpe, u8 reg, u8 len, u8 *values);
+	int (*write_block)(struct stmpe *stmpe, u8 reg, u8 len,
+			const u8 *values);
+	void (*init)(struct stmpe *stmpe);
+};
+
+int stmpe_probe(struct stmpe_client_info *ci, int partnum);
+int stmpe_remove(struct stmpe *stmpe);
+
 #define STMPE_ICR_LSB_HIGH	(1 << 2)
 #define STMPE_ICR_LSB_EDGE	(1 << 1)
 #define STMPE_ICR_LSB_GIM	(1 << 0)
 
 /*
+ * STMPE801
+ */
+#define STMPE801_ID			0x0108
+#define STMPE801_NR_INTERNAL_IRQS	1
+
+#define STMPE801_REG_CHIP_ID		0x00
+#define STMPE801_REG_VERSION_ID		0x02
+#define STMPE801_REG_SYS_CTRL		0x04
+#define STMPE801_REG_GPIO_INT_EN	0x08
+#define STMPE801_REG_GPIO_INT_STA	0x09
+#define STMPE801_REG_GPIO_MP_STA	0x10
+#define STMPE801_REG_GPIO_SET_PIN	0x11
+#define STMPE801_REG_GPIO_DIR		0x12
+
+#define STMPE801_REG_SYS_CTRL_RESET	(1 << 7)
+#define STMPE801_REG_SYS_CTRL_INT_EN	(1 << 2)
+#define STMPE801_REG_SYS_CTRL_INT_HI	(1 << 0)
+
+/*
  * STMPE811
  */
 
@@ -87,6 +142,7 @@
 
 #define STMPE811_REG_CHIP_ID		0x00
 #define STMPE811_REG_SYS_CTRL2		0x04
+#define STMPE811_REG_SPI_CFG		0x08
 #define STMPE811_REG_INT_CTRL		0x09
 #define STMPE811_REG_INT_EN		0x0A
 #define STMPE811_REG_INT_STA		0x0B

diff --git a/drivers/mfd/t7l66xb.c b/drivers/mfd/t7l66xb.c
index 91ad21e..2d9e879 100644
--- a/drivers/mfd/t7l66xb.c
+++ b/drivers/mfd/t7l66xb.c

@@ -442,21 +442,7 @@
 
 /*--------------------------------------------------------------------------*/
 
-static int __init t7l66xb_init(void)
-{
-	int retval = 0;
-
-	retval = platform_driver_register(&t7l66xb_platform_driver);
-	return retval;
-}
-
-static void __exit t7l66xb_exit(void)
-{
-	platform_driver_unregister(&t7l66xb_platform_driver);
-}
-
-module_init(t7l66xb_init);
-module_exit(t7l66xb_exit);
+module_platform_driver(t7l66xb_platform_driver);
 
 MODULE_DESCRIPTION("Toshiba T7L66XB core driver");
 MODULE_LICENSE("GPL v2");

diff --git a/drivers/mfd/tc6387xb.c b/drivers/mfd/tc6387xb.c
index 71bc835..d20a284 100644
--- a/drivers/mfd/tc6387xb.c
+++ b/drivers/mfd/tc6387xb.c

@@ -234,19 +234,7 @@
 	.resume         = tc6387xb_resume,
 };
 
-
-static int __init tc6387xb_init(void)
-{
-	return platform_driver_register(&tc6387xb_platform_driver);
-}
-
-static void __exit tc6387xb_exit(void)
-{
-	platform_driver_unregister(&tc6387xb_platform_driver);
-}
-
-module_init(tc6387xb_init);
-module_exit(tc6387xb_exit);
+module_platform_driver(tc6387xb_platform_driver);
 
 MODULE_DESCRIPTION("Toshiba TC6387XB core driver");
 MODULE_LICENSE("GPL v2");

diff --git a/drivers/mfd/ti-ssp.c b/drivers/mfd/ti-ssp.c
index af9ab0e..4fb0e6c 100644
--- a/drivers/mfd/ti-ssp.c
+++ b/drivers/mfd/ti-ssp.c

@@ -458,17 +458,7 @@
 	}
 };
 
-static int __init ti_ssp_init(void)
-{
-	return platform_driver_register(&ti_ssp_driver);
-}
-module_init(ti_ssp_init);
-
-static void __exit ti_ssp_exit(void)
-{
-	platform_driver_unregister(&ti_ssp_driver);
-}
-module_exit(ti_ssp_exit);
+module_platform_driver(ti_ssp_driver);
 
 MODULE_DESCRIPTION("Sequencer Serial Port (SSP) Driver");
 MODULE_AUTHOR("Cyril Chemparathy");

diff --git a/drivers/mfd/timberdale.c b/drivers/mfd/timberdale.c
index 02d6569..0ba26fb 100644
--- a/drivers/mfd/timberdale.c
+++ b/drivers/mfd/timberdale.c

@@ -857,7 +857,7 @@
 	kfree(priv);
 }
 
-static struct pci_device_id timberdale_pci_tbl[] = {
+static DEFINE_PCI_DEVICE_TABLE(timberdale_pci_tbl) = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_TIMB, PCI_DEVICE_ID_TIMB) },
 	{ 0 }
 };

diff --git a/drivers/mfd/tps65910-irq.c b/drivers/mfd/tps65910-irq.c
index a56be93..95c0d79 100644
--- a/drivers/mfd/tps65910-irq.c
+++ b/drivers/mfd/tps65910-irq.c

@@ -215,6 +215,7 @@
 
 int tps65910_irq_exit(struct tps65910 *tps65910)
 {
-	free_irq(tps65910->chip_irq, tps65910);
+	if (tps65910->chip_irq)
+		free_irq(tps65910->chip_irq, tps65910);
 	return 0;
 }

diff --git a/drivers/mfd/tps65910.c b/drivers/mfd/tps65910.c
index c1da84b..01cf501 100644
--- a/drivers/mfd/tps65910.c
+++ b/drivers/mfd/tps65910.c

@@ -172,15 +172,12 @@
 
 	tps65910_gpio_init(tps65910, pmic_plat_data->gpio_base);
 
-	ret = tps65910_irq_init(tps65910, init_data->irq, init_data);
-	if (ret < 0)
-		goto err;
+	tps65910_irq_init(tps65910, init_data->irq, init_data);
 
 	kfree(init_data);
 	return ret;
 
 err:
-	mfd_remove_devices(tps65910->dev);
 	kfree(tps65910);
 	kfree(init_data);
 	return ret;
@@ -190,8 +187,8 @@
 {
 	struct tps65910 *tps65910 = i2c_get_clientdata(i2c);
 
-	mfd_remove_devices(tps65910->dev);
 	tps65910_irq_exit(tps65910);
+	mfd_remove_devices(tps65910->dev);
 	kfree(tps65910);
 
 	return 0;

diff --git a/drivers/mfd/tps65912-spi.c b/drivers/mfd/tps65912-spi.c
index 6d71e0d..27d3302 100644
--- a/drivers/mfd/tps65912-spi.c
+++ b/drivers/mfd/tps65912-spi.c

@@ -111,7 +111,6 @@
 static struct spi_driver tps65912_spi_driver = {
 	.driver = {
 		.name = "tps65912",
-		.bus = &spi_bus_type,
 		.owner = THIS_MODULE,
 	},
 	.probe	= tps65912_spi_probe,

diff --git a/drivers/mfd/twl-core.c b/drivers/mfd/twl-core.c
index 61e70cf..e04e04d 100644
--- a/drivers/mfd/twl-core.c
+++ b/drivers/mfd/twl-core.c

@@ -34,6 +34,11 @@
 #include <linux/platform_device.h>
 #include <linux/clk.h>
 #include <linux/err.h>
+#include <linux/device.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
+#include <linux/irqdomain.h>
 
 #include <linux/regulator/machine.h>
 
@@ -144,6 +149,9 @@
 
 #define TWL_MODULE_LAST TWL4030_MODULE_LAST
 
+#define TWL4030_NR_IRQS    8
+#define TWL6030_NR_IRQS    20
+
 /* Base Address defns for twl4030_map[] */
 
 /* subchip/slave 0 - USB ID */
@@ -255,6 +263,7 @@
 
 static struct twl_client twl_modules[TWL_NUM_SLAVES];
 
+static struct irq_domain domain;
 
 /* mapping the module id to slave id and base address */
 struct twl_mapping {
@@ -1183,14 +1192,48 @@
 	int				status;
 	unsigned			i;
 	struct twl4030_platform_data	*pdata = client->dev.platform_data;
+	struct device_node		*node = client->dev.of_node;
 	u8 temp;
 	int ret = 0;
+	int nr_irqs = TWL4030_NR_IRQS;
+
+	if ((id->driver_data) & TWL6030_CLASS)
+		nr_irqs = TWL6030_NR_IRQS;
+
+	if (node && !pdata) {
+		/*
+		 * XXX: Temporary pdata until the information is correctly
+		 * retrieved by every TWL modules from DT.
+		 */
+		pdata = devm_kzalloc(&client->dev,
+				     sizeof(struct twl4030_platform_data),
+				     GFP_KERNEL);
+		if (!pdata)
+			return -ENOMEM;
+	}
 
 	if (!pdata) {
 		dev_dbg(&client->dev, "no platform data?\n");
 		return -EINVAL;
 	}
 
+	status = irq_alloc_descs(-1, pdata->irq_base, nr_irqs, 0);
+	if (IS_ERR_VALUE(status)) {
+		dev_err(&client->dev, "Fail to allocate IRQ descs\n");
+		return status;
+	}
+
+	pdata->irq_base = status;
+	pdata->irq_end = pdata->irq_base + nr_irqs;
+
+	domain.irq_base = pdata->irq_base;
+	domain.nr_irq = nr_irqs;
+#ifdef CONFIG_OF_IRQ
+	domain.of_node = of_node_get(node);
+	domain.ops = &irq_domain_simple_ops;
+#endif
+	irq_domain_add(&domain);
+
 	if (i2c_check_functionality(client->adapter, I2C_FUNC_I2C) == 0) {
 		dev_dbg(&client->dev, "can't talk I2C?\n");
 		return -EIO;
@@ -1270,7 +1313,13 @@
 		twl_i2c_write_u8(TWL4030_MODULE_INTBR, temp, REG_GPPUPDCTR1);
 	}
 
-	status = add_children(pdata, id->driver_data);
+#ifdef CONFIG_OF_DEVICE
+	if (node)
+		status = of_platform_populate(node, NULL, NULL, &client->dev);
+	else
+#endif
+		status = add_children(pdata, id->driver_data);
+
 fail:
 	if (status < 0)
 		twl_remove(client);

diff --git a/drivers/mfd/twl4030-audio.c b/drivers/mfd/twl4030-audio.c
index ae51ab5..838ce4e 100644
--- a/drivers/mfd/twl4030-audio.c
+++ b/drivers/mfd/twl4030-audio.c

@@ -261,17 +261,7 @@
 	},
 };
 
-static int __devinit twl4030_audio_init(void)
-{
-	return platform_driver_register(&twl4030_audio_driver);
-}
-module_init(twl4030_audio_init);
-
-static void __devexit twl4030_audio_exit(void)
-{
-	platform_driver_unregister(&twl4030_audio_driver);
-}
-module_exit(twl4030_audio_exit);
+module_platform_driver(twl4030_audio_driver);
 
 MODULE_AUTHOR("Peter Ujfalusi <peter.ujfalusi@ti.com>");
 MODULE_LICENSE("GPL");

diff --git a/drivers/mfd/twl4030-irq.c b/drivers/mfd/twl4030-irq.c
index 29f11e0..b69bb51 100644
--- a/drivers/mfd/twl4030-irq.c
+++ b/drivers/mfd/twl4030-irq.c

@@ -492,7 +492,7 @@
 			u8	bytes[4];
 		} imr;
 
-		/* byte[0] gets overwriten as we write ... */
+		/* byte[0] gets overwritten as we write ... */
 		imr.word = cpu_to_le32(agent->imr << 8);
 		agent->imr_change_pending = false;
 
@@ -667,6 +667,7 @@
 		irq_set_chip_data(irq, agent);
 		irq_set_chip_and_handler(irq, &twl4030_sih_irq_chip,
 					 handle_edge_irq);
+		irq_set_nested_thread(irq, 1);
 		activate_irq(irq);
 	}
 

diff --git a/drivers/mfd/twl4030-madc.c b/drivers/mfd/twl4030-madc.c
index 834f824..456ecb5 100644
--- a/drivers/mfd/twl4030-madc.c
+++ b/drivers/mfd/twl4030-madc.c

@@ -807,19 +807,7 @@
 		   },
 };
 
-static int __init twl4030_madc_init(void)
-{
-	return platform_driver_register(&twl4030_madc_driver);
-}
-
-module_init(twl4030_madc_init);
-
-static void __exit twl4030_madc_exit(void)
-{
-	platform_driver_unregister(&twl4030_madc_driver);
-}
-
-module_exit(twl4030_madc_exit);
+module_platform_driver(twl4030_madc_driver);
 
 MODULE_DESCRIPTION("TWL4030 ADC driver");
 MODULE_LICENSE("GPL");

diff --git a/drivers/mfd/twl4030-power.c b/drivers/mfd/twl4030-power.c
index a764676..d905f51 100644
--- a/drivers/mfd/twl4030-power.c
+++ b/drivers/mfd/twl4030-power.c

@@ -34,7 +34,8 @@
 static u8 twl4030_start_script_address = 0x2b;
 
 #define PWR_P1_SW_EVENTS	0x10
-#define PWR_DEVOFF	(1<<0)
+#define PWR_DEVOFF		(1 << 0)
+#define SEQ_OFFSYNC		(1 << 0)
 
 #define PHY_TO_OFF_PM_MASTER(p)		(p - 0x36)
 #define PHY_TO_OFF_PM_RECEIVER(p)	(p - 0x5b)
@@ -511,12 +512,27 @@
 	return err;
 }
 
+/*
+ * In master mode, start the power off sequence.
+ * After a successful execution, TWL shuts down the power to the SoC
+ * and all peripherals connected to it.
+ */
+void twl4030_power_off(void)
+{
+	int err;
+
+	err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, PWR_DEVOFF,
+			       TWL4030_PM_MASTER_P1_SW_EVENTS);
+	if (err)
+		pr_err("TWL4030 Unable to power off\n");
+}
+
 void __init twl4030_power_init(struct twl4030_power_data *twl4030_scripts)
 {
 	int err = 0;
 	int i;
 	struct twl4030_resconfig *resconfig;
-	u8 address = twl4030_start_script_address;
+	u8 val, address = twl4030_start_script_address;
 
 	err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER,
 			TWL4030_PM_MASTER_KEY_CFG1,
@@ -548,6 +564,28 @@
 		}
 	}
 
+	/* Board has to be wired properly to use this feature */
+	if (twl4030_scripts->use_poweroff && !pm_power_off) {
+		/* SEQ_OFFSYNC is set by default; make sure it really is */
+		err = twl_i2c_read_u8(TWL4030_MODULE_PM_MASTER, &val,
+				      TWL4030_PM_MASTER_CFG_P123_TRANSITION);
+		if (err) {
+			pr_warning("TWL4030 Unable to read registers\n");
+
+		} else if (!(val & SEQ_OFFSYNC)) {
+			val |= SEQ_OFFSYNC;
+			err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, val,
+					TWL4030_PM_MASTER_CFG_P123_TRANSITION);
+			if (err) {
+				pr_err("TWL4030 Unable to setup SEQ_OFFSYNC\n");
+				goto relock;
+			}
+		}
+
+		pm_power_off = twl4030_power_off;
+	}
+
+relock:
 	err = twl_i2c_write_u8(TWL4030_MODULE_PM_MASTER, 0,
 			TWL4030_PM_MASTER_PROTECT_KEY);
 	if (err)

diff --git a/drivers/mfd/twl6040-core.c b/drivers/mfd/twl6040-core.c
index 268f80f..dda8629 100644
--- a/drivers/mfd/twl6040-core.c
+++ b/drivers/mfd/twl6040-core.c

@@ -509,13 +509,10 @@
 		twl6040->audpwron = -EINVAL;
 
 	if (gpio_is_valid(twl6040->audpwron)) {
-		ret = gpio_request(twl6040->audpwron, "audpwron");
+		ret = gpio_request_one(twl6040->audpwron, GPIOF_OUT_INIT_LOW,
+				       "audpwron");
 		if (ret)
 			goto gpio1_err;
-
-		ret = gpio_direction_output(twl6040->audpwron, 0);
-		if (ret)
-			goto gpio2_err;
 	}
 
 	/* codec interrupt */
@@ -619,18 +616,7 @@
 	},
 };
 
-static int __devinit twl6040_init(void)
-{
-	return platform_driver_register(&twl6040_driver);
-}
-module_init(twl6040_init);
-
-static void __devexit twl6040_exit(void)
-{
-	platform_driver_unregister(&twl6040_driver);
-}
-
-module_exit(twl6040_exit);
+module_platform_driver(twl6040_driver);
 
 MODULE_DESCRIPTION("TWL6040 MFD");
 MODULE_AUTHOR("Misael Lopez Cruz <misael.lopez@ti.com>");

diff --git a/drivers/mfd/ucb1x00-core.c b/drivers/mfd/ucb1x00-core.c
index b281217..91c4f25 100644
--- a/drivers/mfd/ucb1x00-core.c
+++ b/drivers/mfd/ucb1x00-core.c

@@ -36,6 +36,15 @@
 static LIST_HEAD(ucb1x00_drivers);
 static LIST_HEAD(ucb1x00_devices);
 
+static struct mcp_device_id ucb1x00_id[] = {
+	{ "ucb1x00", 0 },  /* auto-detection */
+	{ "ucb1200", UCB_ID_1200 },
+	{ "ucb1300", UCB_ID_1300 },
+	{ "tc35143", UCB_ID_TC35143 },
+	{ }
+};
+MODULE_DEVICE_TABLE(mcp, ucb1x00_id);
+
 /**
  *	ucb1x00_io_set_dir - set IO direction
  *	@ucb: UCB1x00 structure describing chip
@@ -527,17 +536,33 @@
 
 static int ucb1x00_probe(struct mcp *mcp)
 {
+	const struct mcp_device_id *mid;
 	struct ucb1x00 *ucb;
 	struct ucb1x00_driver *drv;
+	struct ucb1x00_plat_data *pdata;
 	unsigned int id;
 	int ret = -ENODEV;
 	int temp;
 
 	mcp_enable(mcp);
 	id = mcp_reg_read(mcp, UCB_ID);
+	mid = mcp_get_device_id(mcp);
 
-	if (id != UCB_ID_1200 && id != UCB_ID_1300 && id != UCB_ID_TC35143) {
-		printk(KERN_WARNING "UCB1x00 ID not found: %04x\n", id);
+	if (mid && mid->driver_data) {
+		if (id != mid->driver_data) {
+			printk(KERN_WARNING "%s wrong ID %04x found: %04x\n",
+				mid->name, (unsigned int) mid->driver_data, id);
+			goto err_disable;
+		}
+	} else {
+		/* auto-detection: look the probed ID up in ucb1x00_id[] */
+		mid = &ucb1x00_id[1];
+		while (mid->driver_data && id != mid->driver_data)
+			mid++;
+	}
+	if (!mid->driver_data) {
+		printk(KERN_WARNING "%s ID not found: %04x\n",
+			ucb1x00_id[0].name, id);
 		goto err_disable;
 	}
 
@@ -546,28 +571,28 @@
 	if (!ucb)
 		goto err_disable;
 
-
+	pdata = mcp->attached_device.platform_data;
 	ucb->dev.class = &ucb1x00_class;
 	ucb->dev.parent = &mcp->attached_device;
-	dev_set_name(&ucb->dev, "ucb1x00");
+	dev_set_name(&ucb->dev, mid->name);
 
 	spin_lock_init(&ucb->lock);
 	spin_lock_init(&ucb->io_lock);
 	sema_init(&ucb->adc_sem, 1);
 
-	ucb->id  = id;
+	ucb->id  = mid;
 	ucb->mcp = mcp;
 	ucb->irq = ucb1x00_detect_irq(ucb);
 	if (ucb->irq == NO_IRQ) {
-		printk(KERN_ERR "UCB1x00: IRQ probe failed\n");
+		printk(KERN_ERR "%s: IRQ probe failed\n", mid->name);
 		ret = -ENODEV;
 		goto err_free;
 	}
 
 	ucb->gpio.base = -1;
-	if (mcp->gpio_base != 0) {
+	if (pdata && (pdata->gpio_base >= 0)) {
 		ucb->gpio.label = dev_name(&ucb->dev);
-		ucb->gpio.base = mcp->gpio_base;
+		ucb->gpio.base = pdata->gpio_base;
 		ucb->gpio.ngpio = 10;
 		ucb->gpio.set = ucb1x00_gpio_set;
 		ucb->gpio.get = ucb1x00_gpio_get;
@@ -580,10 +605,10 @@
 		dev_info(&ucb->dev, "gpio_base not set so no gpiolib support");
 
 	ret = request_irq(ucb->irq, ucb1x00_irq, IRQF_TRIGGER_RISING,
-			  "UCB1x00", ucb);
+			  mid->name, ucb);
 	if (ret) {
-		printk(KERN_ERR "ucb1x00: unable to grab irq%d: %d\n",
-			ucb->irq, ret);
+		printk(KERN_ERR "%s: unable to grab irq%d: %d\n",
+			mid->name, ucb->irq, ret);
 		goto err_gpio;
 	}
 
@@ -705,6 +730,7 @@
 	.remove		= ucb1x00_remove,
 	.suspend	= ucb1x00_suspend,
 	.resume		= ucb1x00_resume,
+	.id_table	= ucb1x00_id,
 };
 
 static int __init ucb1x00_init(void)

diff --git a/drivers/mfd/ucb1x00-ts.c b/drivers/mfd/ucb1x00-ts.c
index 38ffbd5..40ec3c1 100644
--- a/drivers/mfd/ucb1x00-ts.c
+++ b/drivers/mfd/ucb1x00-ts.c

@@ -382,7 +382,7 @@
 	ts->adcsync = adcsync ? UCB_SYNC : UCB_NOSYNC;
 
 	idev->name       = "Touchscreen panel";
-	idev->id.product = ts->ucb->id;
+	idev->id.product = ts->ucb->id->driver_data;
 	idev->open       = ucb1x00_ts_open;
 	idev->close      = ucb1x00_ts_close;
 

diff --git a/drivers/mfd/vx855.c b/drivers/mfd/vx855.c
index d698703..b73cc15 100644
--- a/drivers/mfd/vx855.c
+++ b/drivers/mfd/vx855.c

@@ -118,7 +118,7 @@
 	pci_disable_device(pdev);
 }
 
-static struct pci_device_id vx855_pci_tbl[] = {
+static DEFINE_PCI_DEVICE_TABLE(vx855_pci_tbl) = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_VX855) },
 	{ 0, }
 };

diff --git a/drivers/mfd/wm831x-core.c b/drivers/mfd/wm831x-core.c
index 0a2b8d4..f5e54fa 100644
--- a/drivers/mfd/wm831x-core.c
+++ b/drivers/mfd/wm831x-core.c

@@ -559,6 +559,8 @@
 		dev_vdbg(wm831x->dev, "Write %04x to R%d(0x%x)\n",
 			 buf[i], reg + i, reg + i);
 		ret = regmap_write(wm831x->regmap, reg + i, buf[i]);
+		if (ret != 0)
+			return ret;
 	}
 
 	return 0;
@@ -1875,7 +1877,6 @@
 err_regmap:
 	mfd_remove_devices(wm831x->dev);
 	regmap_exit(wm831x->regmap);
-	kfree(wm831x);
 	return ret;
 }
 
@@ -1887,7 +1888,6 @@
 		free_irq(wm831x->irq_base + WM831X_IRQ_AUXADC_DATA, wm831x);
 	wm831x_irq_exit(wm831x);
 	regmap_exit(wm831x->regmap);
-	kfree(wm831x);
 }
 
 int wm831x_device_suspend(struct wm831x *wm831x)

diff --git a/drivers/mfd/wm831x-i2c.c b/drivers/mfd/wm831x-i2c.c
index ac8da1d..cb15609 100644
--- a/drivers/mfd/wm831x-i2c.c
+++ b/drivers/mfd/wm831x-i2c.c

@@ -30,7 +30,7 @@
 	struct wm831x *wm831x;
 	int ret;
 
-	wm831x = kzalloc(sizeof(struct wm831x), GFP_KERNEL);
+	wm831x = devm_kzalloc(&i2c->dev, sizeof(struct wm831x), GFP_KERNEL);
 	if (wm831x == NULL)
 		return -ENOMEM;
 
@@ -42,7 +42,6 @@
 		ret = PTR_ERR(wm831x->regmap);
 		dev_err(wm831x->dev, "Failed to allocate register map: %d\n",
 			ret);
-		kfree(wm831x);
 		return ret;
 	}
 

diff --git a/drivers/mfd/wm831x-irq.c b/drivers/mfd/wm831x-irq.c
index f4747a4..bec4d05 100644
--- a/drivers/mfd/wm831x-irq.c
+++ b/drivers/mfd/wm831x-irq.c

@@ -325,11 +325,6 @@
 	return WM831X_INTERRUPT_STATUS_1 - 1 + irq_data->reg;
 }
 
-static inline int irq_data_to_mask_reg(struct wm831x_irq_data *irq_data)
-{
-	return WM831X_INTERRUPT_STATUS_1_MASK - 1 + irq_data->reg;
-}
-
 static inline struct wm831x_irq_data *irq_to_wm831x_irq(struct wm831x *wm831x,
 							int irq)
 {
@@ -477,8 +472,7 @@
 		handle_nested_irq(wm831x->irq_base + WM831X_IRQ_TCHPD);
 	if (primary & WM831X_TCHDATA_INT)
 		handle_nested_irq(wm831x->irq_base + WM831X_IRQ_TCHDATA);
-	if (primary & (WM831X_TCHDATA_EINT | WM831X_TCHPD_EINT))
-		goto out;
+	primary &= ~(WM831X_TCHDATA_EINT | WM831X_TCHPD_EINT);
 
 	for (i = 0; i < ARRAY_SIZE(wm831x_irqs); i++) {
 		int offset = wm831x_irqs[i].reg - 1;

diff --git a/drivers/mfd/wm831x-spi.c b/drivers/mfd/wm831x-spi.c
index 8d6a9a9..62ef325 100644
--- a/drivers/mfd/wm831x-spi.c
+++ b/drivers/mfd/wm831x-spi.c

@@ -30,7 +30,7 @@
 
 	type = (enum wm831x_parent)id->driver_data;
 
-	wm831x = kzalloc(sizeof(struct wm831x), GFP_KERNEL);
+	wm831x = devm_kzalloc(&spi->dev, sizeof(struct wm831x), GFP_KERNEL);
 	if (wm831x == NULL)
 		return -ENOMEM;
 
@@ -45,7 +45,6 @@
 		ret = PTR_ERR(wm831x->regmap);
 		dev_err(wm831x->dev, "Failed to allocate register map: %d\n",
 			ret);
-		kfree(wm831x);
 		return ret;
 	}
 
@@ -95,7 +94,6 @@
 static struct spi_driver wm831x_spi_driver = {
 	.driver = {
 		.name	= "wm831x",
-		.bus	= &spi_bus_type,
 		.owner	= THIS_MODULE,
 		.pm	= &wm831x_spi_pm,
 	},

diff --git a/drivers/mfd/wm8350-core.c b/drivers/mfd/wm8350-core.c
index e81cc31..dd1caaa 100644
--- a/drivers/mfd/wm8350-core.c
+++ b/drivers/mfd/wm8350-core.c

@@ -573,6 +573,8 @@
 	u16 id1, id2, mask_rev;
 	u16 cust_id, mode, chip_rev;
 
+	dev_set_drvdata(wm8350->dev, wm8350);
+
 	/* get WM8350 revision and config mode */
 	ret = wm8350->read_dev(wm8350, WM8350_RESET_ID, sizeof(id1), &id1);
 	if (ret != 0) {

diff --git a/drivers/mfd/wm8350-i2c.c b/drivers/mfd/wm8350-i2c.c
index 5fe5de1..d955faa 100644
--- a/drivers/mfd/wm8350-i2c.c
+++ b/drivers/mfd/wm8350-i2c.c

@@ -63,7 +63,7 @@
 	struct wm8350 *wm8350;
 	int ret = 0;
 
-	wm8350 = kzalloc(sizeof(struct wm8350), GFP_KERNEL);
+	wm8350 = devm_kzalloc(&i2c->dev, sizeof(struct wm8350), GFP_KERNEL);
 	if (wm8350 == NULL)
 		return -ENOMEM;
 
@@ -80,7 +80,6 @@
 	return ret;
 
 err:
-	kfree(wm8350);
 	return ret;
 }
 
@@ -89,7 +88,6 @@
 	struct wm8350 *wm8350 = i2c_get_clientdata(i2c);
 
 	wm8350_device_exit(wm8350);
-	kfree(wm8350);
 
 	return 0;
 }

diff --git a/drivers/mfd/wm8400-core.c b/drivers/mfd/wm8400-core.c
index 62b4626..2204893 100644
--- a/drivers/mfd/wm8400-core.c
+++ b/drivers/mfd/wm8400-core.c

@@ -344,7 +344,7 @@
 	struct wm8400 *wm8400;
 	int ret;
 
-	wm8400 = kzalloc(sizeof(struct wm8400), GFP_KERNEL);
+	wm8400 = devm_kzalloc(&i2c->dev, sizeof(struct wm8400), GFP_KERNEL);
 	if (wm8400 == NULL) {
 		ret = -ENOMEM;
 		goto err;
@@ -353,7 +353,7 @@
 	wm8400->regmap = regmap_init_i2c(i2c, &wm8400_regmap_config);
 	if (IS_ERR(wm8400->regmap)) {
 		ret = PTR_ERR(wm8400->regmap);
-		goto struct_err;
+		goto err;
 	}
 
 	wm8400->dev = &i2c->dev;
@@ -367,8 +367,6 @@
 
 map_err:
 	regmap_exit(wm8400->regmap);
-struct_err:
-	kfree(wm8400);
 err:
 	return ret;
 }
@@ -379,7 +377,6 @@
 
 	wm8400_release(wm8400);
 	regmap_exit(wm8400->regmap);
-	kfree(wm8400);
 
 	return 0;
 }

diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 5664696..6a1a092 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig

@@ -500,6 +500,14 @@
 	  stereo and mono audio, video, microphone and UART data to use
 	  a common connector port.
 
+config MAX8997_MUIC
+	tristate "MAX8997 MUIC Support"
+	depends on MFD_MAX8997
+	help
+	  If you say yes here you get support for the MUIC device of
+	  Maxim MAX8997 PMIC.
+	  The MAX8997 MUIC is a USB port accessory detector and switch.
+
 source "drivers/misc/c2port/Kconfig"
 source "drivers/misc/eeprom/Kconfig"
 source "drivers/misc/cb710/Kconfig"

diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index b26495a..3e1d801 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile

@@ -48,3 +48,4 @@
 obj-y				+= carma/
 obj-$(CONFIG_USB_SWITCH_FSA9480) += fsa9480.o
 obj-$(CONFIG_ALTERA_STAPL)	+=altera-stapl/
+obj-$(CONFIG_MAX8997_MUIC)	+= max8997-muic.o

diff --git a/drivers/misc/ab8500-pwm.c b/drivers/misc/ab8500-pwm.c
index 2208a9d..d7a9aa1 100644
--- a/drivers/misc/ab8500-pwm.c
+++ b/drivers/misc/ab8500-pwm.c

@@ -8,8 +8,8 @@
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/pwm.h>
-#include <linux/mfd/ab8500.h>
 #include <linux/mfd/abx500.h>
+#include <linux/mfd/abx500/ab8500.h>
 #include <linux/module.h>
 
 /*

diff --git a/drivers/misc/max8997-muic.c b/drivers/misc/max8997-muic.c
new file mode 100644
index 0000000..d74ef41
--- /dev/null
+++ b/drivers/misc/max8997-muic.c

@@ -0,0 +1,505 @@
+/*
+ * max8997-muic.c - MAX8997 muic driver for the Maxim 8997
+ *
+ *  Copyright (C) 2011 Samsung Electronics
+ *  Donggeun Kim <dg77.kim@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/i2c.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/err.h>
+#include <linux/platform_device.h>
+#include <linux/kobject.h>
+#include <linux/mfd/max8997.h>
+#include <linux/mfd/max8997-private.h>
+
+/* MAX8997-MUIC STATUS1 register */
+#define STATUS1_ADC_SHIFT		0
+#define STATUS1_ADCLOW_SHIFT		5
+#define STATUS1_ADCERR_SHIFT		6
+#define STATUS1_ADC_MASK		(0x1f << STATUS1_ADC_SHIFT)
+#define STATUS1_ADCLOW_MASK		(0x1 << STATUS1_ADCLOW_SHIFT)
+#define STATUS1_ADCERR_MASK		(0x1 << STATUS1_ADCERR_SHIFT)
+
+/* MAX8997-MUIC STATUS2 register */
+#define STATUS2_CHGTYP_SHIFT		0
+#define STATUS2_CHGDETRUN_SHIFT		3
+#define STATUS2_DCDTMR_SHIFT		4
+#define STATUS2_DBCHG_SHIFT		5
+#define STATUS2_VBVOLT_SHIFT		6
+#define STATUS2_CHGTYP_MASK		(0x7 << STATUS2_CHGTYP_SHIFT)
+#define STATUS2_CHGDETRUN_MASK		(0x1 << STATUS2_CHGDETRUN_SHIFT)
+#define STATUS2_DCDTMR_MASK		(0x1 << STATUS2_DCDTMR_SHIFT)
+#define STATUS2_DBCHG_MASK		(0x1 << STATUS2_DBCHG_SHIFT)
+#define STATUS2_VBVOLT_MASK		(0x1 << STATUS2_VBVOLT_SHIFT)
+
+/* MAX8997-MUIC STATUS3 register */
+#define STATUS3_OVP_SHIFT		2
+#define STATUS3_OVP_MASK		(0x1 << STATUS3_OVP_SHIFT)
+
+/* MAX8997-MUIC CONTROL1 register */
+#define COMN1SW_SHIFT			0
+#define COMP2SW_SHIFT			3
+#define COMN1SW_MASK			(0x7 << COMN1SW_SHIFT)
+#define COMP2SW_MASK			(0x7 << COMP2SW_SHIFT)
+#define SW_MASK				(COMP2SW_MASK | COMN1SW_MASK)
+
+#define MAX8997_SW_USB		((1 << COMP2SW_SHIFT) | (1 << COMN1SW_SHIFT))
+#define MAX8997_SW_AUDIO	((2 << COMP2SW_SHIFT) | (2 << COMN1SW_SHIFT))
+#define MAX8997_SW_UART		((3 << COMP2SW_SHIFT) | (3 << COMN1SW_SHIFT))
+#define MAX8997_SW_OPEN		((0 << COMP2SW_SHIFT) | (0 << COMN1SW_SHIFT))
+
+#define	MAX8997_ADC_GROUND		0x00
+#define	MAX8997_ADC_MHL			0x01
+#define	MAX8997_ADC_JIG_USB_1		0x18
+#define	MAX8997_ADC_JIG_USB_2		0x19
+#define	MAX8997_ADC_DESKDOCK		0x1a
+#define	MAX8997_ADC_JIG_UART		0x1c
+#define	MAX8997_ADC_CARDOCK		0x1d
+#define	MAX8997_ADC_OPEN		0x1f
+
+struct max8997_muic_irq {
+	unsigned int irq;
+	const char *name;
+};
+
+static struct max8997_muic_irq muic_irqs[] = {
+	{ MAX8997_MUICIRQ_ADCError, "muic-ADC_error" },
+	{ MAX8997_MUICIRQ_ADCLow, "muic-ADC_low" },
+	{ MAX8997_MUICIRQ_ADC, "muic-ADC" },
+	{ MAX8997_MUICIRQ_VBVolt, "muic-VB_voltage" },
+	{ MAX8997_MUICIRQ_DBChg, "muic-DB_charger" },
+	{ MAX8997_MUICIRQ_DCDTmr, "muic-DCD_timer" },
+	{ MAX8997_MUICIRQ_ChgDetRun, "muic-CDR_status" },
+	{ MAX8997_MUICIRQ_ChgTyp, "muic-charger_type" },
+	{ MAX8997_MUICIRQ_OVP, "muic-over_voltage" },
+};
+
+struct max8997_muic_info {
+	struct device *dev;
+	struct max8997_dev *iodev;
+	struct i2c_client *muic;
+	struct max8997_muic_platform_data *muic_pdata;
+
+	int irq;
+	struct work_struct irq_work;
+
+	enum max8997_muic_charger_type pre_charger_type;
+	int pre_adc;
+
+	struct mutex mutex;
+};
+
+static int max8997_muic_handle_usb(struct max8997_muic_info *info,
+			enum max8997_muic_usb_type usb_type, bool attached)
+{
+	struct max8997_muic_platform_data *mdata = info->muic_pdata;
+	int ret = 0;
+
+	if (usb_type == MAX8997_USB_HOST) {
+		/* switch to USB */
+		ret = max8997_update_reg(info->muic, MAX8997_MUIC_REG_CONTROL1,
+				attached ? MAX8997_SW_USB : MAX8997_SW_OPEN,
+				SW_MASK);
+		if (ret) {
+			dev_err(info->dev, "failed to update muic register\n");
+			goto out;
+		}
+	}
+
+	if (mdata->usb_callback)
+		mdata->usb_callback(usb_type, attached);
+out:
+	return ret;
+}
+
+static void max8997_muic_handle_mhl(struct max8997_muic_info *info,
+			bool attached)
+{
+	struct max8997_muic_platform_data *mdata = info->muic_pdata;
+
+	if (mdata->mhl_callback)
+		mdata->mhl_callback(attached);
+}
+
+static int max8997_muic_handle_dock(struct max8997_muic_info *info,
+			int adc, bool attached)
+{
+	struct max8997_muic_platform_data *mdata = info->muic_pdata;
+	int ret = 0;
+
+	/* switch to AUDIO */
+	ret = max8997_update_reg(info->muic, MAX8997_MUIC_REG_CONTROL1,
+				attached ? MAX8997_SW_AUDIO : MAX8997_SW_OPEN,
+				SW_MASK);
+	if (ret) {
+		dev_err(info->dev, "failed to update muic register\n");
+		goto out;
+	}
+
+	switch (adc) {
+	case MAX8997_ADC_DESKDOCK:
+		if (mdata->deskdock_callback)
+			mdata->deskdock_callback(attached);
+		break;
+	case MAX8997_ADC_CARDOCK:
+		if (mdata->cardock_callback)
+			mdata->cardock_callback(attached);
+		break;
+	default:
+		break;
+	}
+out:
+	return ret;
+}
+
+static int max8997_muic_handle_jig_uart(struct max8997_muic_info *info,
+			bool attached)
+{
+	struct max8997_muic_platform_data *mdata = info->muic_pdata;
+	int ret = 0;
+
+	/* switch to UART */
+	ret = max8997_update_reg(info->muic, MAX8997_MUIC_REG_CONTROL1,
+				attached ? MAX8997_SW_UART : MAX8997_SW_OPEN,
+				SW_MASK);
+	if (ret) {
+		dev_err(info->dev, "failed to update muic register\n");
+		goto out;
+	}
+
+	if (mdata->uart_callback)
+		mdata->uart_callback(attached);
+out:
+	return ret;
+}
+
+static int max8997_muic_handle_adc_detach(struct max8997_muic_info *info)
+{
+	int ret = 0;
+
+	switch (info->pre_adc) {
+	case MAX8997_ADC_GROUND:
+		ret = max8997_muic_handle_usb(info, MAX8997_USB_HOST, false);
+		break;
+	case MAX8997_ADC_MHL:
+		max8997_muic_handle_mhl(info, false);
+		break;
+	case MAX8997_ADC_JIG_USB_1:
+	case MAX8997_ADC_JIG_USB_2:
+		ret = max8997_muic_handle_usb(info, MAX8997_USB_DEVICE, false);
+		break;
+	case MAX8997_ADC_DESKDOCK:
+	case MAX8997_ADC_CARDOCK:
+		ret = max8997_muic_handle_dock(info, info->pre_adc, false);
+		break;
+	case MAX8997_ADC_JIG_UART:
+		ret = max8997_muic_handle_jig_uart(info, false);
+		break;
+	default:
+		break;
+	}
+
+	return ret;
+}
+
+/*
+ * Dispatch on an ADC reading taken from STATUS1 (already masked and
+ * shifted by the caller).  Known accessory codes call the matching
+ * attach handler; MAX8997_ADC_OPEN detaches whatever info->pre_adc
+ * recorded.  The reading is always stored in info->pre_adc afterwards,
+ * including codes this switch does not recognise.
+ *
+ * Returns the handler's result, or 0 when the handler returns nothing
+ * (MHL) or the code is not handled.
+ */
+static int max8997_muic_handle_adc(struct max8997_muic_info *info, int adc)
+{
+	int ret = 0;
+
+	switch (adc) {
+	case MAX8997_ADC_GROUND:
+		ret = max8997_muic_handle_usb(info, MAX8997_USB_HOST, true);
+		break;
+	case MAX8997_ADC_MHL:
+		max8997_muic_handle_mhl(info, true);
+		break;
+	case MAX8997_ADC_JIG_USB_1:
+	case MAX8997_ADC_JIG_USB_2:
+		ret = max8997_muic_handle_usb(info, MAX8997_USB_DEVICE, true);
+		break;
+	case MAX8997_ADC_DESKDOCK:
+	case MAX8997_ADC_CARDOCK:
+		ret = max8997_muic_handle_dock(info, adc, true);
+		break;
+	case MAX8997_ADC_JIG_UART:
+		ret = max8997_muic_handle_jig_uart(info, true);
+		break;
+	case MAX8997_ADC_OPEN:
+		ret = max8997_muic_handle_adc_detach(info);
+		break;
+	default:
+		break;
+	}
+
+	/* Remembered so a later OPEN reading knows what to detach. */
+	info->pre_adc = adc;
+
+	return ret;
+}
+
+static int max8997_muic_handle_charger_type(struct max8997_muic_info *info,
+				enum max8997_muic_charger_type charger_type)
+{
+	struct max8997_muic_platform_data *mdata = info->muic_pdata;
+	u8 adc;
+	int ret;
+
+	ret = max8997_read_reg(info->muic, MAX8997_MUIC_REG_STATUS1, &adc);
+	if (ret) {
+		dev_err(info->dev, "failed to read muic register\n");
+		goto out;
+	}
+
+	switch (charger_type) {
+	case MAX8997_CHARGER_TYPE_NONE:
+		if (mdata->charger_callback)
+			mdata->charger_callback(false, charger_type);
+		if (info->pre_charger_type == MAX8997_CHARGER_TYPE_USB) {
+			max8997_muic_handle_usb(info,
+					MAX8997_USB_DEVICE, false);
+		}
+		break;
+	case MAX8997_CHARGER_TYPE_USB:
+		if ((adc & STATUS1_ADC_MASK) == MAX8997_ADC_OPEN) {
+			max8997_muic_handle_usb(info,
+					MAX8997_USB_DEVICE, true);
+		}
+		if (mdata->charger_callback)
+			mdata->charger_callback(true, charger_type);
+		break;
+	case MAX8997_CHARGER_TYPE_DOWNSTREAM_PORT:
+	case MAX8997_CHARGER_TYPE_DEDICATED_CHG:
+	case MAX8997_CHARGER_TYPE_500MA:
+	case MAX8997_CHARGER_TYPE_1A:
+		if (mdata->charger_callback)
+			mdata->charger_callback(true, charger_type);
+		break;
+	default:
+		break;
+	}
+
+	info->pre_charger_type = charger_type;
+out:
+	return ret;
+}
+
+/*
+ * Work item scheduled by max8997_muic_irq_handler().  Reads the three
+ * MUIC status registers starting at STATUS1 and dispatches on the
+ * interrupt number recorded in info->irq: ADC and charger-type changes
+ * are handled, every other MUIC interrupt is only logged by name.
+ * info->mutex is held for the whole read-and-dispatch sequence.
+ */
+static void max8997_muic_irq_work(struct work_struct *work)
+{
+	struct max8997_muic_info *info = container_of(work,
+			struct max8997_muic_info, irq_work);
+	struct max8997_platform_data *pdata =
+				dev_get_platdata(info->iodev->dev);
+	const char *irq_name = "unknown";
+	u8 status[3];
+	u8 adc, chg_type;
+
+	int irq_type = info->irq - pdata->irq_base;
+	int ret, i;
+
+	mutex_lock(&info->mutex);
+
+	ret = max8997_bulk_read(info->muic, MAX8997_MUIC_REG_STATUS1,
+				3, status);
+	if (ret) {
+		dev_err(info->dev, "failed to read muic register\n");
+		goto out;
+	}
+
+	dev_dbg(info->dev, "%s: STATUS1:0x%x, 2:0x%x\n", __func__,
+			status[0], status[1]);
+
+	switch (irq_type) {
+	case MAX8997_MUICIRQ_ADC:
+		adc = status[0] & STATUS1_ADC_MASK;
+		adc >>= STATUS1_ADC_SHIFT;
+
+		max8997_muic_handle_adc(info, adc);
+		break;
+	case MAX8997_MUICIRQ_ChgTyp:
+		chg_type = status[1] & STATUS2_CHGTYP_MASK;
+		chg_type >>= STATUS2_CHGTYP_SHIFT;
+
+		max8997_muic_handle_charger_type(info, chg_type);
+		break;
+	default:
+		/*
+		 * irq_type is a MAX8997 interrupt number (the .irq value
+		 * stored in muic_irqs[]), not a position in that table, so
+		 * look the name up by value rather than indexing with it.
+		 */
+		for (i = 0; i < ARRAY_SIZE(muic_irqs); i++) {
+			if (muic_irqs[i].irq == irq_type) {
+				irq_name = muic_irqs[i].name;
+				break;
+			}
+		}
+		dev_info(info->dev, "misc interrupt: %s occurred\n",
+			 irq_name);
+		break;
+	}
+
+out:
+	mutex_unlock(&info->mutex);
+}
+
+/*
+ * Handler passed to request_threaded_irq() for every entry in
+ * muic_irqs[].  Records which interrupt fired in info->irq and defers
+ * the actual processing to max8997_muic_irq_work().
+ *
+ * NOTE(review): info->irq is a single slot; if another MUIC interrupt
+ * arrives before the work item runs, the earlier number is overwritten.
+ * Confirm this is acceptable.
+ */
+static irqreturn_t max8997_muic_irq_handler(int irq, void *data)
+{
+	struct max8997_muic_info *info = data;
+
+	dev_dbg(info->dev, "irq:%d\n", irq);
+	info->irq = irq;
+
+	schedule_work(&info->irq_work);
+
+	return IRQ_HANDLED;
+}
+
+static void max8997_muic_detect_dev(struct max8997_muic_info *info)
+{
+	int ret;
+	u8 status[2], adc, chg_type;
+
+	ret = max8997_bulk_read(info->muic, MAX8997_MUIC_REG_STATUS1,
+				2, status);
+	if (ret) {
+		dev_err(info->dev, "failed to read muic register\n");
+		return;
+	}
+
+	dev_info(info->dev, "STATUS1:0x%x, STATUS2:0x%x\n",
+			status[0], status[1]);
+
+	adc = status[0] & STATUS1_ADC_MASK;
+	adc >>= STATUS1_ADC_SHIFT;
+
+	chg_type = status[1] & STATUS2_CHGTYP_MASK;
+	chg_type >>= STATUS2_CHGTYP_SHIFT;
+
+	max8997_muic_handle_adc(info, adc);
+	max8997_muic_handle_charger_type(info, chg_type);
+}
+
+static void max8997_initialize_device(struct max8997_muic_info *info)
+{
+	struct max8997_muic_platform_data *mdata = info->muic_pdata;
+	int i;
+
+	for (i = 0; i < mdata->num_init_data; i++) {
+		max8997_write_reg(info->muic, mdata->init_data[i].addr,
+				mdata->init_data[i].data);
+	}
+}
+
+/*
+ * Bind the MUIC platform device: allocate the driver state, request
+ * every interrupt listed in muic_irqs[], write the platform-supplied
+ * register initialisation and run one initial detection pass.
+ *
+ * Returns 0 on success, -ENOMEM if the state cannot be allocated,
+ * -EINVAL if the parent's platform data has no muic_pdata, or the error
+ * from request_threaded_irq().
+ */
+static int __devinit max8997_muic_probe(struct platform_device *pdev)
+{
+	struct max8997_dev *iodev = dev_get_drvdata(pdev->dev.parent);
+	struct max8997_platform_data *pdata = dev_get_platdata(iodev->dev);
+	struct max8997_muic_info *info;
+	int ret, i;
+
+	info = kzalloc(sizeof(struct max8997_muic_info), GFP_KERNEL);
+	if (!info) {
+		dev_err(&pdev->dev, "failed to allocate memory\n");
+		ret = -ENOMEM;
+		goto err_kfree;
+	}
+
+	if (!pdata->muic_pdata) {
+		dev_err(&pdev->dev, "failed to get platform_data\n");
+		ret = -EINVAL;
+		goto err_pdata;
+	}
+	info->muic_pdata = pdata->muic_pdata;
+
+	info->dev = &pdev->dev;
+	info->iodev = iodev;
+	info->muic = iodev->muic;
+
+	platform_set_drvdata(pdev, info);
+	mutex_init(&info->mutex);
+
+	INIT_WORK(&info->irq_work, max8997_muic_irq_work);
+
+	for (i = 0; i < ARRAY_SIZE(muic_irqs); i++) {
+		struct max8997_muic_irq *muic_irq = &muic_irqs[i];
+
+		ret = request_threaded_irq(pdata->irq_base + muic_irq->irq,
+				NULL, max8997_muic_irq_handler,
+				0, muic_irq->name,
+				info);
+		if (ret) {
+			dev_err(&pdev->dev,
+				"failed: irq request (IRQ: %d,"
+				" error :%d)\n",
+				muic_irq->irq, ret);
+			goto err_irq;
+		}
+	}
+
+	/* Initialize registers according to platform data */
+	max8997_initialize_device(info);
+
+	/* Initial device detection */
+	max8997_muic_detect_dev(info);
+
+	return ret;
+
+err_irq:
+	/*
+	 * i is the index of the request that failed.  Release the ones
+	 * before it, using the same irq_base-relative number each was
+	 * requested with.
+	 */
+	while (--i >= 0)
+		free_irq(pdata->irq_base + muic_irqs[i].irq, info);
+	/* An interrupt that was already live may have queued the work. */
+	cancel_work_sync(&info->irq_work);
+err_pdata:
+	kfree(info);
+err_kfree:
+	return ret;
+}
+
+/*
+ * Undo max8997_muic_probe(): release every interrupt in muic_irqs[],
+ * make sure no irq_work is pending or running, then free the driver
+ * state.  Always returns 0.
+ */
+static int __devexit max8997_muic_remove(struct platform_device *pdev)
+{
+	struct max8997_muic_info *info = platform_get_drvdata(pdev);
+	struct max8997_platform_data *pdata =
+				dev_get_platdata(info->iodev->dev);
+	int i;
+
+	/* Free the IRQs first so the handler cannot queue new work. */
+	for (i = 0; i < ARRAY_SIZE(muic_irqs); i++)
+		free_irq(pdata->irq_base + muic_irqs[i].irq, info);
+	cancel_work_sync(&info->irq_work);
+
+	kfree(info);
+
+	return 0;
+}
+
+static struct platform_driver max8997_muic_driver = {
+	.driver		= {
+		.name	= "max8997-muic",
+		.owner	= THIS_MODULE,
+	},
+	.probe		= max8997_muic_probe,
+	.remove		= __devexit_p(max8997_muic_remove),
+};
+
+static int __init max8997_muic_init(void)
+{
+	return platform_driver_register(&max8997_muic_driver);
+}
+module_init(max8997_muic_init);
+
+static void __exit max8997_muic_exit(void)
+{
+	platform_driver_unregister(&max8997_muic_driver);
+}
+module_exit(max8997_muic_exit);
+
+MODULE_DESCRIPTION("Maxim MAX8997 MUIC driver");
+MODULE_AUTHOR("Donggeun Kim <dg77.kim@samsung.com>");
+MODULE_LICENSE("GPL");

diff --git a/drivers/mmc/Makefile b/drivers/mmc/Makefile
index 12eef39..400756e 100644
--- a/drivers/mmc/Makefile
+++ b/drivers/mmc/Makefile

@@ -6,5 +6,4 @@
 
 obj-$(CONFIG_MMC)		+= core/
 obj-$(CONFIG_MMC)		+= card/
-obj-$(CONFIG_MMC)		+= host/
-
+obj-$(subst m,y,$(CONFIG_MMC))	+= host/

diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index 1e0e27c..0cad48a 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c

@@ -107,6 +107,8 @@
 	 */
 	unsigned int	part_curr;
 	struct device_attribute force_ro;
+	struct device_attribute power_ro_lock;
+	int	area_type;
 };
 
 static DEFINE_MUTEX(open_lock);
@@ -119,6 +121,7 @@
 	MMC_BLK_ABORT,
 	MMC_BLK_DATA_ERR,
 	MMC_BLK_ECC_ERR,
+	MMC_BLK_NOMEDIUM,
 };
 
 module_param(perdev_minors, int, 0444);
@@ -165,6 +168,70 @@
 	mutex_unlock(&open_lock);
 }
 
+/*
+ * sysfs "show" handler for the power_ro_lock attribute.  Reports the
+ * cached card->ext_csd.boot_ro_lock state as a decimal number:
+ *   2 - EXT_CSD_BOOT_WP_B_PERM_WP_EN is set
+ *   1 - EXT_CSD_BOOT_WP_B_PWR_WP_EN is set (and PERM_WP_EN is not)
+ *   0 - neither bit is set
+ *
+ * Returns the number of bytes written to @buf.
+ */
+static ssize_t power_ro_lock_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	int ret;
+	struct mmc_blk_data *md = mmc_blk_get(dev_to_disk(dev));
+	struct mmc_card *card = md->queue.card;
+	int locked = 0;
+
+	if (card->ext_csd.boot_ro_lock & EXT_CSD_BOOT_WP_B_PERM_WP_EN)
+		locked = 2;
+	else if (card->ext_csd.boot_ro_lock & EXT_CSD_BOOT_WP_B_PWR_WP_EN)
+		locked = 1;
+
+	ret = snprintf(buf, PAGE_SIZE, "%d\n", locked);
+
+	/* Drop the reference taken by mmc_blk_get() above. */
+	mmc_blk_put(md);
+
+	return ret;
+}
+
+/*
+ * sysfs "store" handler for the power_ro_lock attribute.  Writing 1
+ * sets EXT_CSD_BOOT_WP_B_PWR_WP_EN in the card's EXT_CSD_BOOT_WP byte
+ * and, on success, marks this disk and every MMC_BLK_DATA_AREA_BOOT
+ * entry on its part list read-only.  Any other number is accepted and
+ * ignored.
+ *
+ * Returns -EINVAL if @buf does not parse as a number, otherwise @count.
+ * Note that @count is returned even when mmc_switch() fails; the
+ * failure is only logged.
+ */
+static ssize_t power_ro_lock_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	struct mmc_blk_data *md, *part_md;
+	struct mmc_card *card;
+	unsigned long set;
+
+	if (kstrtoul(buf, 0, &set))
+		return -EINVAL;
+
+	/* Only the value 1 requests a lock; no reference is taken yet. */
+	if (set != 1)
+		return count;
+
+	md = mmc_blk_get(dev_to_disk(dev));
+	card = md->queue.card;
+
+	mmc_claim_host(card->host);
+
+	ret = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, EXT_CSD_BOOT_WP,
+				card->ext_csd.boot_ro_lock |
+				EXT_CSD_BOOT_WP_B_PWR_WP_EN,
+				card->ext_csd.part_time);
+	if (ret)
+		pr_err("%s: Locking boot partition ro until next power on failed: %d\n", md->disk->disk_name, ret);
+	else
+		/* Keep the cached copy in step with what was written. */
+		card->ext_csd.boot_ro_lock |= EXT_CSD_BOOT_WP_B_PWR_WP_EN;
+
+	mmc_release_host(card->host);
+
+	if (!ret) {
+		pr_info("%s: Locking boot partition ro until next power on\n",
+			md->disk->disk_name);
+		set_disk_ro(md->disk, 1);
+
+		list_for_each_entry(part_md, &md->part, part)
+			if (part_md->area_type == MMC_BLK_DATA_AREA_BOOT) {
+				pr_info("%s: Locking boot partition ro until next power on\n", part_md->disk->disk_name);
+				set_disk_ro(part_md->disk, 1);
+			}
+	}
+
+	mmc_blk_put(md);
+	return count;
+}
+
 static ssize_t force_ro_show(struct device *dev, struct device_attribute *attr,
 			     char *buf)
 {
@@ -266,6 +333,9 @@
 		goto idata_err;
 	}
 
+	if (!idata->buf_bytes)
+		return idata;
+
 	idata->buf = kzalloc(idata->buf_bytes, GFP_KERNEL);
 	if (!idata->buf) {
 		err = -ENOMEM;
@@ -312,25 +382,6 @@
 	if (IS_ERR(idata))
 		return PTR_ERR(idata);
 
-	cmd.opcode = idata->ic.opcode;
-	cmd.arg = idata->ic.arg;
-	cmd.flags = idata->ic.flags;
-
-	data.sg = &sg;
-	data.sg_len = 1;
-	data.blksz = idata->ic.blksz;
-	data.blocks = idata->ic.blocks;
-
-	sg_init_one(data.sg, idata->buf, idata->buf_bytes);
-
-	if (idata->ic.write_flag)
-		data.flags = MMC_DATA_WRITE;
-	else
-		data.flags = MMC_DATA_READ;
-
-	mrq.cmd = &cmd;
-	mrq.data = &data;
-
 	md = mmc_blk_get(bdev->bd_disk);
 	if (!md) {
 		err = -EINVAL;
@@ -343,6 +394,48 @@
 		goto cmd_done;
 	}
 
+	cmd.opcode = idata->ic.opcode;
+	cmd.arg = idata->ic.arg;
+	cmd.flags = idata->ic.flags;
+
+	if (idata->buf_bytes) {
+		data.sg = &sg;
+		data.sg_len = 1;
+		data.blksz = idata->ic.blksz;
+		data.blocks = idata->ic.blocks;
+
+		sg_init_one(data.sg, idata->buf, idata->buf_bytes);
+
+		if (idata->ic.write_flag)
+			data.flags = MMC_DATA_WRITE;
+		else
+			data.flags = MMC_DATA_READ;
+
+		/* data.flags must already be set before doing this. */
+		mmc_set_data_timeout(&data, card);
+
+		/* Allow overriding the timeout_ns for empirical tuning. */
+		if (idata->ic.data_timeout_ns)
+			data.timeout_ns = idata->ic.data_timeout_ns;
+
+		if ((cmd.flags & MMC_RSP_R1B) == MMC_RSP_R1B) {
+			/*
+			 * Pretend this is a data transfer and rely on the
+			 * host driver to compute timeout.  When all host
+			 * drivers support cmd.cmd_timeout for R1B, this
+			 * can be changed to:
+			 *
+			 *     mrq.data = NULL;
+			 *     cmd.cmd_timeout = idata->ic.cmd_timeout_ms;
+			 */
+			data.timeout_ns = idata->ic.cmd_timeout_ms * 1000000;
+		}
+
+		mrq.data = &data;
+	}
+
+	mrq.cmd = &cmd;
+
 	mmc_claim_host(card->host);
 
 	if (idata->ic.is_acmd) {
@@ -351,24 +444,6 @@
 			goto cmd_rel_host;
 	}
 
-	/* data.flags must already be set before doing this. */
-	mmc_set_data_timeout(&data, card);
-	/* Allow overriding the timeout_ns for empirical tuning. */
-	if (idata->ic.data_timeout_ns)
-		data.timeout_ns = idata->ic.data_timeout_ns;
-
-	if ((cmd.flags & MMC_RSP_R1B) == MMC_RSP_R1B) {
-		/*
-		 * Pretend this is a data transfer and rely on the host driver
-		 * to compute timeout.  When all host drivers support
-		 * cmd.cmd_timeout for R1B, this can be changed to:
-		 *
-		 *     mrq.data = NULL;
-		 *     cmd.cmd_timeout = idata->ic.cmd_timeout_ms;
-		 */
-		data.timeout_ns = idata->ic.cmd_timeout_ms * 1000000;
-	}
-
 	mmc_wait_for_req(card->host, &mrq);
 
 	if (cmd.error) {
@@ -565,6 +640,7 @@
 	return err;
 }
 
+#define ERR_NOMEDIUM	3
 #define ERR_RETRY	2
 #define ERR_ABORT	1
 #define ERR_CONTINUE	0
@@ -632,6 +708,9 @@
 	u32 status, stop_status = 0;
 	int err, retry;
 
+	if (mmc_card_removed(card))
+		return ERR_NOMEDIUM;
+
 	/*
 	 * Try to get card status which indicates both the card state
 	 * and why there was no response.  If the first attempt fails,
@@ -648,8 +727,12 @@
 	}
 
 	/* We couldn't get a response from the card.  Give up. */
-	if (err)
+	if (err) {
+		/* Check if the card is removed */
+		if (mmc_detect_card_removed(card->host))
+			return ERR_NOMEDIUM;
 		return ERR_ABORT;
+	}
 
 	/* Flag ECC errors */
 	if ((status & R1_CARD_ECC_FAILED) ||
@@ -922,6 +1005,8 @@
 			return MMC_BLK_RETRY;
 		case ERR_ABORT:
 			return MMC_BLK_ABORT;
+		case ERR_NOMEDIUM:
+			return MMC_BLK_NOMEDIUM;
 		case ERR_CONTINUE:
 			break;
 		}
@@ -1255,6 +1340,8 @@
 			if (!ret)
 				goto start_new_req;
 			break;
+		case MMC_BLK_NOMEDIUM:
+			goto cmd_abort;
 		}
 
 		if (ret) {
@@ -1271,6 +1358,8 @@
 
  cmd_abort:
 	spin_lock_irq(&md->lock);
+	if (mmc_card_removed(card))
+		req->cmd_flags |= REQ_QUIET;
 	while (ret)
 		ret = __blk_end_request(req, -EIO, blk_rq_cur_bytes(req));
 	spin_unlock_irq(&md->lock);
@@ -1339,7 +1428,8 @@
 					      struct device *parent,
 					      sector_t size,
 					      bool default_ro,
-					      const char *subname)
+					      const char *subname,
+					      int area_type)
 {
 	struct mmc_blk_data *md;
 	int devidx, ret;
@@ -1364,11 +1454,12 @@
 	if (!subname) {
 		md->name_idx = find_first_zero_bit(name_use, max_devices);
 		__set_bit(md->name_idx, name_use);
-	}
-	else
+	} else
 		md->name_idx = ((struct mmc_blk_data *)
 				dev_to_disk(parent)->private_data)->name_idx;
 
+	md->area_type = area_type;
+
 	/*
 	 * Set the read-only status based on the supported commands
 	 * and the write protect switch.
@@ -1462,7 +1553,8 @@
 		size = card->csd.capacity << (card->csd.read_blkbits - 9);
 	}
 
-	md = mmc_blk_alloc_req(card, &card->dev, size, false, NULL);
+	md = mmc_blk_alloc_req(card, &card->dev, size, false, NULL,
+					MMC_BLK_DATA_AREA_MAIN);
 	return md;
 }
 
@@ -1471,13 +1563,14 @@
 			      unsigned int part_type,
 			      sector_t size,
 			      bool default_ro,
-			      const char *subname)
+			      const char *subname,
+			      int area_type)
 {
 	char cap_str[10];
 	struct mmc_blk_data *part_md;
 
 	part_md = mmc_blk_alloc_req(card, disk_to_dev(md->disk), size, default_ro,
-				    subname);
+				    subname, area_type);
 	if (IS_ERR(part_md))
 		return PTR_ERR(part_md);
 	part_md->part_type = part_type;
@@ -1510,7 +1603,8 @@
 				card->part[idx].part_cfg,
 				card->part[idx].size >> 9,
 				card->part[idx].force_ro,
-				card->part[idx].name);
+				card->part[idx].name,
+				card->part[idx].area_type);
 			if (ret)
 				return ret;
 		}
@@ -1539,9 +1633,16 @@
 
 static void mmc_blk_remove_req(struct mmc_blk_data *md)
 {
+	struct mmc_card *card;
+
 	if (md) {
+		card = md->queue.card;
 		if (md->disk->flags & GENHD_FL_UP) {
 			device_remove_file(disk_to_dev(md->disk), &md->force_ro);
+			if ((md->area_type & MMC_BLK_DATA_AREA_BOOT) &&
+					card->ext_csd.boot_ro_lockable)
+				device_remove_file(disk_to_dev(md->disk),
+					&md->power_ro_lock);
 
 			/* Stop new requests from getting into the queue */
 			del_gendisk(md->disk);
@@ -1570,6 +1671,7 @@
 static int mmc_add_disk(struct mmc_blk_data *md)
 {
 	int ret;
+	struct mmc_card *card = md->queue.card;
 
 	add_disk(md->disk);
 	md->force_ro.show = force_ro_show;
@@ -1579,18 +1681,53 @@
 	md->force_ro.attr.mode = S_IRUGO | S_IWUSR;
 	ret = device_create_file(disk_to_dev(md->disk), &md->force_ro);
 	if (ret)
-		del_gendisk(md->disk);
+		goto force_ro_fail;
+
+	if ((md->area_type & MMC_BLK_DATA_AREA_BOOT) &&
+	     card->ext_csd.boot_ro_lockable) {
+		mode_t mode;
+
+		if (card->ext_csd.boot_ro_lock & EXT_CSD_BOOT_WP_B_PWR_WP_DIS)
+			mode = S_IRUGO;
+		else
+			mode = S_IRUGO | S_IWUSR;
+
+		md->power_ro_lock.show = power_ro_lock_show;
+		md->power_ro_lock.store = power_ro_lock_store;
+		md->power_ro_lock.attr.mode = mode;
+		md->power_ro_lock.attr.name =
+					"ro_lock_until_next_power_on";
+		ret = device_create_file(disk_to_dev(md->disk),
+				&md->power_ro_lock);
+		if (ret)
+			goto power_ro_lock_fail;
+	}
+	return ret;
+
+power_ro_lock_fail:
+	device_remove_file(disk_to_dev(md->disk), &md->force_ro);
+force_ro_fail:
+	del_gendisk(md->disk);
 
 	return ret;
 }
 
+#define CID_MANFID_SANDISK	0x2
+#define CID_MANFID_TOSHIBA	0x11
+#define CID_MANFID_MICRON	0x13
+
 static const struct mmc_fixup blk_fixups[] =
 {
-	MMC_FIXUP("SEM02G", 0x2, 0x100, add_quirk, MMC_QUIRK_INAND_CMD38),
-	MMC_FIXUP("SEM04G", 0x2, 0x100, add_quirk, MMC_QUIRK_INAND_CMD38),
-	MMC_FIXUP("SEM08G", 0x2, 0x100, add_quirk, MMC_QUIRK_INAND_CMD38),
-	MMC_FIXUP("SEM16G", 0x2, 0x100, add_quirk, MMC_QUIRK_INAND_CMD38),
-	MMC_FIXUP("SEM32G", 0x2, 0x100, add_quirk, MMC_QUIRK_INAND_CMD38),
+	MMC_FIXUP("SEM02G", CID_MANFID_SANDISK, 0x100, add_quirk,
+		  MMC_QUIRK_INAND_CMD38),
+	MMC_FIXUP("SEM04G", CID_MANFID_SANDISK, 0x100, add_quirk,
+		  MMC_QUIRK_INAND_CMD38),
+	MMC_FIXUP("SEM08G", CID_MANFID_SANDISK, 0x100, add_quirk,
+		  MMC_QUIRK_INAND_CMD38),
+	MMC_FIXUP("SEM16G", CID_MANFID_SANDISK, 0x100, add_quirk,
+		  MMC_QUIRK_INAND_CMD38),
+	MMC_FIXUP("SEM32G", CID_MANFID_SANDISK, 0x100, add_quirk,
+		  MMC_QUIRK_INAND_CMD38),
 
 	/*
 	 * Some MMC cards experience performance degradation with CMD23
@@ -1600,18 +1737,18 @@
 	 *
 	 * N.B. This doesn't affect SD cards.
 	 */
-	MMC_FIXUP("MMC08G", 0x11, CID_OEMID_ANY, add_quirk_mmc,
+	MMC_FIXUP("MMC08G", CID_MANFID_TOSHIBA, CID_OEMID_ANY, add_quirk_mmc,
 		  MMC_QUIRK_BLK_NO_CMD23),
-	MMC_FIXUP("MMC16G", 0x11, CID_OEMID_ANY, add_quirk_mmc,
+	MMC_FIXUP("MMC16G", CID_MANFID_TOSHIBA, CID_OEMID_ANY, add_quirk_mmc,
 		  MMC_QUIRK_BLK_NO_CMD23),
-	MMC_FIXUP("MMC32G", 0x11, CID_OEMID_ANY, add_quirk_mmc,
+	MMC_FIXUP("MMC32G", CID_MANFID_TOSHIBA, CID_OEMID_ANY, add_quirk_mmc,
 		  MMC_QUIRK_BLK_NO_CMD23),
 
 	/*
 	 * Some Micron MMC cards needs longer data read timeout than
 	 * indicated in CSD.
 	 */
-	MMC_FIXUP(CID_NAME_ANY, 0x13, 0x200, add_quirk_mmc,
+	MMC_FIXUP(CID_NAME_ANY, CID_MANFID_MICRON, 0x200, add_quirk_mmc,
 		  MMC_QUIRK_LONG_READ_TIME),
 
 	END_FIXUP

diff --git a/drivers/mmc/card/mmc_test.c b/drivers/mmc/card/mmc_test.c
index e99bdc1..759714e 100644
--- a/drivers/mmc/card/mmc_test.c
+++ b/drivers/mmc/card/mmc_test.c

@@ -1581,6 +1581,7 @@
 
 	t->max_segs = test->card->host->max_segs;
 	t->max_seg_sz = test->card->host->max_seg_size;
+	t->max_seg_sz -= t->max_seg_sz % 512;
 
 	t->max_tfr = t->max_sz;
 	if (t->max_tfr >> 9 > test->card->host->max_blk_count)

diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c
index dcad59c..2517547 100644
--- a/drivers/mmc/card/queue.c
+++ b/drivers/mmc/card/queue.c

@@ -29,6 +29,8 @@
  */
 static int mmc_prep_request(struct request_queue *q, struct request *req)
 {
+	struct mmc_queue *mq = q->queuedata;
+
 	/*
 	 * We only like normal block requests and discards.
 	 */
@@ -37,6 +39,9 @@
 		return BLKPREP_KILL;
 	}
 
+	if (mq && mmc_card_removed(mq->card))
+		return BLKPREP_KILL;
+
 	req->cmd_flags |= REQ_DONTPREP;
 
 	return BLKPREP_OK;

diff --git a/drivers/mmc/core/Makefile b/drivers/mmc/core/Makefile
index 6395019..dca4428 100644
--- a/drivers/mmc/core/Makefile
+++ b/drivers/mmc/core/Makefile

@@ -7,6 +7,6 @@
 				   mmc.o mmc_ops.o sd.o sd_ops.o \
 				   sdio.o sdio_ops.o sdio_bus.o \
 				   sdio_cis.o sdio_io.o sdio_irq.o \
-				   quirks.o
+				   quirks.o cd-gpio.o
 
 mmc_core-$(CONFIG_DEBUG_FS)	+= debugfs.o

diff --git a/drivers/mmc/core/bus.c b/drivers/mmc/core/bus.c
index 6be4924..5d011a3 100644
--- a/drivers/mmc/core/bus.c
+++ b/drivers/mmc/core/bus.c

@@ -303,10 +303,11 @@
 			mmc_card_ddr_mode(card) ? "DDR " : "",
 			type);
 	} else {
-		printk(KERN_INFO "%s: new %s%s%s card at address %04x\n",
+		pr_info("%s: new %s%s%s%s card at address %04x\n",
 			mmc_hostname(card->host),
-			mmc_sd_card_uhs(card) ? "ultra high speed " :
+			mmc_card_uhs(card) ? "ultra high speed " :
 			(mmc_card_highspeed(card) ? "high speed " : ""),
+			(mmc_card_hs200(card) ? "HS200 " : ""),
 			mmc_card_ddr_mode(card) ? "DDR " : "",
 			type, card->rca);
 	}

diff --git a/drivers/mmc/core/cd-gpio.c b/drivers/mmc/core/cd-gpio.c
new file mode 100644
index 0000000..082202a
--- /dev/null
+++ b/drivers/mmc/core/cd-gpio.c

@@ -0,0 +1,74 @@
+/*
+ * Generic GPIO card-detect helper
+ *
+ * Copyright (C) 2011, Guennadi Liakhovetski <g.liakhovetski@gmx.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/err.h>
+#include <linux/gpio.h>
+#include <linux/interrupt.h>
+#include <linux/jiffies.h>
+#include <linux/mmc/host.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+struct mmc_cd_gpio {
+	unsigned int gpio;
+	char label[0];
+};
+
+static irqreturn_t mmc_cd_gpio_irqt(int irq, void *dev_id)
+{
+	/* Schedule a card detection after a debounce timeout */
+	mmc_detect_change(dev_id, msecs_to_jiffies(100));
+	return IRQ_HANDLED;
+}
+
+/*
+ * Claim @gpio as an input and install a threaded handler on @irq that
+ * schedules card detection for @host.  Both are labelled
+ * "<parent device name> cd".  On success the irq number and the
+ * allocated context are stored in host->hotplug for mmc_cd_gpio_free().
+ *
+ * @flags is passed through to request_threaded_irq().
+ *
+ * Returns 0 on success or a negative error code; the GPIO and the
+ * context are released again on failure.
+ */
+int mmc_cd_gpio_request(struct mmc_host *host, unsigned int gpio,
+			unsigned int irq, unsigned long flags)
+{
+	/* Room for the name, the " cd" suffix and the terminating NUL. */
+	size_t len = strlen(dev_name(host->parent)) + 4;
+	struct mmc_cd_gpio *cd = kmalloc(sizeof(*cd) + len, GFP_KERNEL);
+	int ret;
+
+	if (!cd)
+		return -ENOMEM;
+
+	snprintf(cd->label, len, "%s cd", dev_name(host->parent));
+
+	ret = gpio_request_one(gpio, GPIOF_DIR_IN, cd->label);
+	if (ret < 0)
+		goto egpioreq;
+
+	ret = request_threaded_irq(irq, NULL, mmc_cd_gpio_irqt,
+				   flags, cd->label, host);
+	if (ret < 0)
+		goto eirqreq;
+
+	cd->gpio = gpio;
+	host->hotplug.irq = irq;
+	host->hotplug.handler_priv = cd;
+
+	return 0;
+
+eirqreq:
+	gpio_free(gpio);
+egpioreq:
+	kfree(cd);
+	return ret;
+}
+EXPORT_SYMBOL(mmc_cd_gpio_request);
+
+/*
+ * Release what mmc_cd_gpio_request() set up for @host: the interrupt,
+ * the GPIO and the context allocation.  Does nothing if no context is
+ * stored in host->hotplug.handler_priv (request never made, failed, or
+ * already freed), so it is safe to call unconditionally from teardown
+ * paths.
+ */
+void mmc_cd_gpio_free(struct mmc_host *host)
+{
+	struct mmc_cd_gpio *cd = host->hotplug.handler_priv;
+
+	if (!cd)
+		return;
+
+	free_irq(host->hotplug.irq, host);
+	gpio_free(cd->gpio);
+	kfree(cd);
+	/* Do not leave a dangling pointer behind for a second call. */
+	host->hotplug.handler_priv = NULL;
+}
+EXPORT_SYMBOL(mmc_cd_gpio_free);

diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 950b97d..bec0bf2 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c

@@ -140,7 +140,7 @@
 			cmd->retries = 0;
 	}
 
-	if (err && cmd->retries) {
+	if (err && cmd->retries && !mmc_card_removed(host->card)) {
 		/*
 		 * Request starter must handle retries - see
 		 * mmc_wait_for_req_done().
@@ -247,6 +247,11 @@
 {
 	init_completion(&mrq->completion);
 	mrq->done = mmc_wait_done;
+	if (mmc_card_removed(host->card)) {
+		mrq->cmd->error = -ENOMEDIUM;
+		complete(&mrq->completion);
+		return;
+	}
 	mmc_start_request(host, mrq);
 }
 
@@ -259,7 +264,8 @@
 		wait_for_completion(&mrq->completion);
 
 		cmd = mrq->cmd;
-		if (!cmd->error || !cmd->retries)
+		if (!cmd->error || !cmd->retries ||
+		    mmc_card_removed(host->card))
 			break;
 
 		pr_debug("%s: req failed (CMD%u): %d, retrying...\n",
@@ -1456,7 +1462,7 @@
 	WARN_ON(host->removed);
 	spin_unlock_irqrestore(&host->lock, flags);
 #endif
-
+	host->detect_change = 1;
 	mmc_schedule_delayed_work(&host->detect, delay);
 }
 
@@ -2049,6 +2055,43 @@
 	return -EIO;
 }
 
+/*
+ * Query the bus driver's ->alive() callback about the card on @host.
+ *
+ * Returns 0 for MMC_CAP_NONREMOVABLE hosts and for buses that provide
+ * no ->alive() op, 1 if there is no card or it is already marked
+ * removed, otherwise the result of ->alive().  A non-zero result from
+ * ->alive() also marks the card as removed.
+ */
+int _mmc_detect_card_removed(struct mmc_host *host)
+{
+	int ret;
+
+	if ((host->caps & MMC_CAP_NONREMOVABLE) || !host->bus_ops->alive)
+		return 0;
+
+	if (!host->card || mmc_card_removed(host->card))
+		return 1;
+
+	ret = host->bus_ops->alive(host);
+	if (ret) {
+		mmc_card_set_removed(host->card);
+		pr_debug("%s: card remove detected\n", mmc_hostname(host));
+	}
+
+	return ret;
+}
+
+/*
+ * Report whether the card on @host has been removed.  Warns if the
+ * host is not claimed.
+ *
+ * When a card is present, no detect change is pending and the host
+ * does not set MMC_CAP_NEEDS_POLL, the cached removed state is
+ * returned.  Otherwise the pending-change flag is cleared and
+ * _mmc_detect_card_removed() is consulted.
+ */
+int mmc_detect_card_removed(struct mmc_host *host)
+{
+	struct mmc_card *card = host->card;
+
+	WARN_ON(!host->claimed);
+	/*
+	 * The card will be considered unchanged unless we have been asked to
+	 * detect a change or host requires polling to provide card detection.
+	 */
+	if (card && !host->detect_change && !(host->caps & MMC_CAP_NEEDS_POLL))
+		return mmc_card_removed(card);
+
+	host->detect_change = 0;
+
+	return _mmc_detect_card_removed(host);
+}
+EXPORT_SYMBOL(mmc_detect_card_removed);
+
 void mmc_rescan(struct work_struct *work)
 {
 	static const unsigned freqs[] = { 400000, 300000, 200000, 100000 };
@@ -2069,6 +2112,8 @@
 	    && !(host->caps & MMC_CAP_NONREMOVABLE))
 		host->bus_ops->detect(host);
 
+	host->detect_change = 0;
+
 	/*
 	 * Let mmc_bus_put() free the bus/bus_ops if we've found that
 	 * the card is no longer present.
@@ -2130,6 +2175,7 @@
 
 	mmc_bus_get(host);
 	if (host->bus_ops && !host->bus_dead) {
+		/* Calling bus_ops->remove() with a claimed host can deadlock */
 		if (host->bus_ops->remove)
 			host->bus_ops->remove(host);
 
@@ -2201,6 +2247,9 @@
 {
 	int err = -ENOSYS;
 
+	if (host->caps2 & MMC_CAP2_NO_SLEEP_CMD)
+		return 0;
+
 	mmc_bus_get(host);
 
 	if (host->bus_ops && !host->bus_dead && host->bus_ops->awake)
@@ -2216,6 +2265,9 @@
 {
 	int err = -ENOSYS;
 
+	if (host->caps2 & MMC_CAP2_NO_SLEEP_CMD)
+		return 0;
+
 	mmc_bus_get(host);
 
 	if (host->bus_ops && !host->bus_dead && host->bus_ops->sleep)
@@ -2270,6 +2322,7 @@
 int mmc_cache_ctrl(struct mmc_host *host, u8 enable)
 {
 	struct mmc_card *card = host->card;
+	unsigned int timeout;
 	int err = 0;
 
 	if (!(host->caps2 & MMC_CAP2_CACHE_CTRL) ||
@@ -2280,16 +2333,18 @@
 			(card->ext_csd.cache_size > 0)) {
 		enable = !!enable;
 
-		if (card->ext_csd.cache_ctrl ^ enable)
+		if (card->ext_csd.cache_ctrl ^ enable) {
+			timeout = enable ? card->ext_csd.generic_cmd6_time : 0;
 			err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
-					EXT_CSD_CACHE_CTRL, enable, 0);
-		if (err)
-			pr_err("%s: cache %s error %d\n",
-					mmc_hostname(card->host),
-					enable ? "on" : "off",
-					err);
-		else
-			card->ext_csd.cache_ctrl = enable;
+					EXT_CSD_CACHE_CTRL, enable, timeout);
+			if (err)
+				pr_err("%s: cache %s error %d\n",
+						mmc_hostname(card->host),
+						enable ? "on" : "off",
+						err);
+			else
+				card->ext_csd.cache_ctrl = enable;
+		}
 	}
 
 	return err;
@@ -2310,7 +2365,13 @@
 		cancel_delayed_work(&host->disable);
 	cancel_delayed_work(&host->detect);
 	mmc_flush_scheduled_work();
-	err = mmc_cache_ctrl(host, 0);
+	if (mmc_try_claim_host(host)) {
+		err = mmc_cache_ctrl(host, 0);
+		mmc_do_release_host(host);
+	} else {
+		err = -EBUSY;
+	}
+
 	if (err)
 		goto out;
 
@@ -2338,7 +2399,9 @@
 			if (err == -ENOSYS || !host->bus_ops->resume) {
 				/*
 				 * We simply "remove" the card in this case.
-				 * It will be redetected on resume.
+				 * It will be redetected on resume.  (Calling
+				 * bus_ops->remove() with a claimed host can
+				 * deadlock.)
 				 */
 				if (host->bus_ops->remove)
 					host->bus_ops->remove(host);
@@ -2431,11 +2494,11 @@
 		if (!host->bus_ops || host->bus_ops->suspend)
 			break;
 
-		mmc_claim_host(host);
-
+		/* Calling bus_ops->remove() with a claimed host can deadlock */
 		if (host->bus_ops->remove)
 			host->bus_ops->remove(host);
 
+		mmc_claim_host(host);
 		mmc_detach_bus(host);
 		mmc_power_off(host);
 		mmc_release_host(host);

diff --git a/drivers/mmc/core/core.h b/drivers/mmc/core/core.h
index 14664f1..3400924 100644
--- a/drivers/mmc/core/core.h
+++ b/drivers/mmc/core/core.h

@@ -24,6 +24,7 @@
 	int (*resume)(struct mmc_host *);
 	int (*power_save)(struct mmc_host *);
 	int (*power_restore)(struct mmc_host *);
+	int (*alive)(struct mmc_host *);
 };
 
 void mmc_attach_bus(struct mmc_host *host, const struct mmc_bus_ops *ops);
@@ -59,6 +60,8 @@
 void mmc_start_host(struct mmc_host *host);
 void mmc_stop_host(struct mmc_host *host);
 
+int _mmc_detect_card_removed(struct mmc_host *host);
+
 int mmc_attach_mmc(struct mmc_host *host);
 int mmc_attach_sd(struct mmc_host *host);
 int mmc_attach_sdio(struct mmc_host *host);

diff --git a/drivers/mmc/core/debugfs.c b/drivers/mmc/core/debugfs.c
index 3923880..9ab5b17 100644
--- a/drivers/mmc/core/debugfs.c
+++ b/drivers/mmc/core/debugfs.c

@@ -57,6 +57,8 @@
 	const char *str;
 
 	seq_printf(s, "clock:\t\t%u Hz\n", ios->clock);
+	if (host->actual_clock)
+		seq_printf(s, "actual clock:\t%u Hz\n", host->actual_clock);
 	seq_printf(s, "vdd:\t\t%u ", ios->vdd);
 	if ((1 << ios->vdd) & MMC_VDD_165_195)
 		seq_printf(s, "(1.65 - 1.95 V)\n");
@@ -133,6 +135,9 @@
 	case MMC_TIMING_UHS_DDR50:
 		str = "sd uhs DDR50";
 		break;
+	case MMC_TIMING_MMC_HS200:
+		str = "mmc high-speed SDR200";
+		break;
 	default:
 		str = "invalid";
 		break;

diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c
index d31c78b..30055f2 100644
--- a/drivers/mmc/core/host.c
+++ b/drivers/mmc/core/host.c

@@ -54,6 +54,27 @@
 static DEFINE_SPINLOCK(mmc_host_lock);
 
 #ifdef CONFIG_MMC_CLKGATE
+static ssize_t clkgate_delay_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct mmc_host *host = cls_dev_to_mmc_host(dev);
+	return snprintf(buf, PAGE_SIZE, "%lu\n", host->clkgate_delay);
+}
+
+static ssize_t clkgate_delay_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct mmc_host *host = cls_dev_to_mmc_host(dev);
+	unsigned long flags, value;
+
+	if (kstrtoul(buf, 0, &value))
+		return -EINVAL;
+
+	spin_lock_irqsave(&host->clk_lock, flags);
+	host->clkgate_delay = value;
+	spin_unlock_irqrestore(&host->clk_lock, flags);
+	return count;
+}
 
 /*
  * Enabling clock gating will make the core call out to the host
@@ -114,7 +135,7 @@
 static void mmc_host_clk_gate_work(struct work_struct *work)
 {
 	struct mmc_host *host = container_of(work, struct mmc_host,
-					      clk_gate_work);
+					      clk_gate_work.work);
 
 	mmc_host_clk_gate_delayed(host);
 }
@@ -131,6 +152,8 @@
 {
 	unsigned long flags;
 
+	/* cancel any clock gating work scheduled by mmc_host_clk_release() */
+	cancel_delayed_work_sync(&host->clk_gate_work);
 	mutex_lock(&host->clk_gate_mutex);
 	spin_lock_irqsave(&host->clk_lock, flags);
 	if (host->clk_gated) {
@@ -180,7 +203,8 @@
 	host->clk_requests--;
 	if (mmc_host_may_gate_card(host->card) &&
 	    !host->clk_requests)
-		queue_work(system_nrt_wq, &host->clk_gate_work);
+		queue_delayed_work(system_nrt_wq, &host->clk_gate_work,
+				msecs_to_jiffies(host->clkgate_delay));
 	spin_unlock_irqrestore(&host->clk_lock, flags);
 }
 
@@ -213,8 +237,13 @@
 	host->clk_requests = 0;
 	/* Hold MCI clock for 8 cycles by default */
 	host->clk_delay = 8;
+	/*
+	 * Default clock gating delay is 200ms.
+	 * This value can be tuned by writing into sysfs entry.
+	 */
+	host->clkgate_delay = 200;
 	host->clk_gated = false;
-	INIT_WORK(&host->clk_gate_work, mmc_host_clk_gate_work);
+	INIT_DELAYED_WORK(&host->clk_gate_work, mmc_host_clk_gate_work);
 	spin_lock_init(&host->clk_lock);
 	mutex_init(&host->clk_gate_mutex);
 }
@@ -229,7 +258,7 @@
 	 * Wait for any outstanding gate and then make sure we're
 	 * ungated before exiting.
 	 */
-	if (cancel_work_sync(&host->clk_gate_work))
+	if (cancel_delayed_work_sync(&host->clk_gate_work))
 		mmc_host_clk_gate_delayed(host);
 	if (host->clk_gated)
 		mmc_host_clk_hold(host);
@@ -237,6 +266,17 @@
 	WARN_ON(host->clk_requests > 1);
 }
 
+static inline void mmc_host_clk_sysfs_init(struct mmc_host *host)
+{
+	host->clkgate_delay_attr.show = clkgate_delay_show;
+	host->clkgate_delay_attr.store = clkgate_delay_store;
+	sysfs_attr_init(&host->clkgate_delay_attr.attr);
+	host->clkgate_delay_attr.attr.name = "clkgate_delay";
+	host->clkgate_delay_attr.attr.mode = S_IRUGO | S_IWUSR;
+	if (device_create_file(&host->class_dev, &host->clkgate_delay_attr))
+		pr_err("%s: Failed to create clkgate_delay sysfs entry\n",
+				mmc_hostname(host));
+}
 #else
 
 static inline void mmc_host_clk_init(struct mmc_host *host)
@@ -247,6 +287,10 @@
 {
 }
 
+static inline void mmc_host_clk_sysfs_init(struct mmc_host *host)
+{
+}
+
 #endif
 
 /**
@@ -335,6 +379,7 @@
 #ifdef CONFIG_DEBUG_FS
 	mmc_add_host_debugfs(host);
 #endif
+	mmc_host_clk_sysfs_init(host);
 
 	mmc_start_host(host);
 	register_pm_notifier(&host->pm_notify);

diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index d240427..59b9ba5 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c

@@ -286,6 +286,27 @@
 	}
 	card->ext_csd.raw_card_type = ext_csd[EXT_CSD_CARD_TYPE];
 	switch (ext_csd[EXT_CSD_CARD_TYPE] & EXT_CSD_CARD_TYPE_MASK) {
+	case EXT_CSD_CARD_TYPE_SDR_ALL:
+	case EXT_CSD_CARD_TYPE_SDR_ALL_DDR_1_8V:
+	case EXT_CSD_CARD_TYPE_SDR_ALL_DDR_1_2V:
+	case EXT_CSD_CARD_TYPE_SDR_ALL_DDR_52:
+		card->ext_csd.hs_max_dtr = 200000000;
+		card->ext_csd.card_type = EXT_CSD_CARD_TYPE_SDR_200;
+		break;
+	case EXT_CSD_CARD_TYPE_SDR_1_2V_ALL:
+	case EXT_CSD_CARD_TYPE_SDR_1_2V_DDR_1_8V:
+	case EXT_CSD_CARD_TYPE_SDR_1_2V_DDR_1_2V:
+	case EXT_CSD_CARD_TYPE_SDR_1_2V_DDR_52:
+		card->ext_csd.hs_max_dtr = 200000000;
+		card->ext_csd.card_type = EXT_CSD_CARD_TYPE_SDR_1_2V;
+		break;
+	case EXT_CSD_CARD_TYPE_SDR_1_8V_ALL:
+	case EXT_CSD_CARD_TYPE_SDR_1_8V_DDR_1_8V:
+	case EXT_CSD_CARD_TYPE_SDR_1_8V_DDR_1_2V:
+	case EXT_CSD_CARD_TYPE_SDR_1_8V_DDR_52:
+		card->ext_csd.hs_max_dtr = 200000000;
+		card->ext_csd.card_type = EXT_CSD_CARD_TYPE_SDR_1_8V;
+		break;
 	case EXT_CSD_CARD_TYPE_DDR_52 | EXT_CSD_CARD_TYPE_52 |
 	     EXT_CSD_CARD_TYPE_26:
 		card->ext_csd.hs_max_dtr = 52000000;
@@ -348,7 +369,8 @@
 				part_size = ext_csd[EXT_CSD_BOOT_MULT] << 17;
 				mmc_part_add(card, part_size,
 					EXT_CSD_PART_CONFIG_ACC_BOOT0 + idx,
-					"boot%d", idx, true);
+					"boot%d", idx, true,
+					MMC_BLK_DATA_AREA_BOOT);
 			}
 		}
 	}
@@ -435,7 +457,8 @@
 					hc_wp_grp_sz);
 				mmc_part_add(card, part_size << 19,
 					EXT_CSD_PART_CONFIG_ACC_GP0 + idx,
-					"gp%d", idx, false);
+					"gp%d", idx, false,
+					MMC_BLK_DATA_AREA_GP);
 			}
 		}
 		card->ext_csd.sec_trim_mult =
@@ -446,6 +469,14 @@
 			ext_csd[EXT_CSD_SEC_FEATURE_SUPPORT];
 		card->ext_csd.trim_timeout = 300 *
 			ext_csd[EXT_CSD_TRIM_MULT];
+
+		/*
+		 * Note that the call to mmc_part_add above defaults to read
+		 * only. If this default assumption is changed, the call must
+		 * take into account the value of boot_ro_lock below.
+		 */
+		card->ext_csd.boot_ro_lock = ext_csd[EXT_CSD_BOOT_WP];
+		card->ext_csd.boot_ro_lockable = true;
 	}
 
 	if (card->ext_csd.rev >= 5) {
@@ -690,6 +721,79 @@
 }
 
 /*
+ * Select the desired bus width and switch to HS200 mode
+ * if the bus width was set without error.
+ */
+static int mmc_select_hs200(struct mmc_card *card)
+{
+	int idx, err = 0;
+	struct mmc_host *host;
+	static unsigned ext_csd_bits[] = {
+		EXT_CSD_BUS_WIDTH_4,
+		EXT_CSD_BUS_WIDTH_8,
+	};
+	static unsigned bus_widths[] = {
+		MMC_BUS_WIDTH_4,
+		MMC_BUS_WIDTH_8,
+	};
+
+	BUG_ON(!card);
+
+	host = card->host;
+
+	if (card->ext_csd.card_type & EXT_CSD_CARD_TYPE_SDR_1_2V &&
+	    host->caps2 & MMC_CAP2_HS200_1_2V_SDR)
+		if (mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_120, 0))
+			err = mmc_set_signal_voltage(host,
+						     MMC_SIGNAL_VOLTAGE_180, 0);
+
+	/* If this fails, try again during the next card power cycle */
+	if (err)
+		goto err;
+
+	idx = (host->caps & MMC_CAP_8_BIT_DATA) ? 1 : 0;
+
+	/*
+	 * Unlike SD, MMC cards don't have a configuration register to notify
+	 * supported bus width. So bus test command should be run to identify
+	 * the supported bus width or compare the ext csd values of current
+	 * bus width and ext csd values of 1 bit mode read earlier.
+	 */
+	for (; idx >= 0; idx--) {
+
+		/*
+		 * If the host is capable of 8bit transfer, switch
+		 * the device to work in 8bit transfer mode. If the
+		 * mmc switch command returns an error, fall back to
+		 * 4bit transfer mode. On success set the corresponding
+		 * bus width on the host.
+		 */
+		err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
+				 EXT_CSD_BUS_WIDTH,
+				 ext_csd_bits[idx],
+				 card->ext_csd.generic_cmd6_time);
+		if (err)
+			continue;
+
+		mmc_set_bus_width(card->host, bus_widths[idx]);
+
+		if (!(host->caps & MMC_CAP_BUS_WIDTH_TEST))
+			err = mmc_compare_ext_csds(card, bus_widths[idx]);
+		else
+			err = mmc_bus_test(card, bus_widths[idx]);
+		if (!err)
+			break;
+	}
+
+	/* switch to HS200 mode if bus width set successfully */
+	if (!err)
+		err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
+				 EXT_CSD_HS_TIMING, 2, 0);
+err:
+	return err;
+}
+
+/*
  * Handle the detection and initialisation of a card.
  *
  * In the case of a resume, "oldcard" will contain the card
@@ -895,11 +999,15 @@
 	/*
 	 * Activate high speed (if supported)
 	 */
-	if ((card->ext_csd.hs_max_dtr != 0) &&
-		(host->caps & MMC_CAP_MMC_HIGHSPEED)) {
-		err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
-				 EXT_CSD_HS_TIMING, 1,
-				 card->ext_csd.generic_cmd6_time);
+	if (card->ext_csd.hs_max_dtr != 0) {
+		err = 0;
+		if (card->ext_csd.hs_max_dtr > 52000000 &&
+		    host->caps2 & MMC_CAP2_HS200)
+			err = mmc_select_hs200(card);
+		else if	(host->caps & MMC_CAP_MMC_HIGHSPEED)
+			err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
+					 EXT_CSD_HS_TIMING, 1, 0);
+
 		if (err && err != -EBADMSG)
 			goto free_card;
 
@@ -908,8 +1016,15 @@
 			       mmc_hostname(card->host));
 			err = 0;
 		} else {
-			mmc_card_set_highspeed(card);
-			mmc_set_timing(card->host, MMC_TIMING_MMC_HS);
+			if (card->ext_csd.hs_max_dtr > 52000000 &&
+			    host->caps2 & MMC_CAP2_HS200) {
+				mmc_card_set_hs200(card);
+				mmc_set_timing(card->host,
+					       MMC_TIMING_MMC_HS200);
+			} else {
+				mmc_card_set_highspeed(card);
+				mmc_set_timing(card->host, MMC_TIMING_MMC_HS);
+			}
 		}
 	}
 
@@ -934,7 +1049,7 @@
 	 */
 	max_dtr = (unsigned int)-1;
 
-	if (mmc_card_highspeed(card)) {
+	if (mmc_card_highspeed(card) || mmc_card_hs200(card)) {
 		if (max_dtr > card->ext_csd.hs_max_dtr)
 			max_dtr = card->ext_csd.hs_max_dtr;
 	} else if (max_dtr > card->csd.max_dtr) {
@@ -960,9 +1075,48 @@
 	}
 
 	/*
+	 * Indicate HS200 SDR mode (if supported).
+	 */
+	if (mmc_card_hs200(card)) {
+		u32 ext_csd_bits;
+		u32 bus_width = card->host->ios.bus_width;
+
+		/*
+		 * For devices supporting HS200 mode, the bus width has
+		 * to be set before executing the tuning function. If
+		 * set after tuning, then the device will respond with CRC
+		 * errors for responses on CMD line. So for HS200 the
+		 * sequence will be
+		 * 1. set bus width 4bit / 8 bit (1 bit not supported)
+		 * 2. switch to HS200 mode
+		 * 3. set the clock to > 52MHz <= 200MHz and
+		 * 4. execute tuning for HS200
+		 */
+		if ((host->caps2 & MMC_CAP2_HS200) &&
+		    card->host->ops->execute_tuning)
+			err = card->host->ops->execute_tuning(card->host,
+				MMC_SEND_TUNING_BLOCK_HS200);
+		if (err) {
+			pr_warning("%s: tuning execution failed\n",
+				   mmc_hostname(card->host));
+			goto err;
+		}
+
+		ext_csd_bits = (bus_width == MMC_BUS_WIDTH_8) ?
+				EXT_CSD_BUS_WIDTH_8 : EXT_CSD_BUS_WIDTH_4;
+		err = mmc_select_powerclass(card, ext_csd_bits, ext_csd);
+		if (err) {
+			pr_err("%s: power class selection to bus width %d failed\n",
+				mmc_hostname(card->host), 1 << bus_width);
+			goto err;
+		}
+	}
+
+	/*
 	 * Activate wide bus and DDR (if supported).
 	 */
-	if ((card->csd.mmca_vsn >= CSD_SPEC_VER_4) &&
+	if (!mmc_card_hs200(card) &&
+	    (card->csd.mmca_vsn >= CSD_SPEC_VER_3) &&
 	    (host->caps & (MMC_CAP_4_BIT_DATA | MMC_CAP_8_BIT_DATA))) {
 		static unsigned ext_csd_bits[][2] = {
 			{ EXT_CSD_BUS_WIDTH_8, EXT_CSD_DDR_BUS_WIDTH_8 },
@@ -1048,7 +1202,7 @@
 			 *
 			 * WARNING: eMMC rules are NOT the same as SD DDR
 			 */
-			if (ddr == EXT_CSD_CARD_TYPE_DDR_1_2V) {
+			if (ddr == MMC_1_2V_DDR_MODE) {
 				err = mmc_set_signal_voltage(host,
 					MMC_SIGNAL_VOLTAGE_120, 0);
 				if (err)
@@ -1067,14 +1221,23 @@
 	if ((host->caps2 & MMC_CAP2_CACHE_CTRL) &&
 			card->ext_csd.cache_size > 0) {
 		err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
-				EXT_CSD_CACHE_CTRL, 1, 0);
+				EXT_CSD_CACHE_CTRL, 1,
+				card->ext_csd.generic_cmd6_time);
 		if (err && err != -EBADMSG)
 			goto free_card;
 
 		/*
 		 * Only if no error, cache is turned on successfully.
 		 */
-		card->ext_csd.cache_ctrl = err ? 0 : 1;
+		if (err) {
+			pr_warning("%s: Cache is supported, "
+					"but failed to turn on (%d)\n",
+					mmc_hostname(card->host), err);
+			card->ext_csd.cache_ctrl = 0;
+			err = 0;
+		} else {
+			card->ext_csd.cache_ctrl = 1;
+		}
 	}
 
 	if (!oldcard)
@@ -1105,6 +1268,14 @@
 }
 
 /*
+ * Card detection - card is alive.
+ */
+static int mmc_alive(struct mmc_host *host)
+{
+	return mmc_send_status(host->card, NULL);
+}
+
+/*
  * Card detection callback from host.
  */
 static void mmc_detect(struct mmc_host *host)
@@ -1119,7 +1290,7 @@
 	/*
 	 * Just check if our card has been removed.
 	 */
-	err = mmc_send_status(host->card, NULL);
+	err = _mmc_detect_card_removed(host);
 
 	mmc_release_host(host);
 
@@ -1224,6 +1395,7 @@
 	.suspend = NULL,
 	.resume = NULL,
 	.power_restore = mmc_power_restore,
+	.alive = mmc_alive,
 };
 
 static const struct mmc_bus_ops mmc_ops_unsafe = {
@@ -1234,6 +1406,7 @@
 	.suspend = mmc_suspend,
 	.resume = mmc_resume,
 	.power_restore = mmc_power_restore,
+	.alive = mmc_alive,
 };
 
 static void mmc_attach_bus_ops(struct mmc_host *host)

diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index f2a05ea..c63ad03 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c

@@ -307,8 +307,8 @@
 		goto out;
 	}
 
-	if (status[13] & UHS_SDR50_BUS_SPEED)
-		card->sw_caps.hs_max_dtr = 50000000;
+	if (status[13] & SD_MODE_HIGH_SPEED)
+		card->sw_caps.hs_max_dtr = HIGH_SPEED_MAX_DTR;
 
 	if (card->scr.sda_spec3) {
 		card->sw_caps.sd3_bus_mode = status[13];
@@ -661,7 +661,8 @@
 
 	/* SPI mode doesn't define CMD19 */
 	if (!mmc_host_is_spi(card->host) && card->host->ops->execute_tuning)
-		err = card->host->ops->execute_tuning(card->host);
+		err = card->host->ops->execute_tuning(card->host,
+						      MMC_SEND_TUNING_BLOCK);
 
 out:
 	kfree(status);
@@ -960,7 +961,7 @@
 			goto free_card;
 
 		/* Card is an ultra-high-speed card */
-		mmc_sd_card_set_uhs(card);
+		mmc_card_set_uhs(card);
 
 		/*
 		 * Since initialization is now complete, enable preset
@@ -1019,6 +1020,14 @@
 }
 
 /*
+ * Card detection - card is alive.
+ */
+static int mmc_sd_alive(struct mmc_host *host)
+{
+	return mmc_send_status(host->card, NULL);
+}
+
+/*
  * Card detection callback from host.
  */
 static void mmc_sd_detect(struct mmc_host *host)
@@ -1033,7 +1042,7 @@
 	/*
 	 * Just check if our card has been removed.
 	 */
-	err = mmc_send_status(host->card, NULL);
+	err = _mmc_detect_card_removed(host);
 
 	mmc_release_host(host);
 
@@ -1102,6 +1111,7 @@
 	.suspend = NULL,
 	.resume = NULL,
 	.power_restore = mmc_sd_power_restore,
+	.alive = mmc_sd_alive,
 };
 
 static const struct mmc_bus_ops mmc_sd_ops_unsafe = {
@@ -1110,6 +1120,7 @@
 	.suspend = mmc_sd_suspend,
 	.resume = mmc_sd_resume,
 	.power_restore = mmc_sd_power_restore,
+	.alive = mmc_sd_alive,
 };
 
 static void mmc_sd_attach_bus_ops(struct mmc_host *host)

diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c
index 3ab565e..bd7bacc 100644
--- a/drivers/mmc/core/sdio.c
+++ b/drivers/mmc/core/sdio.c

@@ -14,6 +14,7 @@
 
 #include <linux/mmc/host.h>
 #include <linux/mmc/card.h>
+#include <linux/mmc/mmc.h>
 #include <linux/mmc/sdio.h>
 #include <linux/mmc/sdio_func.h>
 #include <linux/mmc/sdio_ids.h>
@@ -102,6 +103,7 @@
 	int ret;
 	int cccr_vsn;
 	unsigned char data;
+	unsigned char speed;
 
 	memset(&card->cccr, 0, sizeof(struct sdio_cccr));
 
@@ -140,12 +142,60 @@
 	}
 
 	if (cccr_vsn >= SDIO_CCCR_REV_1_20) {
-		ret = mmc_io_rw_direct(card, 0, 0, SDIO_CCCR_SPEED, 0, &data);
+		ret = mmc_io_rw_direct(card, 0, 0, SDIO_CCCR_SPEED, 0, &speed);
 		if (ret)
 			goto out;
 
-		if (data & SDIO_SPEED_SHS)
-			card->cccr.high_speed = 1;
+		card->scr.sda_spec3 = 0;
+		card->sw_caps.sd3_bus_mode = 0;
+		card->sw_caps.sd3_drv_type = 0;
+		if (cccr_vsn >= SDIO_CCCR_REV_3_00) {
+			card->scr.sda_spec3 = 1;
+			ret = mmc_io_rw_direct(card, 0, 0,
+				SDIO_CCCR_UHS, 0, &data);
+			if (ret)
+				goto out;
+
+			if (card->host->caps &
+				(MMC_CAP_UHS_SDR12 | MMC_CAP_UHS_SDR25 |
+				 MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR104 |
+				 MMC_CAP_UHS_DDR50)) {
+				if (data & SDIO_UHS_DDR50)
+					card->sw_caps.sd3_bus_mode
+						|= SD_MODE_UHS_DDR50;
+
+				if (data & SDIO_UHS_SDR50)
+					card->sw_caps.sd3_bus_mode
+						|= SD_MODE_UHS_SDR50;
+
+				if (data & SDIO_UHS_SDR104)
+					card->sw_caps.sd3_bus_mode
+						|= SD_MODE_UHS_SDR104;
+			}
+
+			ret = mmc_io_rw_direct(card, 0, 0,
+				SDIO_CCCR_DRIVE_STRENGTH, 0, &data);
+			if (ret)
+				goto out;
+
+			if (data & SDIO_DRIVE_SDTA)
+				card->sw_caps.sd3_drv_type |= SD_DRIVER_TYPE_A;
+			if (data & SDIO_DRIVE_SDTC)
+				card->sw_caps.sd3_drv_type |= SD_DRIVER_TYPE_C;
+			if (data & SDIO_DRIVE_SDTD)
+				card->sw_caps.sd3_drv_type |= SD_DRIVER_TYPE_D;
+		}
+
+		/* if no uhs mode ensure we check for high speed */
+		if (!card->sw_caps.sd3_bus_mode) {
+			if (speed & SDIO_SPEED_SHS) {
+				card->cccr.high_speed = 1;
+				card->sw_caps.hs_max_dtr = 50000000;
+			} else {
+				card->cccr.high_speed = 0;
+				card->sw_caps.hs_max_dtr = 25000000;
+			}
+		}
 	}
 
 out:
@@ -327,6 +377,194 @@
 	return max_dtr;
 }
 
+static unsigned char host_drive_to_sdio_drive(int host_strength)
+{
+	switch (host_strength) {
+	case MMC_SET_DRIVER_TYPE_A:
+		return SDIO_DTSx_SET_TYPE_A;
+	case MMC_SET_DRIVER_TYPE_B:
+		return SDIO_DTSx_SET_TYPE_B;
+	case MMC_SET_DRIVER_TYPE_C:
+		return SDIO_DTSx_SET_TYPE_C;
+	case MMC_SET_DRIVER_TYPE_D:
+		return SDIO_DTSx_SET_TYPE_D;
+	default:
+		return SDIO_DTSx_SET_TYPE_B;
+	}
+}
+
+static void sdio_select_driver_type(struct mmc_card *card)
+{
+	int host_drv_type = SD_DRIVER_TYPE_B;
+	int card_drv_type = SD_DRIVER_TYPE_B;
+	int drive_strength;
+	unsigned char card_strength;
+	int err;
+
+	/*
+	 * If the host doesn't support any of the Driver Types A,C or D,
+	 * or there is no board specific handler then default Driver
+	 * Type B is used.
+	 */
+	if (!(card->host->caps &
+		(MMC_CAP_DRIVER_TYPE_A |
+		 MMC_CAP_DRIVER_TYPE_C |
+		 MMC_CAP_DRIVER_TYPE_D)))
+		return;
+
+	if (!card->host->ops->select_drive_strength)
+		return;
+
+	if (card->host->caps & MMC_CAP_DRIVER_TYPE_A)
+		host_drv_type |= SD_DRIVER_TYPE_A;
+
+	if (card->host->caps & MMC_CAP_DRIVER_TYPE_C)
+		host_drv_type |= SD_DRIVER_TYPE_C;
+
+	if (card->host->caps & MMC_CAP_DRIVER_TYPE_D)
+		host_drv_type |= SD_DRIVER_TYPE_D;
+
+	if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_A)
+		card_drv_type |= SD_DRIVER_TYPE_A;
+
+	if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_C)
+		card_drv_type |= SD_DRIVER_TYPE_C;
+
+	if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_D)
+		card_drv_type |= SD_DRIVER_TYPE_D;
+
+	/*
+	 * The drive strength that the hardware can support
+	 * depends on the board design.  Pass the appropriate
+	 * information and let the hardware specific code
+	 * return what is possible given the options
+	 */
+	drive_strength = card->host->ops->select_drive_strength(
+		card->sw_caps.uhs_max_dtr,
+		host_drv_type, card_drv_type);
+
+	/* if error just use default for drive strength B */
+	err = mmc_io_rw_direct(card, 0, 0, SDIO_CCCR_DRIVE_STRENGTH, 0,
+		&card_strength);
+	if (err)
+		return;
+
+	card_strength &= ~(SDIO_DRIVE_DTSx_MASK<<SDIO_DRIVE_DTSx_SHIFT);
+	card_strength |= host_drive_to_sdio_drive(drive_strength);
+
+	err = mmc_io_rw_direct(card, 1, 0, SDIO_CCCR_DRIVE_STRENGTH,
+		card_strength, NULL);
+
+	/* if error default to drive strength B */
+	if (!err)
+		mmc_set_driver_type(card->host, drive_strength);
+}
+
+
+static int sdio_set_bus_speed_mode(struct mmc_card *card)
+{
+	unsigned int bus_speed, timing;
+	int err;
+	unsigned char speed;
+
+	/*
+	 * If the host doesn't support any of the UHS-I modes, fallback on
+	 * default speed.
+	 */
+	if (!(card->host->caps & (MMC_CAP_UHS_SDR12 | MMC_CAP_UHS_SDR25 |
+	    MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR104 | MMC_CAP_UHS_DDR50)))
+		return 0;
+
+	bus_speed = SDIO_SPEED_SDR12;
+	timing = MMC_TIMING_UHS_SDR12;
+	if ((card->host->caps & MMC_CAP_UHS_SDR104) &&
+	    (card->sw_caps.sd3_bus_mode & SD_MODE_UHS_SDR104)) {
+			bus_speed = SDIO_SPEED_SDR104;
+			timing = MMC_TIMING_UHS_SDR104;
+			card->sw_caps.uhs_max_dtr = UHS_SDR104_MAX_DTR;
+	} else if ((card->host->caps & MMC_CAP_UHS_DDR50) &&
+		   (card->sw_caps.sd3_bus_mode & SD_MODE_UHS_DDR50)) {
+			bus_speed = SDIO_SPEED_DDR50;
+			timing = MMC_TIMING_UHS_DDR50;
+			card->sw_caps.uhs_max_dtr = UHS_DDR50_MAX_DTR;
+	} else if ((card->host->caps & (MMC_CAP_UHS_SDR104 |
+		    MMC_CAP_UHS_SDR50)) && (card->sw_caps.sd3_bus_mode &
+		    SD_MODE_UHS_SDR50)) {
+			bus_speed = SDIO_SPEED_SDR50;
+			timing = MMC_TIMING_UHS_SDR50;
+			card->sw_caps.uhs_max_dtr = UHS_SDR50_MAX_DTR;
+	} else if ((card->host->caps & (MMC_CAP_UHS_SDR104 |
+		    MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR25)) &&
+		   (card->sw_caps.sd3_bus_mode & SD_MODE_UHS_SDR25)) {
+			bus_speed = SDIO_SPEED_SDR25;
+			timing = MMC_TIMING_UHS_SDR25;
+			card->sw_caps.uhs_max_dtr = UHS_SDR25_MAX_DTR;
+	} else if ((card->host->caps & (MMC_CAP_UHS_SDR104 |
+		    MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR25 |
+		    MMC_CAP_UHS_SDR12)) && (card->sw_caps.sd3_bus_mode &
+		    SD_MODE_UHS_SDR12)) {
+			bus_speed = SDIO_SPEED_SDR12;
+			timing = MMC_TIMING_UHS_SDR12;
+			card->sw_caps.uhs_max_dtr = UHS_SDR12_MAX_DTR;
+	}
+
+	err = mmc_io_rw_direct(card, 0, 0, SDIO_CCCR_SPEED, 0, &speed);
+	if (err)
+		return err;
+
+	speed &= ~SDIO_SPEED_BSS_MASK;
+	speed |= bus_speed;
+	err = mmc_io_rw_direct(card, 1, 0, SDIO_CCCR_SPEED, speed, NULL);
+	if (err)
+		return err;
+
+	if (bus_speed) {
+		mmc_set_timing(card->host, timing);
+		mmc_set_clock(card->host, card->sw_caps.uhs_max_dtr);
+	}
+
+	return 0;
+}
+
+/*
+ * UHS-I specific initialization procedure
+ */
+static int mmc_sdio_init_uhs_card(struct mmc_card *card)
+{
+	int err;
+
+	if (!card->scr.sda_spec3)
+		return 0;
+
+	/*
+	 * Switch to wider bus (if supported).
+	 */
+	if (card->host->caps & MMC_CAP_4_BIT_DATA) {
+		err = sdio_enable_4bit_bus(card);
+		if (err > 0) {
+			mmc_set_bus_width(card->host, MMC_BUS_WIDTH_4);
+			err = 0;
+		}
+	}
+
+	/* Set the driver strength for the card */
+	sdio_select_driver_type(card);
+
+	/* Set bus speed mode of the card */
+	err = sdio_set_bus_speed_mode(card);
+	if (err)
+		goto out;
+
+	/* Initialize and start re-tuning timer */
+	if (!mmc_host_is_spi(card->host) && card->host->ops->execute_tuning)
+		err = card->host->ops->execute_tuning(card->host,
+						      MMC_SEND_TUNING_BLOCK);
+
+out:
+
+	return err;
+}
+
 /*
  * Handle the detection and initialisation of a card.
  *
@@ -394,6 +632,30 @@
 		host->ops->init_card(host, card);
 
 	/*
+	 * If the host and card support UHS-I mode request the card
+	 * to switch to 1.8V signaling level.  No 1.8v signalling if
+	 * UHS mode is not enabled to maintain compatibility, as some
+	 * systems that claim 1.8v signalling in fact do not support
+	 * it.
+	 */
+	if ((ocr & R4_18V_PRESENT) &&
+		(host->caps &
+			(MMC_CAP_UHS_SDR12 | MMC_CAP_UHS_SDR25 |
+			 MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR104 |
+			 MMC_CAP_UHS_DDR50))) {
+		err = mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_180,
+				true);
+		if (err) {
+			ocr &= ~R4_18V_PRESENT;
+			host->ocr &= ~R4_18V_PRESENT;
+		}
+		err = 0;
+	} else {
+		ocr &= ~R4_18V_PRESENT;
+		host->ocr &= ~R4_18V_PRESENT;
+	}
+
+	/*
 	 * For native busses:  set card RCA and quit open drain mode.
 	 */
 	if (!powered_resume && !mmc_host_is_spi(host)) {
@@ -492,29 +754,39 @@
 	if (err)
 		goto remove;
 
-	/*
-	 * Switch to high-speed (if supported).
-	 */
-	err = sdio_enable_hs(card);
-	if (err > 0)
-		mmc_sd_go_highspeed(card);
-	else if (err)
-		goto remove;
+	/* Initialization sequence for UHS-I cards */
+	/* Only if card supports 1.8v and UHS signaling */
+	if ((ocr & R4_18V_PRESENT) && card->sw_caps.sd3_bus_mode) {
+		err = mmc_sdio_init_uhs_card(card);
+		if (err)
+			goto remove;
 
-	/*
-	 * Change to the card's maximum speed.
-	 */
-	mmc_set_clock(host, mmc_sdio_get_max_clock(card));
+		/* Card is an ultra-high-speed card */
+		mmc_card_set_uhs(card);
+	} else {
+		/*
+		 * Switch to high-speed (if supported).
+		 */
+		err = sdio_enable_hs(card);
+		if (err > 0)
+			mmc_sd_go_highspeed(card);
+		else if (err)
+			goto remove;
 
-	/*
-	 * Switch to wider bus (if supported).
-	 */
-	err = sdio_enable_4bit_bus(card);
-	if (err > 0)
-		mmc_set_bus_width(card->host, MMC_BUS_WIDTH_4);
-	else if (err)
-		goto remove;
+		/*
+		 * Change to the card's maximum speed.
+		 */
+		mmc_set_clock(host, mmc_sdio_get_max_clock(card));
 
+		/*
+		 * Switch to wider bus (if supported).
+		 */
+		err = sdio_enable_4bit_bus(card);
+		if (err > 0)
+			mmc_set_bus_width(card->host, MMC_BUS_WIDTH_4);
+		else if (err)
+			goto remove;
+	}
 finish:
 	if (!oldcard)
 		host->card = card;
@@ -550,6 +822,14 @@
 }
 
 /*
+ * Card detection - card is alive.
+ */
+static int mmc_sdio_alive(struct mmc_host *host)
+{
+	return mmc_select_card(host->card);
+}
+
+/*
  * Card detection callback from host.
  */
 static void mmc_sdio_detect(struct mmc_host *host)
@@ -571,7 +851,7 @@
 	/*
 	 * Just check if our card has been removed.
 	 */
-	err = mmc_select_card(host->card);
+	err = _mmc_detect_card_removed(host);
 
 	mmc_release_host(host);
 
@@ -749,6 +1029,7 @@
 	.suspend = mmc_sdio_suspend,
 	.resume = mmc_sdio_resume,
 	.power_restore = mmc_sdio_power_restore,
+	.alive = mmc_sdio_alive,
 };
 
 
@@ -797,8 +1078,17 @@
 	 * Detect and init the card.
 	 */
 	err = mmc_sdio_init_card(host, host->ocr, NULL, 0);
-	if (err)
-		goto err;
+	if (err) {
+		if (err == -EAGAIN) {
+			/*
+			 * Retry initialization with S18R set to 0.
+			 */
+			host->ocr &= ~R4_18V_PRESENT;
+			err = mmc_sdio_init_card(host, host->ocr, NULL, 0);
+		}
+		if (err)
+			goto err;
+	}
 	card = host->card;
 
 	/*

diff --git a/drivers/mmc/core/sdio_io.c b/drivers/mmc/core/sdio_io.c
index b1f3168..8f6f5ac 100644
--- a/drivers/mmc/core/sdio_io.c
+++ b/drivers/mmc/core/sdio_io.c

@@ -196,6 +196,9 @@
 	else
 		mval = min(mval, func->max_blksize);
 
+	if (mmc_card_broken_byte_mode_512(func->card))
+		return min(mval, 511u);
+
 	return min(mval, 512u); /* maximum size for byte mode */
 }
 
@@ -314,7 +317,7 @@
 			func->card->host->max_seg_size / func->cur_blksize);
 		max_blocks = min(max_blocks, 511u);
 
-		while (remainder > func->cur_blksize) {
+		while (remainder >= func->cur_blksize) {
 			unsigned blocks;
 
 			blocks = remainder / func->cur_blksize;
@@ -339,8 +342,9 @@
 	while (remainder > 0) {
 		size = min(remainder, sdio_max_byte_size(func));
 
+		/* Indicate byte mode by setting "blocks" = 0 */
 		ret = mmc_io_rw_extended(func->card, write, func->num, addr,
-			 incr_addr, buf, 1, size);
+			 incr_addr, buf, 0, size);
 		if (ret)
 			return ret;
 

diff --git a/drivers/mmc/core/sdio_ops.c b/drivers/mmc/core/sdio_ops.c
index b0517cc..d29e206 100644
--- a/drivers/mmc/core/sdio_ops.c
+++ b/drivers/mmc/core/sdio_ops.c

@@ -128,8 +128,6 @@
 
 	BUG_ON(!card);
 	BUG_ON(fn > 7);
-	BUG_ON(blocks == 1 && blksz > 512);
-	WARN_ON(blocks == 0);
 	WARN_ON(blksz == 0);
 
 	/* sanity check */
@@ -144,22 +142,20 @@
 	cmd.arg |= fn << 28;
 	cmd.arg |= incr_addr ? 0x04000000 : 0x00000000;
 	cmd.arg |= addr << 9;
-	if (blocks == 1 && blksz < 512)
-		cmd.arg |= blksz;			/* byte mode */
-	else if (blocks == 1 && blksz == 512 &&
-		 !(mmc_card_broken_byte_mode_512(card)))
-		cmd.arg |= 0;				/* byte mode, 0==512 */
+	if (blocks == 0)
+		cmd.arg |= (blksz == 512) ? 0 : blksz;	/* byte mode */
 	else
 		cmd.arg |= 0x08000000 | blocks;		/* block mode */
 	cmd.flags = MMC_RSP_SPI_R5 | MMC_RSP_R5 | MMC_CMD_ADTC;
 
 	data.blksz = blksz;
-	data.blocks = blocks;
+	/* Code in host drivers/fwk assumes that "blocks" always is >=1 */
+	data.blocks = blocks ? blocks : 1;
 	data.flags = write ? MMC_DATA_WRITE : MMC_DATA_READ;
 	data.sg = &sg;
 	data.sg_len = 1;
 
-	sg_init_one(&sg, buf, blksz * blocks);
+	sg_init_one(&sg, buf, data.blksz * data.blocks);
 
 	mmc_set_data_timeout(&data, card);
 

diff --git a/drivers/mmc/host/Makefile b/drivers/mmc/host/Makefile
index b4b83f3..745f8fc 100644
--- a/drivers/mmc/host/Makefile
+++ b/drivers/mmc/host/Makefile

@@ -9,6 +9,7 @@
 obj-$(CONFIG_MMC_MXS)		+= mxs-mmc.o
 obj-$(CONFIG_MMC_SDHCI)		+= sdhci.o
 obj-$(CONFIG_MMC_SDHCI_PCI)	+= sdhci-pci.o
+obj-$(subst m,y,$(CONFIG_MMC_SDHCI_PCI))	+= sdhci-pci-data.o
 obj-$(CONFIG_MMC_SDHCI_PXAV3)	+= sdhci-pxav3.o
 obj-$(CONFIG_MMC_SDHCI_PXAV2)	+= sdhci-pxav2.o
 obj-$(CONFIG_MMC_SDHCI_S3C)	+= sdhci-s3c.o

diff --git a/drivers/mmc/host/at91_mci.c b/drivers/mmc/host/at91_mci.c
index f437c3e..947faa5 100644
--- a/drivers/mmc/host/at91_mci.c
+++ b/drivers/mmc/host/at91_mci.c

@@ -236,7 +236,7 @@
 
 		sg = &data->sg[i];
 
-		sgbuffer = kmap_atomic(sg_page(sg), KM_BIO_SRC_IRQ) + sg->offset;
+		sgbuffer = kmap_atomic(sg_page(sg)) + sg->offset;
 		amount = min(size, sg->length);
 		size -= amount;
 
@@ -252,7 +252,7 @@
 			dmabuf = (unsigned *)tmpv;
 		}
 
-		kunmap_atomic(sgbuffer, KM_BIO_SRC_IRQ);
+		kunmap_atomic(sgbuffer);
 
 		if (size == 0)
 			break;
@@ -302,7 +302,7 @@
 
 		sg = &data->sg[i];
 
-		sgbuffer = kmap_atomic(sg_page(sg), KM_BIO_SRC_IRQ) + sg->offset;
+		sgbuffer = kmap_atomic(sg_page(sg)) + sg->offset;
 		amount = min(size, sg->length);
 		size -= amount;
 
@@ -318,7 +318,7 @@
 		}
 
 		flush_kernel_dcache_page(sg_page(sg));
-		kunmap_atomic(sgbuffer, KM_BIO_SRC_IRQ);
+		kunmap_atomic(sgbuffer);
 		data->bytes_xfered += amount;
 		if (size == 0)
 			break;

diff --git a/drivers/mmc/host/bfin_sdh.c b/drivers/mmc/host/bfin_sdh.c
index 0371bf5..0366617 100644
--- a/drivers/mmc/host/bfin_sdh.c
+++ b/drivers/mmc/host/bfin_sdh.c

@@ -627,17 +627,7 @@
 	},
 };
 
-static int __init sdh_init(void)
-{
-	return platform_driver_register(&sdh_driver);
-}
-module_init(sdh_init);
-
-static void __exit sdh_exit(void)
-{
-	platform_driver_unregister(&sdh_driver);
-}
-module_exit(sdh_exit);
+module_platform_driver(sdh_driver);
 
 MODULE_DESCRIPTION("Blackfin Secure Digital Host Driver");
 MODULE_AUTHOR("Cliff Cai, Roy Huang");

diff --git a/drivers/mmc/host/cb710-mmc.c b/drivers/mmc/host/cb710-mmc.c
index ce2a47b7..83693fd 100644
--- a/drivers/mmc/host/cb710-mmc.c
+++ b/drivers/mmc/host/cb710-mmc.c

@@ -780,18 +780,7 @@
 #endif
 };
 
-static int __init cb710_mmc_init_module(void)
-{
-	return platform_driver_register(&cb710_mmc_driver);
-}
-
-static void __exit cb710_mmc_cleanup_module(void)
-{
-	platform_driver_unregister(&cb710_mmc_driver);
-}
-
-module_init(cb710_mmc_init_module);
-module_exit(cb710_mmc_cleanup_module);
+module_platform_driver(cb710_mmc_driver);
 
 MODULE_AUTHOR("Michał Mirosław <mirq-linux@rere.qmqm.pl>");
 MODULE_DESCRIPTION("ENE CB710 memory card reader driver - MMC/SD part");

diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
index 3aaeb08..0e34279 100644
--- a/drivers/mmc/host/dw_mmc.c
+++ b/drivers/mmc/host/dw_mmc.c

@@ -588,11 +588,11 @@
 	mci_writel(host, CTYPE, (slot->ctype << slot->id));
 }
 
-static void dw_mci_start_request(struct dw_mci *host,
-				 struct dw_mci_slot *slot)
+static void __dw_mci_start_request(struct dw_mci *host,
+				   struct dw_mci_slot *slot,
+				   struct mmc_command *cmd)
 {
 	struct mmc_request *mrq;
-	struct mmc_command *cmd;
 	struct mmc_data	*data;
 	u32 cmdflags;
 
@@ -610,14 +610,13 @@
 	host->completed_events = 0;
 	host->data_status = 0;
 
-	data = mrq->data;
+	data = cmd->data;
 	if (data) {
 		dw_mci_set_timeout(host);
 		mci_writel(host, BYTCNT, data->blksz*data->blocks);
 		mci_writel(host, BLKSIZ, data->blksz);
 	}
 
-	cmd = mrq->cmd;
 	cmdflags = dw_mci_prepare_command(slot->mmc, cmd);
 
 	/* this is the first command, send the initialization clock */
@@ -635,6 +634,16 @@
 		host->stop_cmdr = dw_mci_prepare_command(slot->mmc, mrq->stop);
 }
 
+static void dw_mci_start_request(struct dw_mci *host,
+				 struct dw_mci_slot *slot)
+{
+	struct mmc_request *mrq = slot->mrq;
+	struct mmc_command *cmd;
+
+	cmd = mrq->sbc ? mrq->sbc : mrq->cmd;
+	__dw_mci_start_request(host, slot, cmd);
+}
+
 /* must be called with host->lock held */
 static void dw_mci_queue_request(struct dw_mci *host, struct dw_mci_slot *slot,
 				 struct mmc_request *mrq)
@@ -698,12 +707,15 @@
 		break;
 	}
 
+	regs = mci_readl(slot->host, UHS_REG);
+
 	/* DDR mode set */
-	if (ios->timing == MMC_TIMING_UHS_DDR50) {
-		regs = mci_readl(slot->host, UHS_REG);
+	if (ios->timing == MMC_TIMING_UHS_DDR50)
 		regs |= (0x1 << slot->id) << 16;
-		mci_writel(slot->host, UHS_REG, regs);
-	}
+	else
+		regs &= ~((0x1 << slot->id) << 16);
+
+	mci_writel(slot->host, UHS_REG, regs);
 
 	if (ios->clock) {
 		/*
@@ -889,7 +901,14 @@
 			cmd = host->cmd;
 			host->cmd = NULL;
 			set_bit(EVENT_CMD_COMPLETE, &host->completed_events);
-			dw_mci_command_complete(host, host->mrq->cmd);
+			dw_mci_command_complete(host, cmd);
+			if (cmd == host->mrq->sbc && !cmd->error) {
+				prev_state = state = STATE_SENDING_CMD;
+				__dw_mci_start_request(host, host->cur_slot,
+						       host->mrq->cmd);
+				goto unlock;
+			}
+
 			if (!host->mrq->data || cmd->error) {
 				dw_mci_request_end(host, host->mrq);
 				goto unlock;
@@ -967,6 +986,12 @@
 				goto unlock;
 			}
 
+			if (host->mrq->sbc && !data->error) {
+				data->stop->error = 0;
+				dw_mci_request_end(host, host->mrq);
+				goto unlock;
+			}
+
 			prev_state = state = STATE_SENDING_STOP;
 			if (!data->error)
 				send_stop_cmd(host, data);
@@ -1678,8 +1703,9 @@
 
 	if (host->pdata->caps)
 		mmc->caps = host->pdata->caps;
-	else
-		mmc->caps = 0;
+
+	if (host->pdata->caps2)
+		mmc->caps2 = host->pdata->caps2;
 
 	if (host->pdata->get_bus_wd)
 		if (host->pdata->get_bus_wd(slot->id) >= 4)
@@ -1923,7 +1949,7 @@
 		 * should put it in the platform data.
 		 */
 		fifo_size = mci_readl(host, FIFOTH);
-		fifo_size = 1 + ((fifo_size >> 16) & 0x7ff);
+		fifo_size = 1 + ((fifo_size >> 16) & 0xfff);
 	} else {
 		fifo_size = host->pdata->fifo_depth;
 	}
@@ -2062,14 +2088,14 @@
 	return 0;
 }
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 /*
  * TODO: we should probably disable the clock to the card in the suspend path.
  */
-static int dw_mci_suspend(struct platform_device *pdev, pm_message_t mesg)
+static int dw_mci_suspend(struct device *dev)
 {
 	int i, ret;
-	struct dw_mci *host = platform_get_drvdata(pdev);
+	struct dw_mci *host = dev_get_drvdata(dev);
 
 	for (i = 0; i < host->num_slots; i++) {
 		struct dw_mci_slot *slot = host->slot[i];
@@ -2092,10 +2118,10 @@
 	return 0;
 }
 
-static int dw_mci_resume(struct platform_device *pdev)
+static int dw_mci_resume(struct device *dev)
 {
 	int i, ret;
-	struct dw_mci *host = platform_get_drvdata(pdev);
+	struct dw_mci *host = dev_get_drvdata(dev);
 
 	if (host->vmmc)
 		regulator_enable(host->vmmc);
@@ -2103,7 +2129,7 @@
 	if (host->dma_ops->init)
 		host->dma_ops->init(host);
 
-	if (!mci_wait_reset(&pdev->dev, host)) {
+	if (!mci_wait_reset(dev, host)) {
 		ret = -ENODEV;
 		return ret;
 	}
@@ -2131,14 +2157,15 @@
 #else
 #define dw_mci_suspend	NULL
 #define dw_mci_resume	NULL
-#endif /* CONFIG_PM */
+#endif /* CONFIG_PM_SLEEP */
+
+static SIMPLE_DEV_PM_OPS(dw_mci_pmops, dw_mci_suspend, dw_mci_resume);
 
 static struct platform_driver dw_mci_driver = {
 	.remove		= __exit_p(dw_mci_remove),
-	.suspend	= dw_mci_suspend,
-	.resume		= dw_mci_resume,
 	.driver		= {
 		.name		= "dw_mmc",
+		.pm		= &dw_mci_pmops,
 	},
 };
 

diff --git a/drivers/mmc/host/dw_mmc.h b/drivers/mmc/host/dw_mmc.h
index 72c071f..df392a1 100644
--- a/drivers/mmc/host/dw_mmc.h
+++ b/drivers/mmc/host/dw_mmc.h

@@ -126,7 +126,7 @@
 #define SDMMC_CMD_RESP_EXP		BIT(6)
 #define SDMMC_CMD_INDX(n)		((n) & 0x1F)
 /* Status register defines */
-#define SDMMC_GET_FCNT(x)		(((x)>>17) & 0x1FF)
+#define SDMMC_GET_FCNT(x)		(((x)>>17) & 0x1FFF)
 /* Internal DMAC interrupt defines */
 #define SDMMC_IDMAC_INT_AI		BIT(9)
 #define SDMMC_IDMAC_INT_NI		BIT(8)

diff --git a/drivers/mmc/host/jz4740_mmc.c b/drivers/mmc/host/jz4740_mmc.c
index 74218ad..c8852a8 100644
--- a/drivers/mmc/host/jz4740_mmc.c
+++ b/drivers/mmc/host/jz4740_mmc.c

@@ -1012,17 +1012,7 @@
 	},
 };
 
-static int __init jz4740_mmc_init(void)
-{
-	return platform_driver_register(&jz4740_mmc_driver);
-}
-module_init(jz4740_mmc_init);
-
-static void __exit jz4740_mmc_exit(void)
-{
-	platform_driver_unregister(&jz4740_mmc_driver);
-}
-module_exit(jz4740_mmc_exit);
+module_platform_driver(jz4740_mmc_driver);
 
 MODULE_DESCRIPTION("JZ4740 SD/MMC controller driver");
 MODULE_LICENSE("GPL");

diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c
index 92946b8..273306c 100644
--- a/drivers/mmc/host/mmc_spi.c
+++ b/drivers/mmc/host/mmc_spi.c

@@ -1525,7 +1525,6 @@
 static struct spi_driver mmc_spi_driver = {
 	.driver = {
 		.name =		"mmc_spi",
-		.bus =		&spi_bus_type,
 		.owner =	THIS_MODULE,
 		.of_match_table = mmc_spi_of_match_table,
 	},

diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index fa8dd2f..ece03b4 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c

@@ -1245,6 +1245,7 @@
 	if (host->vcc == NULL)
 		mmc->ocr_avail = plat->ocr_mask;
 	mmc->caps = plat->capabilities;
+	mmc->caps2 = plat->capabilities2;
 
 	/*
 	 * We can do SGIO

diff --git a/drivers/mmc/host/msm_sdcc.c b/drivers/mmc/host/msm_sdcc.c
index 80d8eb1..1d14cda 100644
--- a/drivers/mmc/host/msm_sdcc.c
+++ b/drivers/mmc/host/msm_sdcc.c

@@ -689,8 +689,8 @@
 
 		/* Map the current scatter buffer */
 		local_irq_save(flags);
-		buffer = kmap_atomic(sg_page(host->pio.sg),
-				     KM_BIO_SRC_IRQ) + host->pio.sg->offset;
+		buffer = kmap_atomic(sg_page(host->pio.sg))
+				     + host->pio.sg->offset;
 		buffer += host->pio.sg_off;
 		remain = host->pio.sg->length - host->pio.sg_off;
 		len = 0;
@@ -700,7 +700,7 @@
 			len = msmsdcc_pio_write(host, buffer, remain, status);
 
 		/* Unmap the buffer */
-		kunmap_atomic(buffer, KM_BIO_SRC_IRQ);
+		kunmap_atomic(buffer);
 		local_irq_restore(flags);
 
 		host->pio.sg_off += len;
@@ -1480,18 +1480,7 @@
 	},
 };
 
-static int __init msmsdcc_init(void)
-{
-	return platform_driver_register(&msmsdcc_driver);
-}
-
-static void __exit msmsdcc_exit(void)
-{
-	platform_driver_unregister(&msmsdcc_driver);
-}
-
-module_init(msmsdcc_init);
-module_exit(msmsdcc_exit);
+module_platform_driver(msmsdcc_driver);
 
 MODULE_DESCRIPTION("Qualcomm MSM 7X00A Multimedia Card Interface driver");
 MODULE_LICENSE("GPL");

diff --git a/drivers/mmc/host/mxcmmc.c b/drivers/mmc/host/mxcmmc.c
index 8e0fbe9..7088b40 100644
--- a/drivers/mmc/host/mxcmmc.c
+++ b/drivers/mmc/host/mxcmmc.c

@@ -1047,18 +1047,7 @@
 	}
 };
 
-static int __init mxcmci_init(void)
-{
-	return platform_driver_register(&mxcmci_driver);
-}
-
-static void __exit mxcmci_exit(void)
-{
-	platform_driver_unregister(&mxcmci_driver);
-}
-
-module_init(mxcmci_init);
-module_exit(mxcmci_exit);
+module_platform_driver(mxcmci_driver);
 
 MODULE_DESCRIPTION("i.MX Multimedia Card Interface Driver");
 MODULE_AUTHOR("Sascha Hauer, Pengutronix");

diff --git a/drivers/mmc/host/mxs-mmc.c b/drivers/mmc/host/mxs-mmc.c
index 973011f..4e2e019 100644
--- a/drivers/mmc/host/mxs-mmc.c
+++ b/drivers/mmc/host/mxs-mmc.c

@@ -855,18 +855,7 @@
 	},
 };
 
-static int __init mxs_mmc_init(void)
-{
-	return platform_driver_register(&mxs_mmc_driver);
-}
-
-static void __exit mxs_mmc_exit(void)
-{
-	platform_driver_unregister(&mxs_mmc_driver);
-}
-
-module_init(mxs_mmc_init);
-module_exit(mxs_mmc_exit);
+module_platform_driver(mxs_mmc_driver);
 
 MODULE_DESCRIPTION("FREESCALE MXS MMC peripheral");
 MODULE_AUTHOR("Freescale Semiconductor");

diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index d1fb561..fd0c661 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c

@@ -24,7 +24,6 @@
 #include <linux/delay.h>
 #include <linux/dma-mapping.h>
 #include <linux/platform_device.h>
-#include <linux/workqueue.h>
 #include <linux/timer.h>
 #include <linux/clk.h>
 #include <linux/mmc/host.h>
@@ -120,7 +119,6 @@
 
 #define MMC_AUTOSUSPEND_DELAY	100
 #define MMC_TIMEOUT_MS		20
-#define OMAP_MMC_MASTER_CLOCK	96000000
 #define OMAP_MMC_MIN_CLOCK	400000
 #define OMAP_MMC_MAX_CLOCK	52000000
 #define DRIVER_NAME		"omap_hsmmc"
@@ -163,7 +161,6 @@
 	 */
 	struct	regulator	*vcc;
 	struct	regulator	*vcc_aux;
-	struct	work_struct	mmc_carddetect_work;
 	void	__iomem		*base;
 	resource_size_t		mapbase;
 	spinlock_t		irq_lock; /* Prevent races with irq handler */
@@ -598,12 +595,12 @@
 }
 
 /* Calculate divisor for the given clock frequency */
-static u16 calc_divisor(struct mmc_ios *ios)
+static u16 calc_divisor(struct omap_hsmmc_host *host, struct mmc_ios *ios)
 {
 	u16 dsor = 0;
 
 	if (ios->clock) {
-		dsor = DIV_ROUND_UP(OMAP_MMC_MASTER_CLOCK, ios->clock);
+		dsor = DIV_ROUND_UP(clk_get_rate(host->fclk), ios->clock);
 		if (dsor > 250)
 			dsor = 250;
 	}
@@ -623,7 +620,7 @@
 
 	regval = OMAP_HSMMC_READ(host->base, SYSCTL);
 	regval = regval & ~(CLKD_MASK | DTO_MASK);
-	regval = regval | (calc_divisor(ios) << 6) | (DTO << 16);
+	regval = regval | (calc_divisor(host, ios) << 6) | (DTO << 16);
 	OMAP_HSMMC_WRITE(host->base, SYSCTL, regval);
 	OMAP_HSMMC_WRITE(host->base, SYSCTL,
 		OMAP_HSMMC_READ(host->base, SYSCTL) | ICE);
@@ -1280,17 +1277,16 @@
 }
 
 /*
- * Work Item to notify the core about card insertion/removal
+ * irq handler to notify the core about card insertion/removal
  */
-static void omap_hsmmc_detect(struct work_struct *work)
+static irqreturn_t omap_hsmmc_detect(int irq, void *dev_id)
 {
-	struct omap_hsmmc_host *host =
-		container_of(work, struct omap_hsmmc_host, mmc_carddetect_work);
+	struct omap_hsmmc_host *host = dev_id;
 	struct omap_mmc_slot_data *slot = &mmc_slot(host);
 	int carddetect;
 
 	if (host->suspended)
-		return;
+		return IRQ_HANDLED;
 
 	sysfs_notify(&host->mmc->class_dev.kobj, NULL, "cover_switch");
 
@@ -1305,19 +1301,6 @@
 		mmc_detect_change(host->mmc, (HZ * 200) / 1000);
 	else
 		mmc_detect_change(host->mmc, (HZ * 50) / 1000);
-}
-
-/*
- * ISR for handling card insertion and removal
- */
-static irqreturn_t omap_hsmmc_cd_handler(int irq, void *dev_id)
-{
-	struct omap_hsmmc_host *host = (struct omap_hsmmc_host *)dev_id;
-
-	if (host->suspended)
-		return IRQ_HANDLED;
-	schedule_work(&host->mmc_carddetect_work);
-
 	return IRQ_HANDLED;
 }
 
@@ -1919,7 +1902,6 @@
 	host->next_data.cookie = 1;
 
 	platform_set_drvdata(pdev, host);
-	INIT_WORK(&host->mmc_carddetect_work, omap_hsmmc_detect);
 
 	mmc->ops	= &omap_hsmmc_ops;
 
@@ -2049,10 +2031,11 @@
 
 	/* Request IRQ for card detect */
 	if ((mmc_slot(host).card_detect_irq)) {
-		ret = request_irq(mmc_slot(host).card_detect_irq,
-				  omap_hsmmc_cd_handler,
-				  IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
-				  mmc_hostname(mmc), host);
+		ret = request_threaded_irq(mmc_slot(host).card_detect_irq,
+					   NULL,
+					   omap_hsmmc_detect,
+					   IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
+					   mmc_hostname(mmc), host);
 		if (ret) {
 			dev_dbg(mmc_dev(host->mmc),
 				"Unable to grab MMC CD IRQ\n");
@@ -2131,7 +2114,6 @@
 		free_irq(host->irq, host);
 		if (mmc_slot(host).card_detect_irq)
 			free_irq(mmc_slot(host).card_detect_irq, host);
-		flush_work_sync(&host->mmc_carddetect_work);
 
 		pm_runtime_put_sync(host->dev);
 		pm_runtime_disable(host->dev);
@@ -2178,7 +2160,6 @@
 				return ret;
 			}
 		}
-		cancel_work_sync(&host->mmc_carddetect_work);
 		ret = mmc_suspend_host(host->mmc);
 
 		if (ret) {

diff --git a/drivers/mmc/host/pxamci.c b/drivers/mmc/host/pxamci.c
index fc4356e..cb2dc0e 100644
--- a/drivers/mmc/host/pxamci.c
+++ b/drivers/mmc/host/pxamci.c

@@ -872,18 +872,7 @@
 	},
 };
 
-static int __init pxamci_init(void)
-{
-	return platform_driver_register(&pxamci_driver);
-}
-
-static void __exit pxamci_exit(void)
-{
-	platform_driver_unregister(&pxamci_driver);
-}
-
-module_init(pxamci_init);
-module_exit(pxamci_exit);
+module_platform_driver(pxamci_driver);
 
 MODULE_DESCRIPTION("PXA Multimedia Card Interface Driver");
 MODULE_LICENSE("GPL");

diff --git a/drivers/mmc/host/s3cmci.c b/drivers/mmc/host/s3cmci.c
index 720f993..1bcfd6d 100644
--- a/drivers/mmc/host/s3cmci.c
+++ b/drivers/mmc/host/s3cmci.c

@@ -1914,18 +1914,7 @@
 	.shutdown	= s3cmci_shutdown,
 };
 
-static int __init s3cmci_init(void)
-{
-	return platform_driver_register(&s3cmci_driver);
-}
-
-static void __exit s3cmci_exit(void)
-{
-	platform_driver_unregister(&s3cmci_driver);
-}
-
-module_init(s3cmci_init);
-module_exit(s3cmci_exit);
+module_platform_driver(s3cmci_driver);
 
 MODULE_DESCRIPTION("Samsung S3C MMC/SD Card Interface driver");
 MODULE_LICENSE("GPL v2");

diff --git a/drivers/mmc/host/sdhci-cns3xxx.c b/drivers/mmc/host/sdhci-cns3xxx.c
index b4257e7..28a8708 100644
--- a/drivers/mmc/host/sdhci-cns3xxx.c
+++ b/drivers/mmc/host/sdhci-cns3xxx.c

@@ -115,17 +115,7 @@
 	.remove		= __devexit_p(sdhci_cns3xxx_remove),
 };
 
-static int __init sdhci_cns3xxx_init(void)
-{
-	return platform_driver_register(&sdhci_cns3xxx_driver);
-}
-module_init(sdhci_cns3xxx_init);
-
-static void __exit sdhci_cns3xxx_exit(void)
-{
-	platform_driver_unregister(&sdhci_cns3xxx_driver);
-}
-module_exit(sdhci_cns3xxx_exit);
+module_platform_driver(sdhci_cns3xxx_driver);
 
 MODULE_DESCRIPTION("SDHCI driver for CNS3xxx");
 MODULE_AUTHOR("Scott Shu, "

diff --git a/drivers/mmc/host/sdhci-dove.c b/drivers/mmc/host/sdhci-dove.c
index a81312c..46fd1fd 100644
--- a/drivers/mmc/host/sdhci-dove.c
+++ b/drivers/mmc/host/sdhci-dove.c

@@ -88,17 +88,7 @@
 	.remove		= __devexit_p(sdhci_dove_remove),
 };
 
-static int __init sdhci_dove_init(void)
-{
-	return platform_driver_register(&sdhci_dove_driver);
-}
-module_init(sdhci_dove_init);
-
-static void __exit sdhci_dove_exit(void)
-{
-	platform_driver_unregister(&sdhci_dove_driver);
-}
-module_exit(sdhci_dove_exit);
+module_platform_driver(sdhci_dove_driver);
 
 MODULE_DESCRIPTION("SDHCI driver for Dove");
 MODULE_AUTHOR("Saeed Bishara <saeed@marvell.com>, "

diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c
index 38ebc4e..d601e41 100644
--- a/drivers/mmc/host/sdhci-esdhc-imx.c
+++ b/drivers/mmc/host/sdhci-esdhc-imx.c

@@ -606,17 +606,7 @@
 	.remove		= __devexit_p(sdhci_esdhc_imx_remove),
 };
 
-static int __init sdhci_esdhc_imx_init(void)
-{
-	return platform_driver_register(&sdhci_esdhc_imx_driver);
-}
-module_init(sdhci_esdhc_imx_init);
-
-static void __exit sdhci_esdhc_imx_exit(void)
-{
-	platform_driver_unregister(&sdhci_esdhc_imx_driver);
-}
-module_exit(sdhci_esdhc_imx_exit);
+module_platform_driver(sdhci_esdhc_imx_driver);
 
 MODULE_DESCRIPTION("SDHCI driver for Freescale i.MX eSDHC");
 MODULE_AUTHOR("Wolfram Sang <w.sang@pengutronix.de>");

diff --git a/drivers/mmc/host/sdhci-esdhc.h b/drivers/mmc/host/sdhci-esdhc.h
index c3b08f1..b97b2f5 100644
--- a/drivers/mmc/host/sdhci-esdhc.h
+++ b/drivers/mmc/host/sdhci-esdhc.h

@@ -73,7 +73,7 @@
 		| (div << ESDHC_DIVIDER_SHIFT)
 		| (pre_div << ESDHC_PREDIV_SHIFT));
 	sdhci_writel(host, temp, ESDHC_SYSTEM_CONTROL);
-	mdelay(100);
+	mdelay(1);
 out:
 	host->clock = clock;
 }

diff --git a/drivers/mmc/host/sdhci-of-esdhc.c b/drivers/mmc/host/sdhci-of-esdhc.c
index 01e5f62..ff4adc0 100644
--- a/drivers/mmc/host/sdhci-of-esdhc.c
+++ b/drivers/mmc/host/sdhci-of-esdhc.c

@@ -131,17 +131,7 @@
 	.remove = __devexit_p(sdhci_esdhc_remove),
 };
 
-static int __init sdhci_esdhc_init(void)
-{
-	return platform_driver_register(&sdhci_esdhc_driver);
-}
-module_init(sdhci_esdhc_init);
-
-static void __exit sdhci_esdhc_exit(void)
-{
-	platform_driver_unregister(&sdhci_esdhc_driver);
-}
-module_exit(sdhci_esdhc_exit);
+module_platform_driver(sdhci_esdhc_driver);
 
 MODULE_DESCRIPTION("SDHCI OF driver for Freescale MPC eSDHC");
 MODULE_AUTHOR("Xiaobo Xie <X.Xie@freescale.com>, "

diff --git a/drivers/mmc/host/sdhci-of-hlwd.c b/drivers/mmc/host/sdhci-of-hlwd.c
index 3619adc..0ce088a 100644
--- a/drivers/mmc/host/sdhci-of-hlwd.c
+++ b/drivers/mmc/host/sdhci-of-hlwd.c

@@ -93,17 +93,7 @@
 	.remove = __devexit_p(sdhci_hlwd_remove),
 };
 
-static int __init sdhci_hlwd_init(void)
-{
-	return platform_driver_register(&sdhci_hlwd_driver);
-}
-module_init(sdhci_hlwd_init);
-
-static void __exit sdhci_hlwd_exit(void)
-{
-	platform_driver_unregister(&sdhci_hlwd_driver);
-}
-module_exit(sdhci_hlwd_exit);
+module_platform_driver(sdhci_hlwd_driver);
 
 MODULE_DESCRIPTION("Nintendo Wii SDHCI OF driver");
 MODULE_AUTHOR("The GameCube Linux Team, Albert Herranz");

diff --git a/drivers/mmc/host/sdhci-pci-data.c b/drivers/mmc/host/sdhci-pci-data.c
new file mode 100644
index 0000000..a611217
--- /dev/null
+++ b/drivers/mmc/host/sdhci-pci-data.c

@@ -0,0 +1,5 @@
+#include <linux/module.h>
+#include <linux/mmc/sdhci-pci-data.h>
+
+struct sdhci_pci_data *(*sdhci_pci_get_data)(struct pci_dev *pdev, int slotno);
+EXPORT_SYMBOL_GPL(sdhci_pci_get_data);

diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c
index 6878a94..7165e6a 100644
--- a/drivers/mmc/host/sdhci-pci.c
+++ b/drivers/mmc/host/sdhci-pci.c

@@ -23,8 +23,8 @@
 #include <linux/scatterlist.h>
 #include <linux/io.h>
 #include <linux/gpio.h>
-#include <linux/sfi.h>
 #include <linux/pm_runtime.h>
+#include <linux/mmc/sdhci-pci-data.h>
 
 #include "sdhci.h"
 
@@ -61,6 +61,7 @@
 struct sdhci_pci_slot {
 	struct sdhci_pci_chip	*chip;
 	struct sdhci_host	*host;
+	struct sdhci_pci_data	*data;
 
 	int			pci_bar;
 	int			rst_n_gpio;
@@ -171,32 +172,9 @@
 	return 0;
 }
 
-/* Medfield eMMC hardware reset GPIOs */
-static int mfd_emmc0_rst_gpio = -EINVAL;
-static int mfd_emmc1_rst_gpio = -EINVAL;
-
-static int mfd_emmc_gpio_parse(struct sfi_table_header *table)
-{
-	struct sfi_table_simple *sb = (struct sfi_table_simple *)table;
-	struct sfi_gpio_table_entry *entry;
-	int i, num;
-
-	num = SFI_GET_NUM_ENTRIES(sb, struct sfi_gpio_table_entry);
-	entry = (struct sfi_gpio_table_entry *)sb->pentry;
-
-	for (i = 0; i < num; i++, entry++) {
-		if (!strncmp(entry->pin_name, "emmc0_rst", SFI_NAME_LEN))
-			mfd_emmc0_rst_gpio = entry->pin_no;
-		else if (!strncmp(entry->pin_name, "emmc1_rst", SFI_NAME_LEN))
-			mfd_emmc1_rst_gpio = entry->pin_no;
-	}
-
-	return 0;
-}
-
 #ifdef CONFIG_PM_RUNTIME
 
-static irqreturn_t mfd_sd_cd(int irq, void *dev_id)
+static irqreturn_t sdhci_pci_sd_cd(int irq, void *dev_id)
 {
 	struct sdhci_pci_slot *slot = dev_id;
 	struct sdhci_host *host = slot->host;
@@ -205,15 +183,16 @@
 	return IRQ_HANDLED;
 }
 
-#define MFLD_SD_CD_PIN 69
-
-static int mfd_sd_probe_slot(struct sdhci_pci_slot *slot)
+static void sdhci_pci_add_own_cd(struct sdhci_pci_slot *slot)
 {
-	int err, irq, gpio = MFLD_SD_CD_PIN;
+	int err, irq, gpio = slot->cd_gpio;
 
 	slot->cd_gpio = -EINVAL;
 	slot->cd_irq = -EINVAL;
 
+	if (!gpio_is_valid(gpio))
+		return;
+
 	err = gpio_request(gpio, "sd_cd");
 	if (err < 0)
 		goto out;
@@ -226,72 +205,53 @@
 	if (irq < 0)
 		goto out_free;
 
-	err = request_irq(irq, mfd_sd_cd, IRQF_TRIGGER_RISING |
+	err = request_irq(irq, sdhci_pci_sd_cd, IRQF_TRIGGER_RISING |
 			  IRQF_TRIGGER_FALLING, "sd_cd", slot);
 	if (err)
 		goto out_free;
 
 	slot->cd_gpio = gpio;
 	slot->cd_irq = irq;
-	slot->host->quirks2 |= SDHCI_QUIRK2_OWN_CARD_DETECTION;
 
-	return 0;
+	return;
 
 out_free:
 	gpio_free(gpio);
 out:
 	dev_warn(&slot->chip->pdev->dev, "failed to setup card detect wake up\n");
-	return 0;
 }
 
-static void mfd_sd_remove_slot(struct sdhci_pci_slot *slot, int dead)
+static void sdhci_pci_remove_own_cd(struct sdhci_pci_slot *slot)
 {
 	if (slot->cd_irq >= 0)
 		free_irq(slot->cd_irq, slot);
-	gpio_free(slot->cd_gpio);
+	if (gpio_is_valid(slot->cd_gpio))
+		gpio_free(slot->cd_gpio);
 }
 
 #else
 
-#define mfd_sd_probe_slot	NULL
-#define mfd_sd_remove_slot	NULL
+static inline void sdhci_pci_add_own_cd(struct sdhci_pci_slot *slot)
+{
+}
+
+static inline void sdhci_pci_remove_own_cd(struct sdhci_pci_slot *slot)
+{
+}
 
 #endif
 
 static int mfd_emmc_probe_slot(struct sdhci_pci_slot *slot)
 {
-	const char *name = NULL;
-	int gpio = -EINVAL;
-
-	sfi_table_parse(SFI_SIG_GPIO, NULL, NULL, mfd_emmc_gpio_parse);
-
-	switch (slot->chip->pdev->device) {
-	case PCI_DEVICE_ID_INTEL_MFD_EMMC0:
-		gpio = mfd_emmc0_rst_gpio;
-		name = "eMMC0_reset";
-		break;
-	case PCI_DEVICE_ID_INTEL_MFD_EMMC1:
-		gpio = mfd_emmc1_rst_gpio;
-		name = "eMMC1_reset";
-		break;
-	}
-
-	if (!gpio_request(gpio, name)) {
-		gpio_direction_output(gpio, 1);
-		slot->rst_n_gpio = gpio;
-		slot->host->mmc->caps |= MMC_CAP_HW_RESET;
-	}
-
 	slot->host->mmc->caps |= MMC_CAP_8_BIT_DATA | MMC_CAP_NONREMOVABLE;
-
 	slot->host->mmc->caps2 = MMC_CAP2_BOOTPART_NOACC;
-
 	return 0;
 }
 
-static void mfd_emmc_remove_slot(struct sdhci_pci_slot *slot, int dead)
+static int mfd_sdio_probe_slot(struct sdhci_pci_slot *slot)
 {
-	gpio_free(slot->rst_n_gpio);
+	slot->host->mmc->caps |= MMC_CAP_POWER_OFF_CARD;
+	return 0;
 }
 
 static const struct sdhci_pci_fixes sdhci_intel_mrst_hc0 = {
@@ -307,20 +267,18 @@
 static const struct sdhci_pci_fixes sdhci_intel_mfd_sd = {
 	.quirks		= SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC,
 	.allow_runtime_pm = true,
-	.probe_slot	= mfd_sd_probe_slot,
-	.remove_slot	= mfd_sd_remove_slot,
 };
 
 static const struct sdhci_pci_fixes sdhci_intel_mfd_sdio = {
 	.quirks		= SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC,
 	.allow_runtime_pm = true,
+	.probe_slot	= mfd_sdio_probe_slot,
 };
 
 static const struct sdhci_pci_fixes sdhci_intel_mfd_emmc = {
 	.quirks		= SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC,
 	.allow_runtime_pm = true,
 	.probe_slot	= mfd_emmc_probe_slot,
-	.remove_slot	= mfd_emmc_remove_slot,
 };
 
 /* O2Micro extra registers */
@@ -1012,11 +970,8 @@
 
 		ret = sdhci_suspend_host(slot->host);
 
-		if (ret) {
-			for (i--; i >= 0; i--)
-				sdhci_resume_host(chip->slots[i]->host);
-			return ret;
-		}
+		if (ret)
+			goto err_pci_suspend;
 
 		slot_pm_flags = slot->host->mmc->pm_flags;
 		if (slot_pm_flags & MMC_PM_WAKE_SDIO_IRQ)
@@ -1027,11 +982,8 @@
 
 	if (chip->fixes && chip->fixes->suspend) {
 		ret = chip->fixes->suspend(chip);
-		if (ret) {
-			for (i = chip->num_slots - 1; i >= 0; i--)
-				sdhci_resume_host(chip->slots[i]->host);
-			return ret;
-		}
+		if (ret)
+			goto err_pci_suspend;
 	}
 
 	pci_save_state(pdev);
@@ -1048,6 +1000,11 @@
 	}
 
 	return 0;
+
+err_pci_suspend:
+	while (--i >= 0)
+		sdhci_resume_host(chip->slots[i]->host);
+	return ret;
 }
 
 static int sdhci_pci_resume(struct device *dev)
@@ -1113,23 +1070,22 @@
 
 		ret = sdhci_runtime_suspend_host(slot->host);
 
-		if (ret) {
-			for (i--; i >= 0; i--)
-				sdhci_runtime_resume_host(chip->slots[i]->host);
-			return ret;
-		}
+		if (ret)
+			goto err_pci_runtime_suspend;
 	}
 
 	if (chip->fixes && chip->fixes->suspend) {
 		ret = chip->fixes->suspend(chip);
-		if (ret) {
-			for (i = chip->num_slots - 1; i >= 0; i--)
-				sdhci_runtime_resume_host(chip->slots[i]->host);
-			return ret;
-		}
+		if (ret)
+			goto err_pci_runtime_suspend;
 	}
 
 	return 0;
+
+err_pci_runtime_suspend:
+	while (--i >= 0)
+		sdhci_runtime_resume_host(chip->slots[i]->host);
+	return ret;
 }
 
 static int sdhci_pci_runtime_resume(struct device *dev)
@@ -1190,11 +1146,12 @@
 \*****************************************************************************/
 
 static struct sdhci_pci_slot * __devinit sdhci_pci_probe_slot(
-	struct pci_dev *pdev, struct sdhci_pci_chip *chip, int bar)
+	struct pci_dev *pdev, struct sdhci_pci_chip *chip, int first_bar,
+	int slotno)
 {
 	struct sdhci_pci_slot *slot;
 	struct sdhci_host *host;
-	int ret;
+	int ret, bar = first_bar + slotno;
 
 	if (!(pci_resource_flags(pdev, bar) & IORESOURCE_MEM)) {
 		dev_err(&pdev->dev, "BAR %d is not iomem. Aborting.\n", bar);
@@ -1228,6 +1185,23 @@
 	slot->host = host;
 	slot->pci_bar = bar;
 	slot->rst_n_gpio = -EINVAL;
+	slot->cd_gpio = -EINVAL;
+
+	/* Retrieve platform data if there is any */
+	if (*sdhci_pci_get_data)
+		slot->data = sdhci_pci_get_data(pdev, slotno);
+
+	if (slot->data) {
+		if (slot->data->setup) {
+			ret = slot->data->setup(slot->data);
+			if (ret) {
+				dev_err(&pdev->dev, "platform setup failed\n");
+				goto free;
+			}
+		}
+		slot->rst_n_gpio = slot->data->rst_n_gpio;
+		slot->cd_gpio = slot->data->cd_gpio;
+	}
 
 	host->hw_name = "PCI";
 	host->ops = &sdhci_pci_ops;
@@ -1238,7 +1212,7 @@
 	ret = pci_request_region(pdev, bar, mmc_hostname(host->mmc));
 	if (ret) {
 		dev_err(&pdev->dev, "cannot request region\n");
-		goto free;
+		goto cleanup;
 	}
 
 	host->ioaddr = pci_ioremap_bar(pdev, bar);
@@ -1254,15 +1228,30 @@
 			goto unmap;
 	}
 
+	if (gpio_is_valid(slot->rst_n_gpio)) {
+		if (!gpio_request(slot->rst_n_gpio, "eMMC_reset")) {
+			gpio_direction_output(slot->rst_n_gpio, 1);
+			slot->host->mmc->caps |= MMC_CAP_HW_RESET;
+		} else {
+			dev_warn(&pdev->dev, "failed to request rst_n_gpio\n");
+			slot->rst_n_gpio = -EINVAL;
+		}
+	}
+
 	host->mmc->pm_caps = MMC_PM_KEEP_POWER | MMC_PM_WAKE_SDIO_IRQ;
 
 	ret = sdhci_add_host(host);
 	if (ret)
 		goto remove;
 
+	sdhci_pci_add_own_cd(slot);
+
 	return slot;
 
 remove:
+	if (gpio_is_valid(slot->rst_n_gpio))
+		gpio_free(slot->rst_n_gpio);
+
 	if (chip->fixes && chip->fixes->remove_slot)
 		chip->fixes->remove_slot(slot, 0);
 
@@ -1272,6 +1261,10 @@
 release:
 	pci_release_region(pdev, bar);
 
+cleanup:
+	if (slot->data && slot->data->cleanup)
+		slot->data->cleanup(slot->data);
+
 free:
 	sdhci_free_host(host);
 
@@ -1283,6 +1276,8 @@
 	int dead;
 	u32 scratch;
 
+	sdhci_pci_remove_own_cd(slot);
+
 	dead = 0;
 	scratch = readl(slot->host->ioaddr + SDHCI_INT_STATUS);
 	if (scratch == (u32)-1)
@@ -1290,9 +1285,15 @@
 
 	sdhci_remove_host(slot->host, dead);
 
+	if (gpio_is_valid(slot->rst_n_gpio))
+		gpio_free(slot->rst_n_gpio);
+
 	if (slot->chip->fixes && slot->chip->fixes->remove_slot)
 		slot->chip->fixes->remove_slot(slot, dead);
 
+	if (slot->data && slot->data->cleanup)
+		slot->data->cleanup(slot->data);
+
 	pci_release_region(slot->chip->pdev, slot->pci_bar);
 
 	sdhci_free_host(slot->host);
@@ -1379,7 +1380,7 @@
 	slots = chip->num_slots;	/* Quirk may have changed this */
 
 	for (i = 0; i < slots; i++) {
-		slot = sdhci_pci_probe_slot(pdev, chip, first_bar + i);
+		slot = sdhci_pci_probe_slot(pdev, chip, first_bar, i);
 		if (IS_ERR(slot)) {
 			for (i--; i >= 0; i--)
 				sdhci_pci_remove_slot(chip->slots[i]);

diff --git a/drivers/mmc/host/sdhci-pxav2.c b/drivers/mmc/host/sdhci-pxav2.c
index 7a039c3..dbb75bf 100644
--- a/drivers/mmc/host/sdhci-pxav2.c
+++ b/drivers/mmc/host/sdhci-pxav2.c

@@ -223,18 +223,8 @@
 	.probe		= sdhci_pxav2_probe,
 	.remove		= __devexit_p(sdhci_pxav2_remove),
 };
-static int __init sdhci_pxav2_init(void)
-{
-	return platform_driver_register(&sdhci_pxav2_driver);
-}
 
-static void __exit sdhci_pxav2_exit(void)
-{
-	platform_driver_unregister(&sdhci_pxav2_driver);
-}
-
-module_init(sdhci_pxav2_init);
-module_exit(sdhci_pxav2_exit);
+module_platform_driver(sdhci_pxav2_driver);
 
 MODULE_DESCRIPTION("SDHCI driver for pxav2");
 MODULE_AUTHOR("Marvell International Ltd.");

diff --git a/drivers/mmc/host/sdhci-pxav3.c b/drivers/mmc/host/sdhci-pxav3.c
index 15673a7..f296956 100644
--- a/drivers/mmc/host/sdhci-pxav3.c
+++ b/drivers/mmc/host/sdhci-pxav3.c

@@ -269,18 +269,8 @@
 	.probe		= sdhci_pxav3_probe,
 	.remove		= __devexit_p(sdhci_pxav3_remove),
 };
-static int __init sdhci_pxav3_init(void)
-{
-	return platform_driver_register(&sdhci_pxav3_driver);
-}
 
-static void __exit sdhci_pxav3_exit(void)
-{
-	platform_driver_unregister(&sdhci_pxav3_driver);
-}
-
-module_init(sdhci_pxav3_init);
-module_exit(sdhci_pxav3_exit);
+module_platform_driver(sdhci_pxav3_driver);
 
 MODULE_DESCRIPTION("SDHCI driver for pxav3");
 MODULE_AUTHOR("Marvell International Ltd.");

diff --git a/drivers/mmc/host/sdhci-s3c.c b/drivers/mmc/host/sdhci-s3c.c
index 9a20d1f..1af756e 100644
--- a/drivers/mmc/host/sdhci-s3c.c
+++ b/drivers/mmc/host/sdhci-s3c.c

@@ -80,7 +80,7 @@
 
 		tmp &= ~S3C_SDHCI_CTRL2_SELBASECLK_MASK;
 		tmp |= ourhost->cur_clk << S3C_SDHCI_CTRL2_SELBASECLK_SHIFT;
-		writel(tmp, host->ioaddr + 0x80);
+		writel(tmp, host->ioaddr + S3C_SDHCI_CONTROL2);
 	}
 }
 
@@ -521,6 +521,9 @@
 	if (pdata->host_caps)
 		host->mmc->caps |= pdata->host_caps;
 
+	if (pdata->pm_caps)
+		host->mmc->pm_caps |= pdata->pm_caps;
+
 	host->quirks |= (SDHCI_QUIRK_32BIT_DMA_ADDR |
 			 SDHCI_QUIRK_32BIT_DMA_SIZE);
 
@@ -654,18 +657,7 @@
 	},
 };
 
-static int __init sdhci_s3c_init(void)
-{
-	return platform_driver_register(&sdhci_s3c_driver);
-}
-
-static void __exit sdhci_s3c_exit(void)
-{
-	platform_driver_unregister(&sdhci_s3c_driver);
-}
-
-module_init(sdhci_s3c_init);
-module_exit(sdhci_s3c_exit);
+module_platform_driver(sdhci_s3c_driver);
 
 MODULE_DESCRIPTION("Samsung SDHCI (HSMMC) glue");
 MODULE_AUTHOR("Ben Dooks, <ben@simtec.co.uk>");

diff --git a/drivers/mmc/host/sdhci-spear.c b/drivers/mmc/host/sdhci-spear.c
index 63cc8b6..b7f8b33 100644
--- a/drivers/mmc/host/sdhci-spear.c
+++ b/drivers/mmc/host/sdhci-spear.c

@@ -21,6 +21,7 @@
 #include <linux/interrupt.h>
 #include <linux/irq.h>
 #include <linux/platform_device.h>
+#include <linux/pm.h>
 #include <linux/slab.h>
 #include <linux/mmc/host.h>
 #include <linux/mmc/sdhci-spear.h>
@@ -271,26 +272,54 @@
 	return 0;
 }
 
+#ifdef CONFIG_PM
+static int sdhci_suspend(struct device *dev)
+{
+	struct sdhci_host *host = dev_get_drvdata(dev);
+	struct spear_sdhci *sdhci = dev_get_platdata(dev);
+	int ret;
+
+	ret = sdhci_suspend_host(host);
+	if (!ret)
+		clk_disable(sdhci->clk);
+
+	return ret;
+}
+
+static int sdhci_resume(struct device *dev)
+{
+	struct sdhci_host *host = dev_get_drvdata(dev);
+	struct spear_sdhci *sdhci = dev_get_platdata(dev);
+	int ret;
+
+	ret = clk_enable(sdhci->clk);
+	if (ret) {
+		dev_dbg(dev, "Resume: Error enabling clock\n");
+		return ret;
+	}
+
+	return sdhci_resume_host(host);
+}
+
+const struct dev_pm_ops sdhci_pm_ops = {
+	.suspend	= sdhci_suspend,
+	.resume		= sdhci_resume,
+};
+#endif
+
 static struct platform_driver sdhci_driver = {
 	.driver = {
 		.name	= "sdhci",
 		.owner	= THIS_MODULE,
+#ifdef CONFIG_PM
+		.pm	= &sdhci_pm_ops,
+#endif
 	},
 	.probe		= sdhci_probe,
 	.remove		= __devexit_p(sdhci_remove),
 };
 
-static int __init sdhci_init(void)
-{
-	return platform_driver_register(&sdhci_driver);
-}
-module_init(sdhci_init);
-
-static void __exit sdhci_exit(void)
-{
-	platform_driver_unregister(&sdhci_driver);
-}
-module_exit(sdhci_exit);
+module_platform_driver(sdhci_driver);
 
 MODULE_DESCRIPTION("SPEAr Secure Digital Host Controller Interface driver");
 MODULE_AUTHOR("Viresh Kumar <viresh.kumar@st.com>");

diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c
index e2e18d3..78a36eb 100644
--- a/drivers/mmc/host/sdhci-tegra.c
+++ b/drivers/mmc/host/sdhci-tegra.c

@@ -324,17 +324,7 @@
 	.remove		= __devexit_p(sdhci_tegra_remove),
 };
 
-static int __init sdhci_tegra_init(void)
-{
-	return platform_driver_register(&sdhci_tegra_driver);
-}
-module_init(sdhci_tegra_init);
-
-static void __exit sdhci_tegra_exit(void)
-{
-	platform_driver_unregister(&sdhci_tegra_driver);
-}
-module_exit(sdhci_tegra_exit);
+module_platform_driver(sdhci_tegra_driver);
 
 MODULE_DESCRIPTION("SDHCI driver for Tegra");
 MODULE_AUTHOR(" Google, Inc.");

diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 19ed580..8d66706 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c

@@ -49,7 +49,7 @@
 
 static void sdhci_send_command(struct sdhci_host *, struct mmc_command *);
 static void sdhci_finish_command(struct sdhci_host *);
-static int sdhci_execute_tuning(struct mmc_host *mmc);
+static int sdhci_execute_tuning(struct mmc_host *mmc, u32 opcode);
 static void sdhci_tuning_timer(unsigned long data);
 
 #ifdef CONFIG_PM_RUNTIME
@@ -146,10 +146,8 @@
 {
 	u32 present, irqs;
 
-	if (host->quirks & SDHCI_QUIRK_BROKEN_CARD_DETECTION)
-		return;
-
-	if (host->quirks2 & SDHCI_QUIRK2_OWN_CARD_DETECTION)
+	if ((host->quirks & SDHCI_QUIRK_BROKEN_CARD_DETECTION) ||
+	    !mmc_card_is_removable(host->mmc))
 		return;
 
 	present = sdhci_readl(host, SDHCI_PRESENT_STATE) &
@@ -214,6 +212,11 @@
 
 	if (host->quirks & SDHCI_QUIRK_RESTORE_IRQS_AFTER_RESET)
 		sdhci_clear_set_irqs(host, SDHCI_INT_ALL_MASK, ier);
+
+	if (host->flags & (SDHCI_USE_SDMA | SDHCI_USE_ADMA)) {
+		if ((host->ops->enable_dma) && (mask & SDHCI_RESET_ALL))
+			host->ops->enable_dma(host);
+	}
 }
 
 static void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios);
@@ -423,12 +426,12 @@
 static char *sdhci_kmap_atomic(struct scatterlist *sg, unsigned long *flags)
 {
 	local_irq_save(*flags);
-	return kmap_atomic(sg_page(sg), KM_BIO_SRC_IRQ) + sg->offset;
+	return kmap_atomic(sg_page(sg)) + sg->offset;
 }
 
 static void sdhci_kunmap_atomic(void *buffer, unsigned long *flags)
 {
-	kunmap_atomic(buffer, KM_BIO_SRC_IRQ);
+	kunmap_atomic(buffer);
 	local_irq_restore(*flags);
 }
 
@@ -1016,7 +1019,8 @@
 		flags |= SDHCI_CMD_INDEX;
 
 	/* CMD19 is special in that the Data Present Select should be set */
-	if (cmd->data || (cmd->opcode == MMC_SEND_TUNING_BLOCK))
+	if (cmd->data || cmd->opcode == MMC_SEND_TUNING_BLOCK ||
+	    cmd->opcode == MMC_SEND_TUNING_BLOCK_HS200)
 		flags |= SDHCI_CMD_DATA;
 
 	sdhci_writew(host, SDHCI_MAKE_CMD(cmd->opcode, flags), SDHCI_COMMAND);
@@ -1066,12 +1070,15 @@
 static void sdhci_set_clock(struct sdhci_host *host, unsigned int clock)
 {
 	int div = 0; /* Initialized for compiler warning */
+	int real_div = div, clk_mul = 1;
 	u16 clk = 0;
 	unsigned long timeout;
 
-	if (clock == host->clock)
+	if (clock && clock == host->clock)
 		return;
 
+	host->mmc->actual_clock = 0;
+
 	if (host->ops->set_clock) {
 		host->ops->set_clock(host, clock);
 		if (host->quirks & SDHCI_QUIRK_NONSTANDARD_CLOCK)
@@ -1109,6 +1116,8 @@
 				 * Control register.
 				 */
 				clk = SDHCI_PROG_CLOCK_MODE;
+				real_div = div;
+				clk_mul = host->clk_mul;
 				div--;
 			}
 		} else {
@@ -1122,6 +1131,7 @@
 						break;
 				}
 			}
+			real_div = div;
 			div >>= 1;
 		}
 	} else {
@@ -1130,9 +1140,13 @@
 			if ((host->max_clk / div) <= clock)
 				break;
 		}
+		real_div = div;
 		div >>= 1;
 	}
 
+	if (real_div)
+		host->mmc->actual_clock = (host->max_clk * clk_mul) / real_div;
+
 	clk |= (div & SDHCI_DIV_MASK) << SDHCI_DIVIDER_SHIFT;
 	clk |= ((div & SDHCI_DIV_HI_MASK) >> SDHCI_DIV_MASK_LEN)
 		<< SDHCI_DIVIDER_HI_SHIFT;
@@ -1160,7 +1174,7 @@
 	host->clock = clock;
 }
 
-static void sdhci_set_power(struct sdhci_host *host, unsigned short power)
+static int sdhci_set_power(struct sdhci_host *host, unsigned short power)
 {
 	u8 pwr = 0;
 
@@ -1183,13 +1197,13 @@
 	}
 
 	if (host->pwr == pwr)
-		return;
+		return -1;
 
 	host->pwr = pwr;
 
 	if (pwr == 0) {
 		sdhci_writeb(host, 0, SDHCI_POWER_CONTROL);
-		return;
+		return 0;
 	}
 
 	/*
@@ -1216,6 +1230,8 @@
 	 */
 	if (host->quirks & SDHCI_QUIRK_DELAY_AFTER_POWER)
 		mdelay(10);
+
+	return power;
 }
 
 /*****************************************************************************\
@@ -1277,7 +1293,7 @@
 		if ((host->flags & SDHCI_NEEDS_RETUNING) &&
 		    !(present_state & (SDHCI_DOING_WRITE | SDHCI_DOING_READ))) {
 			spin_unlock_irqrestore(&host->lock, flags);
-			sdhci_execute_tuning(mmc);
+			sdhci_execute_tuning(mmc, mrq->cmd->opcode);
 			spin_lock_irqsave(&host->lock, flags);
 
 			/* Restore original mmc_request structure */
@@ -1297,12 +1313,17 @@
 static void sdhci_do_set_ios(struct sdhci_host *host, struct mmc_ios *ios)
 {
 	unsigned long flags;
+	int vdd_bit = -1;
 	u8 ctrl;
 
 	spin_lock_irqsave(&host->lock, flags);
 
-	if (host->flags & SDHCI_DEVICE_DEAD)
-		goto out;
+	if (host->flags & SDHCI_DEVICE_DEAD) {
+		spin_unlock_irqrestore(&host->lock, flags);
+		if (host->vmmc && ios->power_mode == MMC_POWER_OFF)
+			mmc_regulator_set_ocr(host->mmc, host->vmmc, 0);
+		return;
+	}
 
 	/*
 	 * Reset the chip on each power off.
@@ -1316,9 +1337,15 @@
 	sdhci_set_clock(host, ios->clock);
 
 	if (ios->power_mode == MMC_POWER_OFF)
-		sdhci_set_power(host, -1);
+		vdd_bit = sdhci_set_power(host, -1);
 	else
-		sdhci_set_power(host, ios->vdd);
+		vdd_bit = sdhci_set_power(host, ios->vdd);
+
+	if (host->vmmc && vdd_bit != -1) {
+		spin_unlock_irqrestore(&host->lock, flags);
+		mmc_regulator_set_ocr(host->mmc, host->vmmc, vdd_bit);
+		spin_lock_irqsave(&host->lock, flags);
+	}
 
 	if (host->ops->platform_send_init_74_clocks)
 		host->ops->platform_send_init_74_clocks(host, ios->power_mode);
@@ -1361,11 +1388,11 @@
 		unsigned int clock;
 
 		/* In case of UHS-I modes, set High Speed Enable */
-		if ((ios->timing == MMC_TIMING_UHS_SDR50) ||
+		if ((ios->timing == MMC_TIMING_MMC_HS200) ||
+		    (ios->timing == MMC_TIMING_UHS_SDR50) ||
 		    (ios->timing == MMC_TIMING_UHS_SDR104) ||
 		    (ios->timing == MMC_TIMING_UHS_DDR50) ||
-		    (ios->timing == MMC_TIMING_UHS_SDR25) ||
-		    (ios->timing == MMC_TIMING_UHS_SDR12))
+		    (ios->timing == MMC_TIMING_UHS_SDR25))
 			ctrl |= SDHCI_CTRL_HISPD;
 
 		ctrl_2 = sdhci_readw(host, SDHCI_HOST_CONTROL2);
@@ -1415,7 +1442,9 @@
 			ctrl_2 = sdhci_readw(host, SDHCI_HOST_CONTROL2);
 			/* Select Bus Speed Mode for host */
 			ctrl_2 &= ~SDHCI_CTRL_UHS_MASK;
-			if (ios->timing == MMC_TIMING_UHS_SDR12)
+			if (ios->timing == MMC_TIMING_MMC_HS200)
+				ctrl_2 |= SDHCI_CTRL_HS_SDR200;
+			else if (ios->timing == MMC_TIMING_UHS_SDR12)
 				ctrl_2 |= SDHCI_CTRL_UHS_SDR12;
 			else if (ios->timing == MMC_TIMING_UHS_SDR25)
 				ctrl_2 |= SDHCI_CTRL_UHS_SDR25;
@@ -1443,7 +1472,6 @@
 	if(host->quirks & SDHCI_QUIRK_RESET_CMD_DATA_ON_IOS)
 		sdhci_reset(host, SDHCI_RESET_CMD | SDHCI_RESET_DATA);
 
-out:
 	mmiowb();
 	spin_unlock_irqrestore(&host->lock, flags);
 }
@@ -1663,7 +1691,7 @@
 	return err;
 }
 
-static int sdhci_execute_tuning(struct mmc_host *mmc)
+static int sdhci_execute_tuning(struct mmc_host *mmc, u32 opcode)
 {
 	struct sdhci_host *host;
 	u16 ctrl;
@@ -1671,6 +1699,7 @@
 	int tuning_loop_counter = MAX_TUNING_LOOP;
 	unsigned long timeout;
 	int err = 0;
+	bool requires_tuning_nonuhs = false;
 
 	host = mmc_priv(mmc);
 
@@ -1681,13 +1710,19 @@
 	ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2);
 
 	/*
-	 * Host Controller needs tuning only in case of SDR104 mode
-	 * and for SDR50 mode when Use Tuning for SDR50 is set in
+	 * The Host Controller needs tuning only in case of SDR104 mode
+	 * and for SDR50 mode when Use Tuning for SDR50 is set in the
 	 * Capabilities register.
+	 * If the Host Controller supports the HS200 mode then the
+	 * tuning function has to be executed.
 	 */
+	if (((ctrl & SDHCI_CTRL_UHS_MASK) == SDHCI_CTRL_UHS_SDR50) &&
+	    (host->flags & SDHCI_SDR50_NEEDS_TUNING ||
+	     host->flags & SDHCI_HS200_NEEDS_TUNING))
+		requires_tuning_nonuhs = true;
+
 	if (((ctrl & SDHCI_CTRL_UHS_MASK) == SDHCI_CTRL_UHS_SDR104) ||
-	    (((ctrl & SDHCI_CTRL_UHS_MASK) == SDHCI_CTRL_UHS_SDR50) &&
-	    (host->flags & SDHCI_SDR50_NEEDS_TUNING)))
+	    requires_tuning_nonuhs)
 		ctrl |= SDHCI_CTRL_EXEC_TUNING;
 	else {
 		spin_unlock(&host->lock);
@@ -1723,7 +1758,7 @@
 		if (!tuning_loop_counter && !timeout)
 			break;
 
-		cmd.opcode = MMC_SEND_TUNING_BLOCK;
+		cmd.opcode = opcode;
 		cmd.arg = 0;
 		cmd.flags = MMC_RSP_R1 | MMC_CMD_ADTC;
 		cmd.retries = 0;
@@ -1738,7 +1773,17 @@
 		 * block to the Host Controller. So we set the block size
 		 * to 64 here.
 		 */
-		sdhci_writew(host, SDHCI_MAKE_BLKSZ(7, 64), SDHCI_BLOCK_SIZE);
+		if (cmd.opcode == MMC_SEND_TUNING_BLOCK_HS200) {
+			if (mmc->ios.bus_width == MMC_BUS_WIDTH_8)
+				sdhci_writew(host, SDHCI_MAKE_BLKSZ(7, 128),
+					     SDHCI_BLOCK_SIZE);
+			else if (mmc->ios.bus_width == MMC_BUS_WIDTH_4)
+				sdhci_writew(host, SDHCI_MAKE_BLKSZ(7, 64),
+					     SDHCI_BLOCK_SIZE);
+		} else {
+			sdhci_writew(host, SDHCI_MAKE_BLKSZ(7, 64),
+				     SDHCI_BLOCK_SIZE);
+		}
 
 		/*
 		 * The tuning block is sent by the card to the host controller.
@@ -2121,12 +2166,14 @@
 
 static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
 {
+	u32 command;
 	BUG_ON(intmask == 0);
 
 	/* CMD19 generates _only_ Buffer Read Ready interrupt */
 	if (intmask & SDHCI_INT_DATA_AVAIL) {
-		if (SDHCI_GET_CMD(sdhci_readw(host, SDHCI_COMMAND)) ==
-		    MMC_SEND_TUNING_BLOCK) {
+		command = SDHCI_GET_CMD(sdhci_readw(host, SDHCI_COMMAND));
+		if (command == MMC_SEND_TUNING_BLOCK ||
+		    command == MMC_SEND_TUNING_BLOCK_HS200) {
 			host->tuning_done = 1;
 			wake_up(&host->buf_ready_int);
 			return;
@@ -2330,26 +2377,33 @@
 int sdhci_suspend_host(struct sdhci_host *host)
 {
 	int ret;
+	bool has_tuning_timer;
 
 	sdhci_disable_card_detection(host);
 
 	/* Disable tuning since we are suspending */
-	if (host->version >= SDHCI_SPEC_300 && host->tuning_count &&
-	    host->tuning_mode == SDHCI_TUNING_MODE_1) {
+	has_tuning_timer = host->version >= SDHCI_SPEC_300 &&
+		host->tuning_count && host->tuning_mode == SDHCI_TUNING_MODE_1;
+	if (has_tuning_timer) {
+		del_timer_sync(&host->tuning_timer);
 		host->flags &= ~SDHCI_NEEDS_RETUNING;
-		mod_timer(&host->tuning_timer, jiffies +
-			host->tuning_count * HZ);
 	}
 
 	ret = mmc_suspend_host(host->mmc);
-	if (ret)
+	if (ret) {
+		if (has_tuning_timer) {
+			host->flags |= SDHCI_NEEDS_RETUNING;
+			mod_timer(&host->tuning_timer, jiffies +
+					host->tuning_count * HZ);
+		}
+
+		sdhci_enable_card_detection(host);
+
 		return ret;
+	}
 
 	free_irq(host->irq, host);
 
-	if (host->vmmc)
-		ret = regulator_disable(host->vmmc);
-
 	return ret;
 }
 
@@ -2359,12 +2413,6 @@
 {
 	int ret;
 
-	if (host->vmmc) {
-		int ret = regulator_enable(host->vmmc);
-		if (ret)
-			return ret;
-	}
-
 	if (host->flags & (SDHCI_USE_SDMA | SDHCI_USE_ADMA)) {
 		if (host->ops->enable_dma)
 			host->ops->enable_dma(host);
@@ -2727,10 +2775,14 @@
 	if (caps[1] & SDHCI_SUPPORT_DDR50)
 		mmc->caps |= MMC_CAP_UHS_DDR50;
 
-	/* Does the host needs tuning for SDR50? */
+	/* Does the host need tuning for SDR50? */
 	if (caps[1] & SDHCI_USE_SDR50_TUNING)
 		host->flags |= SDHCI_SDR50_NEEDS_TUNING;
 
+	/* Does the host need tuning for HS200? */
+	if (mmc->caps2 & MMC_CAP2_HS200)
+		host->flags |= SDHCI_HS200_NEEDS_TUNING;
+
 	/* Driver Type(s) (A, C, D) supported by the host */
 	if (caps[1] & SDHCI_DRIVER_TYPE_A)
 		mmc->caps |= MMC_CAP_DRIVER_TYPE_A;
@@ -2926,8 +2978,6 @@
 	if (IS_ERR(host->vmmc)) {
 		pr_info("%s: no vmmc regulator found\n", mmc_hostname(mmc));
 		host->vmmc = NULL;
-	} else {
-		regulator_enable(host->vmmc);
 	}
 
 	sdhci_init(host, 0);
@@ -3016,10 +3066,8 @@
 	tasklet_kill(&host->card_tasklet);
 	tasklet_kill(&host->finish_tasklet);
 
-	if (host->vmmc) {
-		regulator_disable(host->vmmc);
+	if (host->vmmc)
 		regulator_put(host->vmmc);
-	}
 
 	kfree(host->adma_desc);
 	kfree(host->align_buffer);

diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index a04d4d0..ad265b9 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h

@@ -158,6 +158,7 @@
 #define   SDHCI_CTRL_UHS_SDR50		0x0002
 #define   SDHCI_CTRL_UHS_SDR104		0x0003
 #define   SDHCI_CTRL_UHS_DDR50		0x0004
+#define   SDHCI_CTRL_HS_SDR200		0x0005 /* reserved value in SDIO spec */
 #define  SDHCI_CTRL_VDD_180		0x0008
 #define  SDHCI_CTRL_DRV_TYPE_MASK	0x0030
 #define   SDHCI_CTRL_DRV_TYPE_B		0x0000

diff --git a/drivers/mmc/host/sh_mmcif.c b/drivers/mmc/host/sh_mmcif.c
index d5505f3..4a2c5b2 100644
--- a/drivers/mmc/host/sh_mmcif.c
+++ b/drivers/mmc/host/sh_mmcif.c

@@ -16,6 +16,33 @@
  *
  */
 
+/*
+ * The MMCIF driver is now processing MMC requests asynchronously, according
+ * to the Linux MMC API requirement.
+ *
+ * The MMCIF driver processes MMC requests in up to 3 stages: command, optional
+ * data, and optional stop. To achieve asynchronous processing each of these
+ * stages is split into two halves: a top and a bottom half. The top half
+ * initialises the hardware, installs a timeout handler to handle completion
+ * timeouts, and returns. In case of the command stage this immediately returns
+ * control to the caller, leaving all further processing to run asynchronously.
+ * All further request processing is performed by the bottom halves.
+ *
+ * The bottom half further consists of a "hard" IRQ handler, an IRQ handler
+ * thread, a DMA completion callback, if DMA is used, a timeout work, and
+ * request- and stage-specific handler methods.
+ *
+ * Each bottom half run begins with either a hardware interrupt, a DMA callback
+ * invocation, or a timeout work run. In case of an error or a successful
+ * processing completion, the MMC core is informed and the request processing is
+ * finished. In case processing has to continue, i.e., if data has to be read
+ * from or written to the card, or if a stop command has to be sent, the next
+ * top half is called, which performs the necessary hardware handling and
+ * reschedules the timeout work. This returns the driver state machine into the
+ * bottom half waiting state.
+ */
+
+#include <linux/bitops.h>
 #include <linux/clk.h>
 #include <linux/completion.h>
 #include <linux/delay.h>
@@ -123,6 +150,11 @@
 #define MASK_MRBSYTO		(1 << 1)
 #define MASK_MRSPTO		(1 << 0)
 
+#define MASK_START_CMD		(MASK_MCMDVIO | MASK_MBUFVIO | MASK_MWDATERR | \
+				 MASK_MRDATERR | MASK_MRIDXERR | MASK_MRSPERR | \
+				 MASK_MCCSTO | MASK_MCRCSTO | MASK_MWDATTO | \
+				 MASK_MRDATTO | MASK_MRBSYTO | MASK_MRSPTO)
+
 /* CE_HOST_STS1 */
 #define STS1_CMDSEQ		(1 << 31)
 
@@ -162,9 +194,21 @@
 	STATE_IOS,
 };
 
+enum mmcif_wait_for {
+	MMCIF_WAIT_FOR_REQUEST,
+	MMCIF_WAIT_FOR_CMD,
+	MMCIF_WAIT_FOR_MREAD,
+	MMCIF_WAIT_FOR_MWRITE,
+	MMCIF_WAIT_FOR_READ,
+	MMCIF_WAIT_FOR_WRITE,
+	MMCIF_WAIT_FOR_READ_END,
+	MMCIF_WAIT_FOR_WRITE_END,
+	MMCIF_WAIT_FOR_STOP,
+};
+
 struct sh_mmcif_host {
 	struct mmc_host *mmc;
-	struct mmc_data *data;
+	struct mmc_request *mrq;
 	struct platform_device *pd;
 	struct sh_dmae_slave dma_slave_tx;
 	struct sh_dmae_slave dma_slave_rx;
@@ -172,11 +216,17 @@
 	unsigned int clk;
 	int bus_width;
 	bool sd_error;
+	bool dying;
 	long timeout;
 	void __iomem *addr;
-	struct completion intr_wait;
+	u32 *pio_ptr;
+	spinlock_t lock;		/* protect sh_mmcif_host::state */
 	enum mmcif_state state;
-	spinlock_t lock;
+	enum mmcif_wait_for wait_for;
+	struct delayed_work timeout_work;
+	size_t blocksize;
+	int sg_idx;
+	int sg_blkidx;
 	bool power;
 	bool card_present;
 
@@ -202,19 +252,21 @@
 static void mmcif_dma_complete(void *arg)
 {
 	struct sh_mmcif_host *host = arg;
+	struct mmc_data *data = host->mrq->data;
+
 	dev_dbg(&host->pd->dev, "Command completed\n");
 
-	if (WARN(!host->data, "%s: NULL data in DMA completion!\n",
+	if (WARN(!data, "%s: NULL data in DMA completion!\n",
 		 dev_name(&host->pd->dev)))
 		return;
 
-	if (host->data->flags & MMC_DATA_READ)
+	if (data->flags & MMC_DATA_READ)
 		dma_unmap_sg(host->chan_rx->device->dev,
-			     host->data->sg, host->data->sg_len,
+			     data->sg, data->sg_len,
 			     DMA_FROM_DEVICE);
 	else
 		dma_unmap_sg(host->chan_tx->device->dev,
-			     host->data->sg, host->data->sg_len,
+			     data->sg, data->sg_len,
 			     DMA_TO_DEVICE);
 
 	complete(&host->dma_complete);
@@ -222,13 +274,14 @@
 
 static void sh_mmcif_start_dma_rx(struct sh_mmcif_host *host)
 {
-	struct scatterlist *sg = host->data->sg;
+	struct mmc_data *data = host->mrq->data;
+	struct scatterlist *sg = data->sg;
 	struct dma_async_tx_descriptor *desc = NULL;
 	struct dma_chan *chan = host->chan_rx;
 	dma_cookie_t cookie = -EINVAL;
 	int ret;
 
-	ret = dma_map_sg(chan->device->dev, sg, host->data->sg_len,
+	ret = dma_map_sg(chan->device->dev, sg, data->sg_len,
 			 DMA_FROM_DEVICE);
 	if (ret > 0) {
 		host->dma_active = true;
@@ -244,7 +297,7 @@
 		dma_async_issue_pending(chan);
 	}
 	dev_dbg(&host->pd->dev, "%s(): mapped %d -> %d, cookie %d\n",
-		__func__, host->data->sg_len, ret, cookie);
+		__func__, data->sg_len, ret, cookie);
 
 	if (!desc) {
 		/* DMA failed, fall back to PIO */
@@ -265,18 +318,19 @@
 	}
 
 	dev_dbg(&host->pd->dev, "%s(): desc %p, cookie %d, sg[%d]\n", __func__,
-		desc, cookie, host->data->sg_len);
+		desc, cookie, data->sg_len);
 }
 
 static void sh_mmcif_start_dma_tx(struct sh_mmcif_host *host)
 {
-	struct scatterlist *sg = host->data->sg;
+	struct mmc_data *data = host->mrq->data;
+	struct scatterlist *sg = data->sg;
 	struct dma_async_tx_descriptor *desc = NULL;
 	struct dma_chan *chan = host->chan_tx;
 	dma_cookie_t cookie = -EINVAL;
 	int ret;
 
-	ret = dma_map_sg(chan->device->dev, sg, host->data->sg_len,
+	ret = dma_map_sg(chan->device->dev, sg, data->sg_len,
 			 DMA_TO_DEVICE);
 	if (ret > 0) {
 		host->dma_active = true;
@@ -292,7 +346,7 @@
 		dma_async_issue_pending(chan);
 	}
 	dev_dbg(&host->pd->dev, "%s(): mapped %d -> %d, cookie %d\n",
-		__func__, host->data->sg_len, ret, cookie);
+		__func__, data->sg_len, ret, cookie);
 
 	if (!desc) {
 		/* DMA failed, fall back to PIO */
@@ -399,7 +453,7 @@
 		sh_mmcif_bitset(host, MMCIF_CE_CLK_CTRL, CLK_SUP_PCLK);
 	else
 		sh_mmcif_bitset(host, MMCIF_CE_CLK_CTRL, CLK_CLEAR &
-			(ilog2(__rounddown_pow_of_two(host->clk / clk)) << 16));
+				((fls(host->clk / clk) - 1) << 16));
 
 	sh_mmcif_bitset(host, MMCIF_CE_CLK_CTRL, CLK_ENABLE);
 }
@@ -421,7 +475,7 @@
 static int sh_mmcif_error_manage(struct sh_mmcif_host *host)
 {
 	u32 state1, state2;
-	int ret, timeout = 10000000;
+	int ret, timeout;
 
 	host->sd_error = false;
 
@@ -433,155 +487,212 @@
 	if (state1 & STS1_CMDSEQ) {
 		sh_mmcif_bitset(host, MMCIF_CE_CMD_CTRL, CMD_CTRL_BREAK);
 		sh_mmcif_bitset(host, MMCIF_CE_CMD_CTRL, ~CMD_CTRL_BREAK);
-		while (1) {
-			timeout--;
-			if (timeout < 0) {
-				dev_err(&host->pd->dev,
-					"Forceed end of command sequence timeout err\n");
-				return -EIO;
-			}
+		for (timeout = 10000000; timeout; timeout--) {
 			if (!(sh_mmcif_readl(host->addr, MMCIF_CE_HOST_STS1)
-								& STS1_CMDSEQ))
+			      & STS1_CMDSEQ))
 				break;
 			mdelay(1);
 		}
+		if (!timeout) {
+			dev_err(&host->pd->dev,
+				"Forced end of command sequence timeout err\n");
+			return -EIO;
+		}
 		sh_mmcif_sync_reset(host);
 		dev_dbg(&host->pd->dev, "Forced end of command sequence\n");
 		return -EIO;
 	}
 
 	if (state2 & STS2_CRC_ERR) {
-		dev_dbg(&host->pd->dev, ": Happened CRC error\n");
+		dev_dbg(&host->pd->dev, ": CRC error\n");
 		ret = -EIO;
 	} else if (state2 & STS2_TIMEOUT_ERR) {
-		dev_dbg(&host->pd->dev, ": Happened Timeout error\n");
+		dev_dbg(&host->pd->dev, ": Timeout\n");
 		ret = -ETIMEDOUT;
 	} else {
-		dev_dbg(&host->pd->dev, ": Happened End/Index error\n");
+		dev_dbg(&host->pd->dev, ": End/Index error\n");
 		ret = -EIO;
 	}
 	return ret;
 }
 
-static int sh_mmcif_single_read(struct sh_mmcif_host *host,
-					struct mmc_request *mrq)
+static bool sh_mmcif_next_block(struct sh_mmcif_host *host, u32 *p)
 {
-	struct mmc_data *data = mrq->data;
-	long time;
-	u32 blocksize, i, *p = sg_virt(data->sg);
+	struct mmc_data *data = host->mrq->data;
+
+	host->sg_blkidx += host->blocksize;
+
+	/* data->sg->length must be a multiple of host->blocksize? */
+	BUG_ON(host->sg_blkidx > data->sg->length);
+
+	if (host->sg_blkidx == data->sg->length) {
+		host->sg_blkidx = 0;
+		if (++host->sg_idx < data->sg_len)
+			host->pio_ptr = sg_virt(++data->sg);
+	} else {
+		host->pio_ptr = p;
+	}
+
+	if (host->sg_idx == data->sg_len)
+		return false;
+
+	return true;
+}
+
+static void sh_mmcif_single_read(struct sh_mmcif_host *host,
+				 struct mmc_request *mrq)
+{
+	host->blocksize = (sh_mmcif_readl(host->addr, MMCIF_CE_BLOCK_SET) &
+			   BLOCK_SIZE_MASK) + 3;
+
+	host->wait_for = MMCIF_WAIT_FOR_READ;
+	schedule_delayed_work(&host->timeout_work, host->timeout);
 
 	/* buf read enable */
 	sh_mmcif_bitset(host, MMCIF_CE_INT_MASK, MASK_MBUFREN);
-	time = wait_for_completion_interruptible_timeout(&host->intr_wait,
-			host->timeout);
-	if (time <= 0 || host->sd_error)
-		return sh_mmcif_error_manage(host);
+}
 
-	blocksize = (BLOCK_SIZE_MASK &
-			sh_mmcif_readl(host->addr, MMCIF_CE_BLOCK_SET)) + 3;
-	for (i = 0; i < blocksize / 4; i++)
+static bool sh_mmcif_read_block(struct sh_mmcif_host *host)
+{
+	struct mmc_data *data = host->mrq->data;
+	u32 *p = sg_virt(data->sg);
+	int i;
+
+	if (host->sd_error) {
+		data->error = sh_mmcif_error_manage(host);
+		return false;
+	}
+
+	for (i = 0; i < host->blocksize / 4; i++)
 		*p++ = sh_mmcif_readl(host->addr, MMCIF_CE_DATA);
 
 	/* buffer read end */
 	sh_mmcif_bitset(host, MMCIF_CE_INT_MASK, MASK_MBUFRE);
-	time = wait_for_completion_interruptible_timeout(&host->intr_wait,
-			host->timeout);
-	if (time <= 0 || host->sd_error)
-		return sh_mmcif_error_manage(host);
+	host->wait_for = MMCIF_WAIT_FOR_READ_END;
 
-	return 0;
+	return true;
 }
 
-static int sh_mmcif_multi_read(struct sh_mmcif_host *host,
-					struct mmc_request *mrq)
+static void sh_mmcif_multi_read(struct sh_mmcif_host *host,
+				struct mmc_request *mrq)
 {
 	struct mmc_data *data = mrq->data;
-	long time;
-	u32 blocksize, i, j, sec, *p;
 
-	blocksize = BLOCK_SIZE_MASK & sh_mmcif_readl(host->addr,
-						     MMCIF_CE_BLOCK_SET);
-	for (j = 0; j < data->sg_len; j++) {
-		p = sg_virt(data->sg);
-		for (sec = 0; sec < data->sg->length / blocksize; sec++) {
-			sh_mmcif_bitset(host, MMCIF_CE_INT_MASK, MASK_MBUFREN);
-			/* buf read enable */
-			time = wait_for_completion_interruptible_timeout(&host->intr_wait,
-				host->timeout);
+	if (!data->sg_len || !data->sg->length)
+		return;
 
-			if (time <= 0 || host->sd_error)
-				return sh_mmcif_error_manage(host);
+	host->blocksize = sh_mmcif_readl(host->addr, MMCIF_CE_BLOCK_SET) &
+		BLOCK_SIZE_MASK;
 
-			for (i = 0; i < blocksize / 4; i++)
-				*p++ = sh_mmcif_readl(host->addr,
-						      MMCIF_CE_DATA);
-		}
-		if (j < data->sg_len - 1)
-			data->sg++;
+	host->wait_for = MMCIF_WAIT_FOR_MREAD;
+	host->sg_idx = 0;
+	host->sg_blkidx = 0;
+	host->pio_ptr = sg_virt(data->sg);
+	schedule_delayed_work(&host->timeout_work, host->timeout);
+	sh_mmcif_bitset(host, MMCIF_CE_INT_MASK, MASK_MBUFREN);
+}
+
+static bool sh_mmcif_mread_block(struct sh_mmcif_host *host)
+{
+	struct mmc_data *data = host->mrq->data;
+	u32 *p = host->pio_ptr;
+	int i;
+
+	if (host->sd_error) {
+		data->error = sh_mmcif_error_manage(host);
+		return false;
 	}
-	return 0;
+
+	BUG_ON(!data->sg->length);
+
+	for (i = 0; i < host->blocksize / 4; i++)
+		*p++ = sh_mmcif_readl(host->addr, MMCIF_CE_DATA);
+
+	if (!sh_mmcif_next_block(host, p))
+		return false;
+
+	schedule_delayed_work(&host->timeout_work, host->timeout);
+	sh_mmcif_bitset(host, MMCIF_CE_INT_MASK, MASK_MBUFREN);
+
+	return true;
 }
 
-static int sh_mmcif_single_write(struct sh_mmcif_host *host,
+static void sh_mmcif_single_write(struct sh_mmcif_host *host,
 					struct mmc_request *mrq)
 {
-	struct mmc_data *data = mrq->data;
-	long time;
-	u32 blocksize, i, *p = sg_virt(data->sg);
+	host->blocksize = (sh_mmcif_readl(host->addr, MMCIF_CE_BLOCK_SET) &
+			   BLOCK_SIZE_MASK) + 3;
 
-	sh_mmcif_bitset(host, MMCIF_CE_INT_MASK, MASK_MBUFWEN);
+	host->wait_for = MMCIF_WAIT_FOR_WRITE;
+	schedule_delayed_work(&host->timeout_work, host->timeout);
 
 	/* buf write enable */
-	time = wait_for_completion_interruptible_timeout(&host->intr_wait,
-			host->timeout);
-	if (time <= 0 || host->sd_error)
-		return sh_mmcif_error_manage(host);
+	sh_mmcif_bitset(host, MMCIF_CE_INT_MASK, MASK_MBUFWEN);
+}
 
-	blocksize = (BLOCK_SIZE_MASK &
-			sh_mmcif_readl(host->addr, MMCIF_CE_BLOCK_SET)) + 3;
-	for (i = 0; i < blocksize / 4; i++)
+static bool sh_mmcif_write_block(struct sh_mmcif_host *host)
+{
+	struct mmc_data *data = host->mrq->data;
+	u32 *p = sg_virt(data->sg);
+	int i;
+
+	if (host->sd_error) {
+		data->error = sh_mmcif_error_manage(host);
+		return false;
+	}
+
+	for (i = 0; i < host->blocksize / 4; i++)
 		sh_mmcif_writel(host->addr, MMCIF_CE_DATA, *p++);
 
 	/* buffer write end */
 	sh_mmcif_bitset(host, MMCIF_CE_INT_MASK, MASK_MDTRANE);
+	host->wait_for = MMCIF_WAIT_FOR_WRITE_END;
 
-	time = wait_for_completion_interruptible_timeout(&host->intr_wait,
-			host->timeout);
-	if (time <= 0 || host->sd_error)
-		return sh_mmcif_error_manage(host);
-
-	return 0;
+	return true;
 }
 
-static int sh_mmcif_multi_write(struct sh_mmcif_host *host,
-						struct mmc_request *mrq)
+static void sh_mmcif_multi_write(struct sh_mmcif_host *host,
+				struct mmc_request *mrq)
 {
 	struct mmc_data *data = mrq->data;
-	long time;
-	u32 i, sec, j, blocksize, *p;
 
-	blocksize = BLOCK_SIZE_MASK & sh_mmcif_readl(host->addr,
-						     MMCIF_CE_BLOCK_SET);
+	if (!data->sg_len || !data->sg->length)
+		return;
 
-	for (j = 0; j < data->sg_len; j++) {
-		p = sg_virt(data->sg);
-		for (sec = 0; sec < data->sg->length / blocksize; sec++) {
-			sh_mmcif_bitset(host, MMCIF_CE_INT_MASK, MASK_MBUFWEN);
-			/* buf write enable*/
-			time = wait_for_completion_interruptible_timeout(&host->intr_wait,
-				host->timeout);
+	host->blocksize = sh_mmcif_readl(host->addr, MMCIF_CE_BLOCK_SET) &
+		BLOCK_SIZE_MASK;
 
-			if (time <= 0 || host->sd_error)
-				return sh_mmcif_error_manage(host);
+	host->wait_for = MMCIF_WAIT_FOR_MWRITE;
+	host->sg_idx = 0;
+	host->sg_blkidx = 0;
+	host->pio_ptr = sg_virt(data->sg);
+	schedule_delayed_work(&host->timeout_work, host->timeout);
+	sh_mmcif_bitset(host, MMCIF_CE_INT_MASK, MASK_MBUFWEN);
+}
 
-			for (i = 0; i < blocksize / 4; i++)
-				sh_mmcif_writel(host->addr,
-						MMCIF_CE_DATA, *p++);
-		}
-		if (j < data->sg_len - 1)
-			data->sg++;
+static bool sh_mmcif_mwrite_block(struct sh_mmcif_host *host)
+{
+	struct mmc_data *data = host->mrq->data;
+	u32 *p = host->pio_ptr;
+	int i;
+
+	if (host->sd_error) {
+		data->error = sh_mmcif_error_manage(host);
+		return false;
 	}
-	return 0;
+
+	BUG_ON(!data->sg->length);
+
+	for (i = 0; i < host->blocksize / 4; i++)
+		sh_mmcif_writel(host->addr, MMCIF_CE_DATA, *p++);
+
+	if (!sh_mmcif_next_block(host, p))
+		return false;
+
+	schedule_delayed_work(&host->timeout_work, host->timeout);
+	sh_mmcif_bitset(host, MMCIF_CE_INT_MASK, MASK_MBUFWEN);
+
+	return true;
 }
 
 static void sh_mmcif_get_response(struct sh_mmcif_host *host,
@@ -603,8 +714,11 @@
 }
 
 static u32 sh_mmcif_set_cmd(struct sh_mmcif_host *host,
-		struct mmc_request *mrq, struct mmc_command *cmd, u32 opc)
+			    struct mmc_request *mrq)
 {
+	struct mmc_data *data = mrq->data;
+	struct mmc_command *cmd = mrq->cmd;
+	u32 opc = cmd->opcode;
 	u32 tmp = 0;
 
 	/* Response Type check */
@@ -636,7 +750,7 @@
 		break;
 	}
 	/* WDAT / DATW */
-	if (host->data) {
+	if (data) {
 		tmp |= CMD_SET_WDAT;
 		switch (host->bus_width) {
 		case MMC_BUS_WIDTH_1:
@@ -660,7 +774,7 @@
 	if (opc == MMC_READ_MULTIPLE_BLOCK || opc == MMC_WRITE_MULTIPLE_BLOCK) {
 		tmp |= CMD_SET_CMLTE | CMD_SET_CMD12EN;
 		sh_mmcif_bitset(host, MMCIF_CE_BLOCK_SET,
-					mrq->data->blocks << 16);
+				data->blocks << 16);
 	}
 	/* RIDXC[1:0] check bits */
 	if (opc == MMC_SEND_OP_COND || opc == MMC_ALL_SEND_CID ||
@@ -674,68 +788,60 @@
 		opc == MMC_SEND_CSD || opc == MMC_SEND_CID)
 		tmp |= CMD_SET_CRC7C_INTERNAL;
 
-	return opc = ((opc << 24) | tmp);
+	return (opc << 24) | tmp;
 }
 
 static int sh_mmcif_data_trans(struct sh_mmcif_host *host,
-				struct mmc_request *mrq, u32 opc)
+			       struct mmc_request *mrq, u32 opc)
 {
-	int ret;
-
 	switch (opc) {
 	case MMC_READ_MULTIPLE_BLOCK:
-		ret = sh_mmcif_multi_read(host, mrq);
-		break;
+		sh_mmcif_multi_read(host, mrq);
+		return 0;
 	case MMC_WRITE_MULTIPLE_BLOCK:
-		ret = sh_mmcif_multi_write(host, mrq);
-		break;
+		sh_mmcif_multi_write(host, mrq);
+		return 0;
 	case MMC_WRITE_BLOCK:
-		ret = sh_mmcif_single_write(host, mrq);
-		break;
+		sh_mmcif_single_write(host, mrq);
+		return 0;
 	case MMC_READ_SINGLE_BLOCK:
 	case MMC_SEND_EXT_CSD:
-		ret = sh_mmcif_single_read(host, mrq);
-		break;
+		sh_mmcif_single_read(host, mrq);
+		return 0;
 	default:
 		dev_err(&host->pd->dev, "UNSUPPORTED CMD = d'%08d\n", opc);
-		ret = -EINVAL;
-		break;
+		return -EINVAL;
 	}
-	return ret;
 }
 
 static void sh_mmcif_start_cmd(struct sh_mmcif_host *host,
-			struct mmc_request *mrq, struct mmc_command *cmd)
+			       struct mmc_request *mrq)
 {
-	long time;
-	int ret = 0, mask = 0;
+	struct mmc_command *cmd = mrq->cmd;
 	u32 opc = cmd->opcode;
+	u32 mask;
 
 	switch (opc) {
-	/* respons busy check */
+	/* response busy check */
 	case MMC_SWITCH:
 	case MMC_STOP_TRANSMISSION:
 	case MMC_SET_WRITE_PROT:
 	case MMC_CLR_WRITE_PROT:
 	case MMC_ERASE:
 	case MMC_GEN_CMD:
-		mask = MASK_MRBSYE;
+		mask = MASK_START_CMD | MASK_MRBSYE;
 		break;
 	default:
-		mask = MASK_MCRSPE;
+		mask = MASK_START_CMD | MASK_MCRSPE;
 		break;
 	}
-	mask |=	MASK_MCMDVIO | MASK_MBUFVIO | MASK_MWDATERR |
-		MASK_MRDATERR | MASK_MRIDXERR | MASK_MRSPERR |
-		MASK_MCCSTO | MASK_MCRCSTO | MASK_MWDATTO |
-		MASK_MRDATTO | MASK_MRBSYTO | MASK_MRSPTO;
 
-	if (host->data) {
+	if (mrq->data) {
 		sh_mmcif_writel(host->addr, MMCIF_CE_BLOCK_SET, 0);
 		sh_mmcif_writel(host->addr, MMCIF_CE_BLOCK_SET,
 				mrq->data->blksz);
 	}
-	opc = sh_mmcif_set_cmd(host, mrq, cmd, opc);
+	opc = sh_mmcif_set_cmd(host, mrq);
 
 	sh_mmcif_writel(host->addr, MMCIF_CE_INT, 0xD80430C0);
 	sh_mmcif_writel(host->addr, MMCIF_CE_INT_MASK, mask);
@@ -744,80 +850,28 @@
 	/* set cmd */
 	sh_mmcif_writel(host->addr, MMCIF_CE_CMD_SET, opc);
 
-	time = wait_for_completion_interruptible_timeout(&host->intr_wait,
-		host->timeout);
-	if (time <= 0) {
-		cmd->error = sh_mmcif_error_manage(host);
-		return;
-	}
-	if (host->sd_error) {
-		switch (cmd->opcode) {
-		case MMC_ALL_SEND_CID:
-		case MMC_SELECT_CARD:
-		case MMC_APP_CMD:
-			cmd->error = -ETIMEDOUT;
-			break;
-		default:
-			dev_dbg(&host->pd->dev, "Cmd(d'%d) err\n",
-					cmd->opcode);
-			cmd->error = sh_mmcif_error_manage(host);
-			break;
-		}
-		host->sd_error = false;
-		return;
-	}
-	if (!(cmd->flags & MMC_RSP_PRESENT)) {
-		cmd->error = 0;
-		return;
-	}
-	sh_mmcif_get_response(host, cmd);
-	if (host->data) {
-		if (!host->dma_active) {
-			ret = sh_mmcif_data_trans(host, mrq, cmd->opcode);
-		} else {
-			long time =
-				wait_for_completion_interruptible_timeout(&host->dma_complete,
-									  host->timeout);
-			if (!time)
-				ret = -ETIMEDOUT;
-			else if (time < 0)
-				ret = time;
-			sh_mmcif_bitclr(host, MMCIF_CE_BUF_ACC,
-					BUF_ACC_DMAREN | BUF_ACC_DMAWEN);
-			host->dma_active = false;
-		}
-		if (ret < 0)
-			mrq->data->bytes_xfered = 0;
-		else
-			mrq->data->bytes_xfered =
-				mrq->data->blocks * mrq->data->blksz;
-	}
-	cmd->error = ret;
+	host->wait_for = MMCIF_WAIT_FOR_CMD;
+	schedule_delayed_work(&host->timeout_work, host->timeout);
 }
 
 static void sh_mmcif_stop_cmd(struct sh_mmcif_host *host,
-		struct mmc_request *mrq, struct mmc_command *cmd)
+			      struct mmc_request *mrq)
 {
-	long time;
-
-	if (mrq->cmd->opcode == MMC_READ_MULTIPLE_BLOCK)
+	switch (mrq->cmd->opcode) {
+	case MMC_READ_MULTIPLE_BLOCK:
 		sh_mmcif_bitset(host, MMCIF_CE_INT_MASK, MASK_MCMD12DRE);
-	else if (mrq->cmd->opcode == MMC_WRITE_MULTIPLE_BLOCK)
+		break;
+	case MMC_WRITE_MULTIPLE_BLOCK:
 		sh_mmcif_bitset(host, MMCIF_CE_INT_MASK, MASK_MCMD12RBE);
-	else {
+		break;
+	default:
 		dev_err(&host->pd->dev, "unsupported stop cmd\n");
-		cmd->error = sh_mmcif_error_manage(host);
+		mrq->stop->error = sh_mmcif_error_manage(host);
 		return;
 	}
 
-	time = wait_for_completion_interruptible_timeout(&host->intr_wait,
-			host->timeout);
-	if (time <= 0 || host->sd_error) {
-		cmd->error = sh_mmcif_error_manage(host);
-		return;
-	}
-	sh_mmcif_get_cmd12response(host, cmd);
-	cmd->error = 0;
+	host->wait_for = MMCIF_WAIT_FOR_STOP;
+	schedule_delayed_work(&host->timeout_work, host->timeout);
 }
 
 static void sh_mmcif_request(struct mmc_host *mmc, struct mmc_request *mrq)
@@ -856,23 +910,10 @@
 	default:
 		break;
 	}
-	host->data = mrq->data;
-	if (mrq->data) {
-		if (mrq->data->flags & MMC_DATA_READ) {
-			if (host->chan_rx)
-				sh_mmcif_start_dma_rx(host);
-		} else {
-			if (host->chan_tx)
-				sh_mmcif_start_dma_tx(host);
-		}
-	}
-	sh_mmcif_start_cmd(host, mrq, mrq->cmd);
-	host->data = NULL;
 
-	if (!mrq->cmd->error && mrq->stop)
-		sh_mmcif_stop_cmd(host, mrq, mrq->stop);
-	host->state = STATE_IDLE;
-	mmc_request_done(mmc, mrq);
+	host->mrq = mrq;
+
+	sh_mmcif_start_cmd(host, mrq);
 }
 
 static void sh_mmcif_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
@@ -947,9 +988,156 @@
 	.get_cd		= sh_mmcif_get_cd,
 };
 
-static void sh_mmcif_detect(struct mmc_host *mmc)
+static bool sh_mmcif_end_cmd(struct sh_mmcif_host *host)
 {
-	mmc_detect_change(mmc, 0);
+	struct mmc_command *cmd = host->mrq->cmd;
+	struct mmc_data *data = host->mrq->data;
+	long time;
+
+	if (host->sd_error) {
+		switch (cmd->opcode) {
+		case MMC_ALL_SEND_CID:
+		case MMC_SELECT_CARD:
+		case MMC_APP_CMD:
+			cmd->error = -ETIMEDOUT;
+			host->sd_error = false;
+			break;
+		default:
+			cmd->error = sh_mmcif_error_manage(host);
+			dev_dbg(&host->pd->dev, "Cmd(d'%d) error %d\n",
+				cmd->opcode, cmd->error);
+			break;
+		}
+		return false;
+	}
+	if (!(cmd->flags & MMC_RSP_PRESENT)) {
+		cmd->error = 0;
+		return false;
+	}
+
+	sh_mmcif_get_response(host, cmd);
+
+	if (!data)
+		return false;
+
+	if (data->flags & MMC_DATA_READ) {
+		if (host->chan_rx)
+			sh_mmcif_start_dma_rx(host);
+	} else {
+		if (host->chan_tx)
+			sh_mmcif_start_dma_tx(host);
+	}
+
+	if (!host->dma_active) {
+		data->error = sh_mmcif_data_trans(host, host->mrq, cmd->opcode);
+		if (!data->error)
+			return true;
+		return false;
+	}
+
+	/* Running in the IRQ thread, can sleep */
+	time = wait_for_completion_interruptible_timeout(&host->dma_complete,
+							 host->timeout);
+	if (host->sd_error) {
+		dev_err(host->mmc->parent,
+			"Error IRQ while waiting for DMA completion!\n");
+		/* Woken up by an error IRQ: abort DMA */
+		if (data->flags & MMC_DATA_READ)
+			dmaengine_terminate_all(host->chan_rx);
+		else
+			dmaengine_terminate_all(host->chan_tx);
+		data->error = sh_mmcif_error_manage(host);
+	} else if (!time) {
+		data->error = -ETIMEDOUT;
+	} else if (time < 0) {
+		data->error = time;
+	}
+	sh_mmcif_bitclr(host, MMCIF_CE_BUF_ACC,
+			BUF_ACC_DMAREN | BUF_ACC_DMAWEN);
+	host->dma_active = false;
+
+	if (data->error)
+		data->bytes_xfered = 0;
+
+	return false;
+}
+
+static irqreturn_t sh_mmcif_irqt(int irq, void *dev_id)
+{
+	struct sh_mmcif_host *host = dev_id;
+	struct mmc_request *mrq = host->mrq;
+	struct mmc_data *data = mrq->data;
+
+	cancel_delayed_work_sync(&host->timeout_work);
+
+	/*
+	 * All handlers return true, if processing continues, and false, if the
+	 * request has to be completed - successfully or not
+	 */
+	switch (host->wait_for) {
+	case MMCIF_WAIT_FOR_REQUEST:
+		/* We're too late, the timeout has already kicked in */
+		return IRQ_HANDLED;
+	case MMCIF_WAIT_FOR_CMD:
+		if (sh_mmcif_end_cmd(host))
+			/* Wait for data */
+			return IRQ_HANDLED;
+		break;
+	case MMCIF_WAIT_FOR_MREAD:
+		if (sh_mmcif_mread_block(host))
+			/* Wait for more data */
+			return IRQ_HANDLED;
+		break;
+	case MMCIF_WAIT_FOR_READ:
+		if (sh_mmcif_read_block(host))
+			/* Wait for data end */
+			return IRQ_HANDLED;
+		break;
+	case MMCIF_WAIT_FOR_MWRITE:
+		if (sh_mmcif_mwrite_block(host))
+			/* Wait for data to write */
+			return IRQ_HANDLED;
+		break;
+	case MMCIF_WAIT_FOR_WRITE:
+		if (sh_mmcif_write_block(host))
+			/* Wait for data end */
+			return IRQ_HANDLED;
+		break;
+	case MMCIF_WAIT_FOR_STOP:
+		if (host->sd_error) {
+			mrq->stop->error = sh_mmcif_error_manage(host);
+			break;
+		}
+		sh_mmcif_get_cmd12response(host, mrq->stop);
+		mrq->stop->error = 0;
+		break;
+	case MMCIF_WAIT_FOR_READ_END:
+	case MMCIF_WAIT_FOR_WRITE_END:
+		if (host->sd_error)
+			data->error = sh_mmcif_error_manage(host);
+		break;
+	default:
+		BUG();
+	}
+
+	if (host->wait_for != MMCIF_WAIT_FOR_STOP) {
+		if (!mrq->cmd->error && data && !data->error)
+			data->bytes_xfered =
+				data->blocks * data->blksz;
+
+		if (mrq->stop && !mrq->cmd->error && (!data || !data->error)) {
+			sh_mmcif_stop_cmd(host, mrq);
+			if (!mrq->stop->error)
+				return IRQ_HANDLED;
+		}
+	}
+
+	host->wait_for = MMCIF_WAIT_FOR_REQUEST;
+	host->state = STATE_IDLE;
+	host->mrq = NULL;
+	mmc_request_done(host->mmc, mrq);
+
+	return IRQ_HANDLED;
 }
 
 static irqreturn_t sh_mmcif_intr(int irq, void *dev_id)
@@ -960,7 +1148,12 @@
 
 	state = sh_mmcif_readl(host->addr, MMCIF_CE_INT);
 
-	if (state & INT_RBSYE) {
+	if (state & INT_ERR_STS) {
+		/* error interrupts - process first */
+		sh_mmcif_writel(host->addr, MMCIF_CE_INT, ~state);
+		sh_mmcif_bitclr(host, MMCIF_CE_INT_MASK, state);
+		err = 1;
+	} else if (state & INT_RBSYE) {
 		sh_mmcif_writel(host->addr, MMCIF_CE_INT,
 				~(INT_RBSYE | INT_CRSPE));
 		sh_mmcif_bitclr(host, MMCIF_CE_INT_MASK, MASK_MRBSYE);
@@ -988,11 +1181,6 @@
 		sh_mmcif_writel(host->addr, MMCIF_CE_INT,
 				~(INT_CMD12RBE | INT_CMD12CRE));
 		sh_mmcif_bitclr(host, MMCIF_CE_INT_MASK, MASK_MCMD12RBE);
-	} else if (state & INT_ERR_STS) {
-		/* err interrupts */
-		sh_mmcif_writel(host->addr, MMCIF_CE_INT, ~state);
-		sh_mmcif_bitclr(host, MMCIF_CE_INT_MASK, state);
-		err = 1;
 	} else {
 		dev_dbg(&host->pd->dev, "Unsupported interrupt: 0x%x\n", state);
 		sh_mmcif_writel(host->addr, MMCIF_CE_INT, ~state);
@@ -1003,14 +1191,57 @@
 		host->sd_error = true;
 		dev_dbg(&host->pd->dev, "int err state = %08x\n", state);
 	}
-	if (state & ~(INT_CMD12RBE | INT_CMD12CRE))
-		complete(&host->intr_wait);
-	else
+	if (state & ~(INT_CMD12RBE | INT_CMD12CRE)) {
+		if (!host->dma_active)
+			return IRQ_WAKE_THREAD;
+		else if (host->sd_error)
+			mmcif_dma_complete(host);
+	} else {
 		dev_dbg(&host->pd->dev, "Unexpected IRQ 0x%x\n", state);
+	}
 
 	return IRQ_HANDLED;
 }
 
+static void mmcif_timeout_work(struct work_struct *work)
+{
+	struct delayed_work *d = container_of(work, struct delayed_work, work);
+	struct sh_mmcif_host *host = container_of(d, struct sh_mmcif_host, timeout_work);
+	struct mmc_request *mrq = host->mrq;
+
+	if (host->dying)
+		/* Don't run after mmc_remove_host() */
+		return;
+
+	/*
+	 * Handle races with cancel_delayed_work(), unless
+	 * cancel_delayed_work_sync() is used
+	 */
+	switch (host->wait_for) {
+	case MMCIF_WAIT_FOR_CMD:
+		mrq->cmd->error = sh_mmcif_error_manage(host);
+		break;
+	case MMCIF_WAIT_FOR_STOP:
+		mrq->stop->error = sh_mmcif_error_manage(host);
+		break;
+	case MMCIF_WAIT_FOR_MREAD:
+	case MMCIF_WAIT_FOR_MWRITE:
+	case MMCIF_WAIT_FOR_READ:
+	case MMCIF_WAIT_FOR_WRITE:
+	case MMCIF_WAIT_FOR_READ_END:
+	case MMCIF_WAIT_FOR_WRITE_END:
+		mrq->data->error = sh_mmcif_error_manage(host);
+		break;
+	default:
+		BUG();
+	}
+
+	host->state = STATE_IDLE;
+	host->wait_for = MMCIF_WAIT_FOR_REQUEST;
+	host->mrq = NULL;
+	mmc_request_done(host->mmc, mrq);
+}
+
 static int __devinit sh_mmcif_probe(struct platform_device *pdev)
 {
 	int ret = 0, irq[2];
@@ -1064,7 +1295,6 @@
 	host->clk = clk_get_rate(host->hclk);
 	host->pd = pdev;
 
-	init_completion(&host->intr_wait);
 	spin_lock_init(&host->lock);
 
 	mmc->ops = &sh_mmcif_ops;
@@ -1101,19 +1331,21 @@
 
 	sh_mmcif_writel(host->addr, MMCIF_CE_INT_MASK, MASK_ALL);
 
-	ret = request_irq(irq[0], sh_mmcif_intr, 0, "sh_mmc:error", host);
+	ret = request_threaded_irq(irq[0], sh_mmcif_intr, sh_mmcif_irqt, 0, "sh_mmc:error", host);
 	if (ret) {
 		dev_err(&pdev->dev, "request_irq error (sh_mmc:error)\n");
 		goto clean_up3;
 	}
-	ret = request_irq(irq[1], sh_mmcif_intr, 0, "sh_mmc:int", host);
+	ret = request_threaded_irq(irq[1], sh_mmcif_intr, sh_mmcif_irqt, 0, "sh_mmc:int", host);
 	if (ret) {
 		free_irq(irq[0], host);
 		dev_err(&pdev->dev, "request_irq error (sh_mmc:int)\n");
 		goto clean_up3;
 	}
 
-	sh_mmcif_detect(host->mmc);
+	INIT_DELAYED_WORK(&host->timeout_work, mmcif_timeout_work);
+
+	mmc_detect_change(host->mmc, 0);
 
 	dev_info(&pdev->dev, "driver version %s\n", DRIVER_VERSION);
 	dev_dbg(&pdev->dev, "chip ver H'%04x\n",
@@ -1139,11 +1371,19 @@
 	struct sh_mmcif_host *host = platform_get_drvdata(pdev);
 	int irq[2];
 
+	host->dying = true;
 	pm_runtime_get_sync(&pdev->dev);
 
 	mmc_remove_host(host->mmc);
 	sh_mmcif_writel(host->addr, MMCIF_CE_INT_MASK, MASK_ALL);
 
+	/*
+	 * FIXME: cancel_delayed_work(_sync)() and free_irq() race with the
+	 * mmc_remove_host() call above. But swapping order doesn't help either
+	 * (a query on the linux-mmc mailing list didn't bring any replies).
+	 */
+	cancel_delayed_work_sync(&host->timeout_work);
+
 	if (host->addr)
 		iounmap(host->addr);
 
@@ -1206,19 +1446,7 @@
 	},
 };
 
-static int __init sh_mmcif_init(void)
-{
-	return platform_driver_register(&sh_mmcif_driver);
-}
-
-static void __exit sh_mmcif_exit(void)
-{
-	platform_driver_unregister(&sh_mmcif_driver);
-}
-
-module_init(sh_mmcif_init);
-module_exit(sh_mmcif_exit);
-
+module_platform_driver(sh_mmcif_driver);
 
 MODULE_DESCRIPTION("SuperH on-chip MMC/eMMC interface driver");
 MODULE_LICENSE("GPL");

diff --git a/drivers/mmc/host/sh_mobile_sdhi.c b/drivers/mmc/host/sh_mobile_sdhi.c
index 41ae646..58da3c4 100644
--- a/drivers/mmc/host/sh_mobile_sdhi.c
+++ b/drivers/mmc/host/sh_mobile_sdhi.c

@@ -282,18 +282,7 @@
 	.remove		= __devexit_p(sh_mobile_sdhi_remove),
 };
 
-static int __init sh_mobile_sdhi_init(void)
-{
-	return platform_driver_register(&sh_mobile_sdhi_driver);
-}
-
-static void __exit sh_mobile_sdhi_exit(void)
-{
-	platform_driver_unregister(&sh_mobile_sdhi_driver);
-}
-
-module_init(sh_mobile_sdhi_init);
-module_exit(sh_mobile_sdhi_exit);
+module_platform_driver(sh_mobile_sdhi_driver);
 
 MODULE_DESCRIPTION("SuperH Mobile SDHI driver");
 MODULE_AUTHOR("Magnus Damm");

diff --git a/drivers/mmc/host/tifm_sd.c b/drivers/mmc/host/tifm_sd.c
index f70d046..fc00081 100644
--- a/drivers/mmc/host/tifm_sd.c
+++ b/drivers/mmc/host/tifm_sd.c

@@ -118,7 +118,7 @@
 	unsigned char *buf;
 	unsigned int pos = 0, val;
 
-	buf = kmap_atomic(pg, KM_BIO_DST_IRQ) + off;
+	buf = kmap_atomic(pg) + off;
 	if (host->cmd_flags & DATA_CARRY) {
 		buf[pos++] = host->bounce_buf_data[0];
 		host->cmd_flags &= ~DATA_CARRY;
@@ -134,7 +134,7 @@
 		}
 		buf[pos++] = (val >> 8) & 0xff;
 	}
-	kunmap_atomic(buf - off, KM_BIO_DST_IRQ);
+	kunmap_atomic(buf - off);
 }
 
 static void tifm_sd_write_fifo(struct tifm_sd *host, struct page *pg,
@@ -144,7 +144,7 @@
 	unsigned char *buf;
 	unsigned int pos = 0, val;
 
-	buf = kmap_atomic(pg, KM_BIO_SRC_IRQ) + off;
+	buf = kmap_atomic(pg) + off;
 	if (host->cmd_flags & DATA_CARRY) {
 		val = host->bounce_buf_data[0] | ((buf[pos++] << 8) & 0xff00);
 		writel(val, sock->addr + SOCK_MMCSD_DATA);
@@ -161,7 +161,7 @@
 		val |= (buf[pos++] << 8) & 0xff00;
 		writel(val, sock->addr + SOCK_MMCSD_DATA);
 	}
-	kunmap_atomic(buf - off, KM_BIO_SRC_IRQ);
+	kunmap_atomic(buf - off);
 }
 
 static void tifm_sd_transfer_data(struct tifm_sd *host)
@@ -212,13 +212,13 @@
 			      struct page *src, unsigned int src_off,
 			      unsigned int count)
 {
-	unsigned char *src_buf = kmap_atomic(src, KM_BIO_SRC_IRQ) + src_off;
-	unsigned char *dst_buf = kmap_atomic(dst, KM_BIO_DST_IRQ) + dst_off;
+	unsigned char *src_buf = kmap_atomic(src) + src_off;
+	unsigned char *dst_buf = kmap_atomic(dst) + dst_off;
 
 	memcpy(dst_buf, src_buf, count);
 
-	kunmap_atomic(dst_buf - dst_off, KM_BIO_DST_IRQ);
-	kunmap_atomic(src_buf - src_off, KM_BIO_SRC_IRQ);
+	kunmap_atomic(dst_buf - dst_off);
+	kunmap_atomic(src_buf - src_off);
 }
 
 static void tifm_sd_bounce_block(struct tifm_sd *host, struct mmc_data *r_data)

diff --git a/drivers/mmc/host/tmio_mmc.c b/drivers/mmc/host/tmio_mmc.c
index a4ea102..113ce6c 100644
--- a/drivers/mmc/host/tmio_mmc.c
+++ b/drivers/mmc/host/tmio_mmc.c

@@ -138,19 +138,7 @@
 	.resume = tmio_mmc_resume,
 };
 
-
-static int __init tmio_mmc_init(void)
-{
-	return platform_driver_register(&tmio_mmc_driver);
-}
-
-static void __exit tmio_mmc_exit(void)
-{
-	platform_driver_unregister(&tmio_mmc_driver);
-}
-
-module_init(tmio_mmc_init);
-module_exit(tmio_mmc_exit);
+module_platform_driver(tmio_mmc_driver);
 
 MODULE_DESCRIPTION("Toshiba TMIO SD/MMC driver");
 MODULE_AUTHOR("Ian Molton <spyro@f2s.com>");

diff --git a/drivers/mmc/host/tmio_mmc.h b/drivers/mmc/host/tmio_mmc.h
index 3020f98..a95e6d9 100644
--- a/drivers/mmc/host/tmio_mmc.h
+++ b/drivers/mmc/host/tmio_mmc.h

@@ -105,13 +105,13 @@
 					 unsigned long *flags)
 {
 	local_irq_save(*flags);
-	return kmap_atomic(sg_page(sg), KM_BIO_SRC_IRQ) + sg->offset;
+	return kmap_atomic(sg_page(sg)) + sg->offset;
 }
 
 static inline void tmio_mmc_kunmap_atomic(struct scatterlist *sg,
 					  unsigned long *flags, void *virt)
 {
-	kunmap_atomic(virt - sg->offset, KM_BIO_SRC_IRQ);
+	kunmap_atomic(virt - sg->offset);
 	local_irq_restore(*flags);
 }
 

diff --git a/drivers/mmc/host/tmio_mmc_pio.c b/drivers/mmc/host/tmio_mmc_pio.c
index 4208b39..abad01b 100644
--- a/drivers/mmc/host/tmio_mmc_pio.c
+++ b/drivers/mmc/host/tmio_mmc_pio.c

@@ -800,8 +800,7 @@
 	} else if (ios->power_mode != MMC_POWER_UP) {
 		if (host->set_pwr && ios->power_mode == MMC_POWER_OFF)
 			host->set_pwr(host->pdev, 0);
-		if ((pdata->flags & TMIO_MMC_HAS_COLD_CD) &&
-		    pdata->power) {
+		if (pdata->power) {
 			pdata->power = false;
 			pm_runtime_put(&host->pdev->dev);
 		}
@@ -915,6 +914,23 @@
 	if (ret < 0)
 		goto pm_disable;
 
+	/*
+	 * There are 4 different scenarios for the card detection:
+	 *  1) an external gpio irq handles the cd (best for power savings)
+	 *  2) internal sdhi irq handles the cd
+	 *  3) a worker thread polls the sdhi - indicated by MMC_CAP_NEEDS_POLL
+	 *  4) the medium is non-removable - indicated by MMC_CAP_NONREMOVABLE
+	 *
+	 *  While we increment the rtpm counter for all scenarios when the mmc
+	 *  core activates us by calling an appropriate set_ios(), we must
+	 *  additionally ensure that in case 2) the tmio mmc hardware stays
+	 *  powered on during runtime for the card detection to work.
+	 */
+	if (!(pdata->flags & TMIO_MMC_HAS_COLD_CD
+		|| mmc->caps & MMC_CAP_NEEDS_POLL
+		|| mmc->caps & MMC_CAP_NONREMOVABLE))
+		pm_runtime_get_noresume(&pdev->dev);
+
 	tmio_mmc_clk_stop(_host);
 	tmio_mmc_reset(_host);
 
@@ -933,12 +949,6 @@
 	/* See if we also get DMA */
 	tmio_mmc_request_dma(_host, pdata);
 
-	/* We have to keep the device powered for its card detection to work */
-	if (!(pdata->flags & TMIO_MMC_HAS_COLD_CD)) {
-		pdata->power = true;
-		pm_runtime_get_noresume(&pdev->dev);
-	}
-
 	mmc_add_host(mmc);
 
 	/* Unmask the IRQs we want to know about */
@@ -974,7 +984,9 @@
 	 * the controller, the runtime PM is suspended and pdata->power == false,
 	 * so, our .runtime_resume() will not try to detect a card in the slot.
 	 */
-	if (host->pdata->flags & TMIO_MMC_HAS_COLD_CD)
+	if (host->pdata->flags & TMIO_MMC_HAS_COLD_CD
+		|| host->mmc->caps & MMC_CAP_NEEDS_POLL
+		|| host->mmc->caps & MMC_CAP_NONREMOVABLE)
 		pm_runtime_get_sync(&pdev->dev);
 
 	mmc_remove_host(host->mmc);

diff --git a/drivers/mtd/mtdoops.c b/drivers/mtd/mtdoops.c
index db8e827..3ce99e0 100644
--- a/drivers/mtd/mtdoops.c
+++ b/drivers/mtd/mtdoops.c

@@ -315,8 +315,7 @@
 	char *dst;
 
 	if (reason != KMSG_DUMP_OOPS &&
-	    reason != KMSG_DUMP_PANIC &&
-	    reason != KMSG_DUMP_KEXEC)
+	    reason != KMSG_DUMP_PANIC)
 		return;
 
 	/* Only dump oopses if dump_oops is set */

diff --git a/drivers/mtd/ubi/debug.h b/drivers/mtd/ubi/debug.h
index 64fbb00..ead2cd1 100644
--- a/drivers/mtd/ubi/debug.h
+++ b/drivers/mtd/ubi/debug.h

@@ -43,7 +43,10 @@
 	pr_debug("UBI DBG " type ": " fmt "\n", ##__VA_ARGS__)
 
 /* Just a debugging messages not related to any specific UBI subsystem */
-#define dbg_msg(fmt, ...) ubi_dbg_msg("msg", fmt, ##__VA_ARGS__)
+#define dbg_msg(fmt, ...)                                    \
+	printk(KERN_DEBUG "UBI DBG (pid %d): %s: " fmt "\n", \
+	       current->pid, __func__, ##__VA_ARGS__)
+
 /* General debugging messages */
 #define dbg_gen(fmt, ...) ubi_dbg_msg("gen", fmt, ##__VA_ARGS__)
 /* Messages from the eraseblock association sub-system */

diff --git a/drivers/mtd/ubi/vtbl.c b/drivers/mtd/ubi/vtbl.c
index 9ad18da..890754c 100644
--- a/drivers/mtd/ubi/vtbl.c
+++ b/drivers/mtd/ubi/vtbl.c

@@ -306,7 +306,7 @@
 		       int copy, void *vtbl)
 {
 	int err, tries = 0;
-	static struct ubi_vid_hdr *vid_hdr;
+	struct ubi_vid_hdr *vid_hdr;
 	struct ubi_scan_leb *new_seb;
 
 	ubi_msg("create volume table (copy #%d)", copy + 1);

diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index 106b88a..342626f 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c

@@ -99,16 +99,26 @@
 
 /*********************** tlb specific functions ***************************/
 
-static inline void _lock_tx_hashtbl(struct bonding *bond)
+static inline void _lock_tx_hashtbl_bh(struct bonding *bond)
 {
 	spin_lock_bh(&(BOND_ALB_INFO(bond).tx_hashtbl_lock));
 }
 
-static inline void _unlock_tx_hashtbl(struct bonding *bond)
+static inline void _unlock_tx_hashtbl_bh(struct bonding *bond)
 {
 	spin_unlock_bh(&(BOND_ALB_INFO(bond).tx_hashtbl_lock));
 }
 
+static inline void _lock_tx_hashtbl(struct bonding *bond)
+{
+	spin_lock(&(BOND_ALB_INFO(bond).tx_hashtbl_lock));
+}
+
+static inline void _unlock_tx_hashtbl(struct bonding *bond)
+{
+	spin_unlock(&(BOND_ALB_INFO(bond).tx_hashtbl_lock));
+}
+
 /* Caller must hold tx_hashtbl lock */
 static inline void tlb_init_table_entry(struct tlb_client_info *entry, int save_load)
 {
@@ -129,14 +139,13 @@
 	SLAVE_TLB_INFO(slave).head = TLB_NULL_INDEX;
 }
 
-/* Caller must hold bond lock for read */
-static void tlb_clear_slave(struct bonding *bond, struct slave *slave, int save_load)
+/* Caller must hold bond lock for read, BH disabled */
+static void __tlb_clear_slave(struct bonding *bond, struct slave *slave,
+			 int save_load)
 {
 	struct tlb_client_info *tx_hash_table;
 	u32 index;
 
-	_lock_tx_hashtbl(bond);
-
 	/* clear slave from tx_hashtbl */
 	tx_hash_table = BOND_ALB_INFO(bond).tx_hashtbl;
 
@@ -151,8 +160,15 @@
 	}
 
 	tlb_init_slave(slave);
+}
 
-	_unlock_tx_hashtbl(bond);
+/* Caller must hold bond lock for read */
+static void tlb_clear_slave(struct bonding *bond, struct slave *slave,
+			 int save_load)
+{
+	_lock_tx_hashtbl_bh(bond);
+	__tlb_clear_slave(bond, slave, save_load);
+	_unlock_tx_hashtbl_bh(bond);
 }
 
 /* Must be called before starting the monitor timer */
@@ -169,7 +185,7 @@
 		       bond->dev->name);
 		return -1;
 	}
-	_lock_tx_hashtbl(bond);
+	_lock_tx_hashtbl_bh(bond);
 
 	bond_info->tx_hashtbl = new_hashtbl;
 
@@ -177,7 +193,7 @@
 		tlb_init_table_entry(&bond_info->tx_hashtbl[i], 0);
 	}
 
-	_unlock_tx_hashtbl(bond);
+	_unlock_tx_hashtbl_bh(bond);
 
 	return 0;
 }
@@ -187,12 +203,12 @@
 {
 	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
 
-	_lock_tx_hashtbl(bond);
+	_lock_tx_hashtbl_bh(bond);
 
 	kfree(bond_info->tx_hashtbl);
 	bond_info->tx_hashtbl = NULL;
 
-	_unlock_tx_hashtbl(bond);
+	_unlock_tx_hashtbl_bh(bond);
 }
 
 static long long compute_gap(struct slave *slave)
@@ -226,15 +242,13 @@
 	return least_loaded;
 }
 
-/* Caller must hold bond lock for read */
-static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index, u32 skb_len)
+static struct slave *__tlb_choose_channel(struct bonding *bond, u32 hash_index,
+						u32 skb_len)
 {
 	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
 	struct tlb_client_info *hash_table;
 	struct slave *assigned_slave;
 
-	_lock_tx_hashtbl(bond);
-
 	hash_table = bond_info->tx_hashtbl;
 	assigned_slave = hash_table[hash_index].tx_slave;
 	if (!assigned_slave) {
@@ -263,22 +277,46 @@
 		hash_table[hash_index].tx_bytes += skb_len;
 	}
 
-	_unlock_tx_hashtbl(bond);
-
 	return assigned_slave;
 }
 
+/* Caller must hold bond lock for read */
+static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index,
+					u32 skb_len)
+{
+	struct slave *tx_slave;
+	/*
+	 * We don't need to disable softirq here, because
+	 * tlb_choose_channel() is only called by bond_alb_xmit()
+	 * which already has softirq disabled.
+	 */
+	_lock_tx_hashtbl(bond);
+	tx_slave = __tlb_choose_channel(bond, hash_index, skb_len);
+	_unlock_tx_hashtbl(bond);
+	return tx_slave;
+}
+
 /*********************** rlb specific functions ***************************/
-static inline void _lock_rx_hashtbl(struct bonding *bond)
+static inline void _lock_rx_hashtbl_bh(struct bonding *bond)
 {
 	spin_lock_bh(&(BOND_ALB_INFO(bond).rx_hashtbl_lock));
 }
 
-static inline void _unlock_rx_hashtbl(struct bonding *bond)
+static inline void _unlock_rx_hashtbl_bh(struct bonding *bond)
 {
 	spin_unlock_bh(&(BOND_ALB_INFO(bond).rx_hashtbl_lock));
 }
 
+static inline void _lock_rx_hashtbl(struct bonding *bond)
+{
+	spin_lock(&(BOND_ALB_INFO(bond).rx_hashtbl_lock));
+}
+
+static inline void _unlock_rx_hashtbl(struct bonding *bond)
+{
+	spin_unlock(&(BOND_ALB_INFO(bond).rx_hashtbl_lock));
+}
+
 /* when an ARP REPLY is received from a client update its info
  * in the rx_hashtbl
  */
@@ -288,7 +326,7 @@
 	struct rlb_client_info *client_info;
 	u32 hash_index;
 
-	_lock_rx_hashtbl(bond);
+	_lock_rx_hashtbl_bh(bond);
 
 	hash_index = _simple_hash((u8*)&(arp->ip_src), sizeof(arp->ip_src));
 	client_info = &(bond_info->rx_hashtbl[hash_index]);
@@ -303,7 +341,7 @@
 		bond_info->rx_ntt = 1;
 	}
 
-	_unlock_rx_hashtbl(bond);
+	_unlock_rx_hashtbl_bh(bond);
 }
 
 static void rlb_arp_recv(struct sk_buff *skb, struct bonding *bond,
@@ -401,7 +439,7 @@
 	u32 index, next_index;
 
 	/* clear slave from rx_hashtbl */
-	_lock_rx_hashtbl(bond);
+	_lock_rx_hashtbl_bh(bond);
 
 	rx_hash_table = bond_info->rx_hashtbl;
 	index = bond_info->rx_hashtbl_head;
@@ -432,7 +470,7 @@
 		}
 	}
 
-	_unlock_rx_hashtbl(bond);
+	_unlock_rx_hashtbl_bh(bond);
 
 	write_lock_bh(&bond->curr_slave_lock);
 
@@ -489,7 +527,7 @@
 	struct rlb_client_info *client_info;
 	u32 hash_index;
 
-	_lock_rx_hashtbl(bond);
+	_lock_rx_hashtbl_bh(bond);
 
 	hash_index = bond_info->rx_hashtbl_head;
 	for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) {
@@ -507,7 +545,7 @@
 	 */
 	bond_info->rlb_update_delay_counter = RLB_UPDATE_DELAY;
 
-	_unlock_rx_hashtbl(bond);
+	_unlock_rx_hashtbl_bh(bond);
 }
 
 /* The slave was assigned a new mac address - update the clients */
@@ -518,7 +556,7 @@
 	int ntt = 0;
 	u32 hash_index;
 
-	_lock_rx_hashtbl(bond);
+	_lock_rx_hashtbl_bh(bond);
 
 	hash_index = bond_info->rx_hashtbl_head;
 	for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) {
@@ -538,7 +576,7 @@
 		bond_info->rlb_update_retry_counter = RLB_UPDATE_RETRY;
 	}
 
-	_unlock_rx_hashtbl(bond);
+	_unlock_rx_hashtbl_bh(bond);
 }
 
 /* mark all clients using src_ip to be updated */
@@ -709,7 +747,7 @@
 	int ntt;
 	u32 hash_index;
 
-	_lock_rx_hashtbl(bond);
+	_lock_rx_hashtbl_bh(bond);
 
 	ntt = 0;
 	hash_index = bond_info->rx_hashtbl_head;
@@ -727,7 +765,7 @@
 	if (ntt) {
 		bond_info->rx_ntt = 1;
 	}
-	_unlock_rx_hashtbl(bond);
+	_unlock_rx_hashtbl_bh(bond);
 }
 
 /* Caller must hold rx_hashtbl lock */
@@ -751,7 +789,7 @@
 		       bond->dev->name);
 		return -1;
 	}
-	_lock_rx_hashtbl(bond);
+	_lock_rx_hashtbl_bh(bond);
 
 	bond_info->rx_hashtbl = new_hashtbl;
 
@@ -761,7 +799,7 @@
 		rlb_init_table_entry(bond_info->rx_hashtbl + i);
 	}
 
-	_unlock_rx_hashtbl(bond);
+	_unlock_rx_hashtbl_bh(bond);
 
 	/* register to receive ARPs */
 	bond->recv_probe = rlb_arp_recv;
@@ -773,13 +811,13 @@
 {
 	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
 
-	_lock_rx_hashtbl(bond);
+	_lock_rx_hashtbl_bh(bond);
 
 	kfree(bond_info->rx_hashtbl);
 	bond_info->rx_hashtbl = NULL;
 	bond_info->rx_hashtbl_head = RLB_NULL_INDEX;
 
-	_unlock_rx_hashtbl(bond);
+	_unlock_rx_hashtbl_bh(bond);
 }
 
 static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id)
@@ -787,7 +825,7 @@
 	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
 	u32 curr_index;
 
-	_lock_rx_hashtbl(bond);
+	_lock_rx_hashtbl_bh(bond);
 
 	curr_index = bond_info->rx_hashtbl_head;
 	while (curr_index != RLB_NULL_INDEX) {
@@ -812,7 +850,7 @@
 		curr_index = next_index;
 	}
 
-	_unlock_rx_hashtbl(bond);
+	_unlock_rx_hashtbl_bh(bond);
 }
 
 /*********************** tlb/rlb shared functions *********************/
@@ -1320,7 +1358,9 @@
 		res = bond_dev_queue_xmit(bond, skb, tx_slave->dev);
 	} else {
 		if (tx_slave) {
-			tlb_clear_slave(bond, tx_slave, 0);
+			_lock_tx_hashtbl(bond);
+			__tlb_clear_slave(bond, tx_slave, 0);
+			_unlock_tx_hashtbl(bond);
 		}
 	}
 

diff --git a/drivers/net/ethernet/8390/ax88796.c b/drivers/net/ethernet/8390/ax88796.c
index 9e8ba4f..0f92e35 100644
--- a/drivers/net/ethernet/8390/ax88796.c
+++ b/drivers/net/ethernet/8390/ax88796.c

@@ -623,7 +623,8 @@
 
 	ax->mii_bus->name = "ax88796_mii_bus";
 	ax->mii_bus->parent = dev->dev.parent;
-	snprintf(ax->mii_bus->id, MII_BUS_ID_SIZE, "%x", pdev->id);
+	snprintf(ax->mii_bus->id, MII_BUS_ID_SIZE, "%s-%x",
+		pdev->name, pdev->id);
 
 	ax->mii_bus->irq = kmalloc(sizeof(int) * PHY_MAX_ADDR, GFP_KERNEL);
 	if (!ax->mii_bus->irq) {

diff --git a/drivers/net/ethernet/adi/bfin_mac.c b/drivers/net/ethernet/adi/bfin_mac.c
index b6d69c9..d812a10 100644
--- a/drivers/net/ethernet/adi/bfin_mac.c
+++ b/drivers/net/ethernet/adi/bfin_mac.c

@@ -1670,7 +1670,8 @@
 	miibus->name = "bfin_mii_bus";
 	miibus->phy_mask = mii_bus_pd->phy_mask;
 
-	snprintf(miibus->id, MII_BUS_ID_SIZE, "0");
+	snprintf(miibus->id, MII_BUS_ID_SIZE, "%s-%x",
+		pdev->name, pdev->id);
 	miibus->irq = kmalloc(sizeof(int)*PHY_MAX_ADDR, GFP_KERNEL);
 	if (!miibus->irq)
 		goto out_err_irq_alloc;

diff --git a/drivers/net/ethernet/amd/au1000_eth.c b/drivers/net/ethernet/amd/au1000_eth.c
index cc9262b..8b95dd3 100644
--- a/drivers/net/ethernet/amd/au1000_eth.c
+++ b/drivers/net/ethernet/amd/au1000_eth.c

@@ -1171,7 +1171,8 @@
 	aup->mii_bus->write = au1000_mdiobus_write;
 	aup->mii_bus->reset = au1000_mdiobus_reset;
 	aup->mii_bus->name = "au1000_eth_mii";
-	snprintf(aup->mii_bus->id, MII_BUS_ID_SIZE, "%x", aup->mac_id);
+	snprintf(aup->mii_bus->id, MII_BUS_ID_SIZE, "%s-%x",
+		pdev->name, aup->mac_id);
 	aup->mii_bus->irq = kmalloc(sizeof(int)*PHY_MAX_ADDR, GFP_KERNEL);
 	if (aup->mii_bus->irq == NULL)
 		goto err_out;

diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c
index d44331e..986019b2 100644
--- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c
+++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c

@@ -1727,7 +1727,7 @@
 		bus->priv = priv;
 		bus->read = bcm_enet_mdio_read_phylib;
 		bus->write = bcm_enet_mdio_write_phylib;
-		sprintf(bus->id, "%d", priv->mac_id);
+		sprintf(bus->id, "%s-%d", pdev->name, priv->mac_id);
 
 		/* only probe bus where we think the PHY is, because
 		 * the mdio read operation return 0 instead of 0xffff

diff --git a/drivers/net/ethernet/broadcom/sb1250-mac.c b/drivers/net/ethernet/broadcom/sb1250-mac.c
index 8fa7abc..084904c 100644
--- a/drivers/net/ethernet/broadcom/sb1250-mac.c
+++ b/drivers/net/ethernet/broadcom/sb1250-mac.c

@@ -2259,7 +2259,8 @@
 	}
 
 	sc->mii_bus->name = sbmac_mdio_string;
-	snprintf(sc->mii_bus->id, MII_BUS_ID_SIZE, "%x", idx);
+	snprintf(sc->mii_bus->id, MII_BUS_ID_SIZE, "%s-%x",
+		pldev->name, idx);
 	sc->mii_bus->priv = sc;
 	sc->mii_bus->read = sbmac_mii_read;
 	sc->mii_bus->write = sbmac_mii_write;

diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c
index f3d5c65..2320068 100644
--- a/drivers/net/ethernet/cadence/macb.c
+++ b/drivers/net/ethernet/cadence/macb.c

@@ -243,7 +243,8 @@
 	bp->mii_bus->read = &macb_mdio_read;
 	bp->mii_bus->write = &macb_mdio_write;
 	bp->mii_bus->reset = &macb_mdio_reset;
-	snprintf(bp->mii_bus->id, MII_BUS_ID_SIZE, "%x", bp->pdev->id);
+	snprintf(bp->mii_bus->id, MII_BUS_ID_SIZE, "%s-%x",
+		bp->pdev->name, bp->pdev->id);
 	bp->mii_bus->priv = bp;
 	bp->mii_bus->parent = &bp->dev->dev;
 	pdata = bp->pdev->dev.platform_data;

diff --git a/drivers/net/ethernet/dnet.c b/drivers/net/ethernet/dnet.c
index ce88c0f..925c9ba 100644
--- a/drivers/net/ethernet/dnet.c
+++ b/drivers/net/ethernet/dnet.c

@@ -325,7 +325,8 @@
 	bp->mii_bus->write = &dnet_mdio_write;
 	bp->mii_bus->reset = &dnet_mdio_reset;
 
-	snprintf(bp->mii_bus->id, MII_BUS_ID_SIZE, "%x", 0);
+	snprintf(bp->mii_bus->id, MII_BUS_ID_SIZE, "%s-%x",
+		bp->pdev->name, bp->pdev->id);
 
 	bp->mii_bus->priv = bp;
 

diff --git a/drivers/net/ethernet/freescale/fec.c b/drivers/net/ethernet/freescale/fec.c
index ddcbbb3..7b25e9c 100644
--- a/drivers/net/ethernet/freescale/fec.c
+++ b/drivers/net/ethernet/freescale/fec.c

@@ -476,6 +476,7 @@
 	} else {
 #ifdef FEC_MIIGSK_ENR
 		if (id_entry->driver_data & FEC_QUIRK_USE_GASKET) {
+			u32 cfgr;
 			/* disable the gasket and wait */
 			writel(0, fep->hwp + FEC_MIIGSK_ENR);
 			while (readl(fep->hwp + FEC_MIIGSK_ENR) & 4)
@@ -486,9 +487,11 @@
 			 *   RMII, 50 MHz, no loopback, no echo
 			 *   MII, 25 MHz, no loopback, no echo
 			 */
-			writel((fep->phy_interface == PHY_INTERFACE_MODE_RMII) ?
-					1 : 0, fep->hwp + FEC_MIIGSK_CFGR);
-
+			cfgr = (fep->phy_interface == PHY_INTERFACE_MODE_RMII)
+				? BM_MIIGSK_CFGR_RMII : BM_MIIGSK_CFGR_MII;
+			if (fep->phy_dev && fep->phy_dev->speed == SPEED_10)
+				cfgr |= BM_MIIGSK_CFGR_FRCONT_10M;
+			writel(cfgr, fep->hwp + FEC_MIIGSK_CFGR);
 
 			/* re-enable the gasket */
 			writel(2, fep->hwp + FEC_MIIGSK_ENR);
@@ -1077,7 +1080,8 @@
 	fep->mii_bus->read = fec_enet_mdio_read;
 	fep->mii_bus->write = fec_enet_mdio_write;
 	fep->mii_bus->reset = fec_enet_mdio_reset;
-	snprintf(fep->mii_bus->id, MII_BUS_ID_SIZE, "%x", fep->dev_id + 1);
+	snprintf(fep->mii_bus->id, MII_BUS_ID_SIZE, "%s-%x",
+		pdev->name, fep->dev_id + 1);
 	fep->mii_bus->priv = fep;
 	fep->mii_bus->parent = &pdev->dev;
 

diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
index 8b2c6d7..8408c62 100644
--- a/drivers/net/ethernet/freescale/fec.h
+++ b/drivers/net/ethernet/freescale/fec.h

@@ -47,6 +47,10 @@
 #define FEC_MIIGSK_CFGR		0x300 /* MIIGSK Configuration reg */
 #define FEC_MIIGSK_ENR		0x308 /* MIIGSK Enable reg */
 
+#define BM_MIIGSK_CFGR_MII		0x00
+#define BM_MIIGSK_CFGR_RMII		0x01
+#define BM_MIIGSK_CFGR_FRCONT_10M	0x40
+
 #else
 
 #define FEC_ECNTRL		0x000 /* Ethernet control reg */

diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index e01cdaa..39d160d 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c

@@ -1984,7 +1984,8 @@
 	return fcb;
 }
 
-static inline void gfar_tx_checksum(struct sk_buff *skb, struct txfcb *fcb)
+static inline void gfar_tx_checksum(struct sk_buff *skb, struct txfcb *fcb,
+		int fcb_length)
 {
 	u8 flags = 0;
 
@@ -2006,7 +2007,7 @@
 	 * frame (skb->data) and the start of the IP hdr.
 	 * l4os is the distance between the start of the
 	 * l3 hdr and the l4 hdr */
-	fcb->l3os = (u16)(skb_network_offset(skb) - GMAC_FCB_LEN);
+	fcb->l3os = (u16)(skb_network_offset(skb) - fcb_length);
 	fcb->l4os = skb_network_header_len(skb);
 
 	fcb->flags = flags;
@@ -2046,7 +2047,7 @@
 	int i, rq = 0, do_tstamp = 0;
 	u32 bufaddr;
 	unsigned long flags;
-	unsigned int nr_frags, nr_txbds, length;
+	unsigned int nr_frags, nr_txbds, length, fcb_length = GMAC_FCB_LEN;
 
 	/*
 	 * TOE=1 frames larger than 2500 bytes may see excess delays
@@ -2070,22 +2071,28 @@
 
 	/* check if time stamp should be generated */
 	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP &&
-		     priv->hwts_tx_en))
+			priv->hwts_tx_en)) {
 		do_tstamp = 1;
+		fcb_length = GMAC_FCB_LEN + GMAC_TXPAL_LEN;
+	}
 
 	/* make space for additional header when fcb is needed */
 	if (((skb->ip_summed == CHECKSUM_PARTIAL) ||
 			vlan_tx_tag_present(skb) ||
 			unlikely(do_tstamp)) &&
-			(skb_headroom(skb) < GMAC_FCB_LEN)) {
+			(skb_headroom(skb) < fcb_length)) {
 		struct sk_buff *skb_new;
 
-		skb_new = skb_realloc_headroom(skb, GMAC_FCB_LEN);
+		skb_new = skb_realloc_headroom(skb, fcb_length);
 		if (!skb_new) {
 			dev->stats.tx_errors++;
 			kfree_skb(skb);
 			return NETDEV_TX_OK;
 		}
+
+		/* Steal sock reference for processing TX time stamps */
+		swap(skb_new->sk, skb->sk);
+		swap(skb_new->destructor, skb->destructor);
 		kfree_skb(skb);
 		skb = skb_new;
 	}
@@ -2154,6 +2161,12 @@
 		lstatus = txbdp_start->lstatus;
 	}
 
+	/* Add TxPAL between FCB and frame if required */
+	if (unlikely(do_tstamp)) {
+		skb_push(skb, GMAC_TXPAL_LEN);
+		memset(skb->data, 0, GMAC_TXPAL_LEN);
+	}
+
 	/* Set up checksumming */
 	if (CHECKSUM_PARTIAL == skb->ip_summed) {
 		fcb = gfar_add_fcb(skb);
@@ -2164,7 +2177,7 @@
 			skb_checksum_help(skb);
 		} else {
 			lstatus |= BD_LFLAG(TXBD_TOE);
-			gfar_tx_checksum(skb, fcb);
+			gfar_tx_checksum(skb, fcb, fcb_length);
 		}
 	}
 
@@ -2196,9 +2209,9 @@
 	 * the full frame length.
 	 */
 	if (unlikely(do_tstamp)) {
-		txbdp_tstamp->bufPtr = txbdp_start->bufPtr + GMAC_FCB_LEN;
+		txbdp_tstamp->bufPtr = txbdp_start->bufPtr + fcb_length;
 		txbdp_tstamp->lstatus |= BD_LFLAG(TXBD_READY) |
-				(skb_headlen(skb) - GMAC_FCB_LEN);
+				(skb_headlen(skb) - fcb_length);
 		lstatus |= BD_LFLAG(TXBD_CRC | TXBD_READY) | GMAC_FCB_LEN;
 	} else {
 		lstatus |= BD_LFLAG(TXBD_CRC | TXBD_READY) | skb_headlen(skb);
@@ -2490,7 +2503,7 @@
 
 		if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) {
 			next = next_txbd(bdp, base, tx_ring_size);
-			buflen = next->length + GMAC_FCB_LEN;
+			buflen = next->length + GMAC_FCB_LEN + GMAC_TXPAL_LEN;
 		} else
 			buflen = bdp->length;
 
@@ -2502,6 +2515,7 @@
 			u64 *ns = (u64*) (((u32)skb->data + 0x10) & ~0x7);
 			memset(&shhwtstamps, 0, sizeof(shhwtstamps));
 			shhwtstamps.hwtstamp = ns_to_ktime(*ns);
+			skb_pull(skb, GMAC_FCB_LEN + GMAC_TXPAL_LEN);
 			skb_tstamp_tx(skb, &shhwtstamps);
 			bdp->lstatus &= BD_LFLAG(TXBD_WRAP);
 			bdp = next;

diff --git a/drivers/net/ethernet/freescale/gianfar.h b/drivers/net/ethernet/freescale/gianfar.h
index fe7ac3a..40c33a7 100644
--- a/drivers/net/ethernet/freescale/gianfar.h
+++ b/drivers/net/ethernet/freescale/gianfar.h

@@ -63,6 +63,9 @@
 /* Length for FCB */
 #define GMAC_FCB_LEN 8
 
+/* Length for TxPAL */
+#define GMAC_TXPAL_LEN 16
+
 /* Default padding amount */
 #define DEFAULT_PADDING 2
 

diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c
index 0b3567a..85e2c6c 100644
--- a/drivers/net/ethernet/lantiq_etop.c
+++ b/drivers/net/ethernet/lantiq_etop.c

@@ -98,6 +98,7 @@
 
 struct ltq_etop_priv {
 	struct net_device *netdev;
+	struct platform_device *pdev;
 	struct ltq_eth_data *pldata;
 	struct resource *res;
 
@@ -436,7 +437,8 @@
 	priv->mii_bus->read = ltq_etop_mdio_rd;
 	priv->mii_bus->write = ltq_etop_mdio_wr;
 	priv->mii_bus->name = "ltq_mii";
-	snprintf(priv->mii_bus->id, MII_BUS_ID_SIZE, "%x", 0);
+	snprintf(priv->mii_bus->id, MII_BUS_ID_SIZE, "%s-%x",
+		priv->pdev->name, priv->pdev->id);
 	priv->mii_bus->irq = kmalloc(sizeof(int) * PHY_MAX_ADDR, GFP_KERNEL);
 	if (!priv->mii_bus->irq) {
 		err = -ENOMEM;
@@ -734,6 +736,7 @@
 	dev->ethtool_ops = &ltq_etop_ethtool_ops;
 	priv = netdev_priv(dev);
 	priv->res = res;
+	priv->pdev = pdev;
 	priv->pldata = dev_get_platdata(&pdev->dev);
 	priv->netdev = dev;
 	spin_lock_init(&priv->lock);

diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c
index 80aab4e..9c049d2 100644
--- a/drivers/net/ethernet/marvell/mv643xx_eth.c
+++ b/drivers/net/ethernet/marvell/mv643xx_eth.c

@@ -2613,7 +2613,8 @@
 		msp->smi_bus->name = "mv643xx_eth smi";
 		msp->smi_bus->read = smi_bus_read;
 		msp->smi_bus->write = smi_bus_write,
-		snprintf(msp->smi_bus->id, MII_BUS_ID_SIZE, "%d", pdev->id);
+		snprintf(msp->smi_bus->id, MII_BUS_ID_SIZE, "%s-%d",
+			pdev->name, pdev->id);
 		msp->smi_bus->parent = &pdev->dev;
 		msp->smi_bus->phy_mask = 0xffffffff;
 		if (mdiobus_register(msp->smi_bus) < 0)

diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c
index 5ec409e..953ba58 100644
--- a/drivers/net/ethernet/marvell/pxa168_eth.c
+++ b/drivers/net/ethernet/marvell/pxa168_eth.c

@@ -1552,7 +1552,8 @@
 	pep->smi_bus->name = "pxa168_eth smi";
 	pep->smi_bus->read = pxa168_smi_read;
 	pep->smi_bus->write = pxa168_smi_write;
-	snprintf(pep->smi_bus->id, MII_BUS_ID_SIZE, "%d", pdev->id);
+	snprintf(pep->smi_bus->id, MII_BUS_ID_SIZE, "%s-%d",
+		pdev->name, pdev->id);
 	pep->smi_bus->parent = &pdev->dev;
 	pep->smi_bus->phy_mask = 0xffffffff;
 	err = mdiobus_register(pep->smi_bus);

diff --git a/drivers/net/ethernet/micrel/ksz884x.c b/drivers/net/ethernet/micrel/ksz884x.c
index 6ed09a8..e52cd31 100644
--- a/drivers/net/ethernet/micrel/ksz884x.c
+++ b/drivers/net/ethernet/micrel/ksz884x.c

@@ -746,7 +746,7 @@
 #define MAC_ADDR_ORDER(i)		(ETH_ALEN - 1 - (i))
 
 #define MAX_ETHERNET_BODY_SIZE		1500
-#define ETHERNET_HEADER_SIZE		14
+#define ETHERNET_HEADER_SIZE		(14 + VLAN_HLEN)
 
 #define MAX_ETHERNET_PACKET_SIZE	\
 	(MAX_ETHERNET_BODY_SIZE + ETHERNET_HEADER_SIZE)

diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index fc9bda9..6ece429 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c

@@ -1702,7 +1702,8 @@
 	/* Hook up MII support for ethtool */
 	mdp->mii_bus->name = "sh_mii";
 	mdp->mii_bus->parent = &ndev->dev;
-	snprintf(mdp->mii_bus->id, MII_BUS_ID_SIZE, "%x", id);
+	snprintf(mdp->mii_bus->id, MII_BUS_ID_SIZE, "%s-%x",
+		mdp->pdev->name, pdid);
 
 	/* PHY IRQ */
 	mdp->mii_bus->irq = kmalloc(sizeof(int)*PHY_MAX_ADDR, GFP_KERNEL);

diff --git a/drivers/net/ethernet/s6gmac.c b/drivers/net/ethernet/s6gmac.c
index a7ff8ea..22e9c01 100644
--- a/drivers/net/ethernet/s6gmac.c
+++ b/drivers/net/ethernet/s6gmac.c

@@ -1004,7 +1004,7 @@
 	mb->write = s6mii_write;
 	mb->reset = s6mii_reset;
 	mb->priv = pd;
-	snprintf(mb->id, MII_BUS_ID_SIZE, "0");
+	snprintf(mb->id, MII_BUS_ID_SIZE, "%s-%x", pdev->name, pdev->id);
 	mb->phy_mask = ~(1 << 0);
 	mb->irq = &pd->mii.irq[0];
 	for (i = 0; i < PHY_MAX_ADDR; i++) {

diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c
index 9d0b8ce..24d2df0 100644
--- a/drivers/net/ethernet/smsc/smsc911x.c
+++ b/drivers/net/ethernet/smsc/smsc911x.c

@@ -1044,7 +1044,8 @@
 	}
 
 	pdata->mii_bus->name = SMSC_MDIONAME;
-	snprintf(pdata->mii_bus->id, MII_BUS_ID_SIZE, "%x", pdev->id);
+	snprintf(pdata->mii_bus->id, MII_BUS_ID_SIZE, "%s-%x",
+		pdev->name, pdev->id);
 	pdata->mii_bus->priv = pdata;
 	pdata->mii_bus->read = smsc911x_mii_read;
 	pdata->mii_bus->write = smsc911x_mii_write;

diff --git a/drivers/net/ethernet/stmicro/stmmac/mmc_core.c b/drivers/net/ethernet/stmicro/stmmac/mmc_core.c
index 41e6b33..c07cfe9 100644
--- a/drivers/net/ethernet/stmicro/stmmac/mmc_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/mmc_core.c

@@ -22,6 +22,7 @@
   Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
 *******************************************************************************/
 
+#include <linux/kernel.h>
 #include <linux/io.h>
 #include "mmc.h"
 

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 3738b47..96fa2da 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c

@@ -307,7 +307,7 @@
 	priv->speed = 0;
 	priv->oldduplex = -1;
 
-	snprintf(bus_id, MII_BUS_ID_SIZE, "%x", priv->plat->bus_id);
+	snprintf(bus_id, MII_BUS_ID_SIZE, "stmmac-%x", priv->plat->bus_id);
 	snprintf(phy_id, MII_BUS_ID_SIZE + 3, PHY_ID_FMT, bus_id,
 		 priv->plat->phy_addr);
 	pr_debug("stmmac_init_phy:  trying to attach to %s\n", phy_id);
@@ -772,7 +772,7 @@
 		dwmac_mmc_ctrl(priv->ioaddr, mode);
 		memset(&priv->mmc, 0, sizeof(struct stmmac_counters));
 	} else
-		pr_info(" No MAC Management Counters available");
+		pr_info(" No MAC Management Counters available\n");
 }
 
 static u32 stmmac_get_synopsys_id(struct stmmac_priv *priv)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
index 51f4412..da4a104 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c

@@ -158,7 +158,8 @@
 	new_bus->read = &stmmac_mdio_read;
 	new_bus->write = &stmmac_mdio_write;
 	new_bus->reset = &stmmac_mdio_reset;
-	snprintf(new_bus->id, MII_BUS_ID_SIZE, "%x", mdio_bus_data->bus_id);
+	snprintf(new_bus->id, MII_BUS_ID_SIZE, "%s-%x",
+		new_bus->name, mdio_bus_data->bus_id);
 	new_bus->priv = ndev;
 	new_bus->irq = irqlist;
 	new_bus->phy_mask = mdio_bus_data->phy_mask;

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index 7b1594f..1ac8324 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c

@@ -62,7 +62,7 @@
 	priv = stmmac_dvr_probe(&(pdev->dev), plat_dat);
 	if (!priv) {
 		pr_err("%s: main drivr probe failed", __func__);
-		goto out_release_region;
+		goto out_unmap;
 	}
 
 	priv->ioaddr = addr;

diff --git a/drivers/net/ethernet/ti/cpmac.c b/drivers/net/ethernet/ti/cpmac.c
index aaac0c7..4d9a28f 100644
--- a/drivers/net/ethernet/ti/cpmac.c
+++ b/drivers/net/ethernet/ti/cpmac.c

@@ -1269,7 +1269,7 @@
 	}
 
 	cpmac_mii->phy_mask = ~(mask | 0x80000000);
-	snprintf(cpmac_mii->id, MII_BUS_ID_SIZE, "1");
+	snprintf(cpmac_mii->id, MII_BUS_ID_SIZE, "cpmac-1");
 
 	res = mdiobus_register(cpmac_mii);
 	if (res)

diff --git a/drivers/net/ethernet/ti/davinci_mdio.c b/drivers/net/ethernet/ti/davinci_mdio.c
index 7615040..ef7c9c1 100644
--- a/drivers/net/ethernet/ti/davinci_mdio.c
+++ b/drivers/net/ethernet/ti/davinci_mdio.c

@@ -313,7 +313,8 @@
 	data->bus->reset	= davinci_mdio_reset,
 	data->bus->parent	= dev;
 	data->bus->priv		= data;
-	snprintf(data->bus->id, MII_BUS_ID_SIZE, "%x", pdev->id);
+	snprintf(data->bus->id, MII_BUS_ID_SIZE, "%s-%x",
+		pdev->name, pdev->id);
 
 	data->clk = clk_get(dev, NULL);
 	if (IS_ERR(data->clk)) {

diff --git a/drivers/net/ethernet/tundra/tsi108_eth.c b/drivers/net/ethernet/tundra/tsi108_eth.c
index a9ce01ba..164fb77 100644
--- a/drivers/net/ethernet/tundra/tsi108_eth.c
+++ b/drivers/net/ethernet/tundra/tsi108_eth.c

@@ -1604,7 +1604,7 @@
 	data->phyregs = ioremap(einfo->phyregs, 0x400);
 	if (NULL == data->phyregs) {
 		err = -ENOMEM;
-		goto regs_fail;
+		goto phyregs_fail;
 	}
 /* MII setup */
 	data->mii_if.dev = dev;
@@ -1663,9 +1663,11 @@
 	return 0;
 
 register_fail:
-	iounmap(data->regs);
 	iounmap(data->phyregs);
 
+phyregs_fail:
+	iounmap(data->regs);
+
 regs_fail:
 	free_netdev(dev);
 	return err;

diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c
index 5c4983b..10b18eb 100644
--- a/drivers/net/ethernet/via/via-rhine.c
+++ b/drivers/net/ethernet/via/via-rhine.c

@@ -39,10 +39,9 @@
 
 /* A few user-configurable values.
    These may be modified when a driver module is loaded. */
-
-#define DEBUG
-static int debug = 1;	/* 1 normal messages, 0 quiet .. 7 verbose. */
-static int max_interrupt_work = 20;
+static int debug = 0;
+#define RHINE_MSG_DEFAULT \
+        (0x0000)
 
 /* Set the copy breakpoint for the copy-only-tiny-frames scheme.
    Setting to > 1518 effectively disables this feature. */
@@ -128,12 +127,10 @@
 MODULE_DESCRIPTION("VIA Rhine PCI Fast Ethernet driver");
 MODULE_LICENSE("GPL");
 
-module_param(max_interrupt_work, int, 0);
 module_param(debug, int, 0);
 module_param(rx_copybreak, int, 0);
 module_param(avoid_D3, bool, 0);
-MODULE_PARM_DESC(max_interrupt_work, "VIA Rhine maximum events handled per interrupt");
-MODULE_PARM_DESC(debug, "VIA Rhine debug level (0-7)");
+MODULE_PARM_DESC(debug, "VIA Rhine debug message flags");
 MODULE_PARM_DESC(rx_copybreak, "VIA Rhine copy breakpoint for copy-only-tiny-frames");
 MODULE_PARM_DESC(avoid_D3, "Avoid power state D3 (work-around for broken BIOSes)");
 
@@ -351,16 +348,25 @@
 
 /* Bits in the interrupt status/mask registers. */
 enum intr_status_bits {
-	IntrRxDone=0x0001, IntrRxErr=0x0004, IntrRxEmpty=0x0020,
-	IntrTxDone=0x0002, IntrTxError=0x0008, IntrTxUnderrun=0x0210,
-	IntrPCIErr=0x0040,
-	IntrStatsMax=0x0080, IntrRxEarly=0x0100,
-	IntrRxOverflow=0x0400, IntrRxDropped=0x0800, IntrRxNoBuf=0x1000,
-	IntrTxAborted=0x2000, IntrLinkChange=0x4000,
-	IntrRxWakeUp=0x8000,
-	IntrNormalSummary=0x0003, IntrAbnormalSummary=0xC260,
-	IntrTxDescRace=0x080000,	/* mapped from IntrStatus2 */
-	IntrTxErrSummary=0x082218,
+	IntrRxDone	= 0x0001,
+	IntrTxDone	= 0x0002,
+	IntrRxErr	= 0x0004,
+	IntrTxError	= 0x0008,
+	IntrRxEmpty	= 0x0020,
+	IntrPCIErr	= 0x0040,
+	IntrStatsMax	= 0x0080,
+	IntrRxEarly	= 0x0100,
+	IntrTxUnderrun	= 0x0210,
+	IntrRxOverflow	= 0x0400,
+	IntrRxDropped	= 0x0800,
+	IntrRxNoBuf	= 0x1000,
+	IntrTxAborted	= 0x2000,
+	IntrLinkChange	= 0x4000,
+	IntrRxWakeUp	= 0x8000,
+	IntrTxDescRace		= 0x080000,	/* mapped from IntrStatus2 */
+	IntrNormalSummary	= IntrRxDone | IntrTxDone,
+	IntrTxErrSummary	= IntrTxDescRace | IntrTxAborted | IntrTxError |
+				  IntrTxUnderrun,
 };
 
 /* Bits in WOLcrSet/WOLcrClr and PwrcsrSet/PwrcsrClr */
@@ -439,8 +445,13 @@
 	struct net_device *dev;
 	struct napi_struct napi;
 	spinlock_t lock;
+	struct mutex task_lock;
+	bool task_enable;
+	struct work_struct slow_event_task;
 	struct work_struct reset_task;
 
+	u32 msg_enable;
+
 	/* Frequently used values: keep some adjacent for cache effect. */
 	u32 quirks;
 	struct rx_desc *rx_head_desc;
@@ -476,41 +487,50 @@
 static void mdio_write(struct net_device *dev, int phy_id, int location, int value);
 static int  rhine_open(struct net_device *dev);
 static void rhine_reset_task(struct work_struct *work);
+static void rhine_slow_event_task(struct work_struct *work);
 static void rhine_tx_timeout(struct net_device *dev);
 static netdev_tx_t rhine_start_tx(struct sk_buff *skb,
 				  struct net_device *dev);
 static irqreturn_t rhine_interrupt(int irq, void *dev_instance);
 static void rhine_tx(struct net_device *dev);
 static int rhine_rx(struct net_device *dev, int limit);
-static void rhine_error(struct net_device *dev, int intr_status);
 static void rhine_set_rx_mode(struct net_device *dev);
 static struct net_device_stats *rhine_get_stats(struct net_device *dev);
 static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
 static const struct ethtool_ops netdev_ethtool_ops;
 static int  rhine_close(struct net_device *dev);
-static void rhine_shutdown (struct pci_dev *pdev);
 static int rhine_vlan_rx_add_vid(struct net_device *dev, unsigned short vid);
 static int rhine_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid);
-static void rhine_set_cam(void __iomem *ioaddr, int idx, u8 *addr);
-static void rhine_set_vlan_cam(void __iomem *ioaddr, int idx, u8 *addr);
-static void rhine_set_cam_mask(void __iomem *ioaddr, u32 mask);
-static void rhine_set_vlan_cam_mask(void __iomem *ioaddr, u32 mask);
-static void rhine_init_cam_filter(struct net_device *dev);
-static void rhine_update_vcam(struct net_device *dev);
+static void rhine_restart_tx(struct net_device *dev);
 
-#define RHINE_WAIT_FOR(condition)				\
-do {								\
-	int i = 1024;						\
-	while (!(condition) && --i)				\
-		;						\
-	if (debug > 1 && i < 512)				\
-		pr_info("%4d cycles used @ %s:%d\n",		\
-			1024 - i, __func__, __LINE__);		\
-} while (0)
-
-static inline u32 get_intr_status(struct net_device *dev)
+static void rhine_wait_bit(struct rhine_private *rp, u8 reg, u8 mask, bool high)
 {
-	struct rhine_private *rp = netdev_priv(dev);
+	void __iomem *ioaddr = rp->base;
+	int i;
+
+	for (i = 0; i < 1024; i++) {
+		if (high ^ !!(ioread8(ioaddr + reg) & mask))
+			break;
+		udelay(10);
+	}
+	if (i > 64) {
+		netif_dbg(rp, hw, rp->dev, "%s bit wait (%02x/%02x) cycle "
+			  "count: %04d\n", high ? "high" : "low", reg, mask, i);
+	}
+}
+
+static void rhine_wait_bit_high(struct rhine_private *rp, u8 reg, u8 mask)
+{
+	rhine_wait_bit(rp, reg, mask, true);
+}
+
+static void rhine_wait_bit_low(struct rhine_private *rp, u8 reg, u8 mask)
+{
+	rhine_wait_bit(rp, reg, mask, false);
+}
+
+static u32 rhine_get_events(struct rhine_private *rp)
+{
 	void __iomem *ioaddr = rp->base;
 	u32 intr_status;
 
@@ -521,6 +541,16 @@
 	return intr_status;
 }
 
+static void rhine_ack_events(struct rhine_private *rp, u32 mask)
+{
+	void __iomem *ioaddr = rp->base;
+
+	if (rp->quirks & rqStatusWBRace)
+		iowrite8(mask >> 16, ioaddr + IntrStatus2);
+	iowrite16(mask, ioaddr + IntrStatus);
+	mmiowb();
+}
+
 /*
  * Get power related registers into sane state.
  * Notify user about past WOL event.
@@ -585,6 +615,7 @@
 {
 	struct rhine_private *rp = netdev_priv(dev);
 	void __iomem *ioaddr = rp->base;
+	u8 cmd1;
 
 	iowrite8(Cmd1Reset, ioaddr + ChipCmd1);
 	IOSYNC;
@@ -597,13 +628,12 @@
 			iowrite8(0x40, ioaddr + MiscCmd);
 
 		/* Reset can take somewhat longer (rare) */
-		RHINE_WAIT_FOR(!(ioread8(ioaddr + ChipCmd1) & Cmd1Reset));
+		rhine_wait_bit_low(rp, ChipCmd1, Cmd1Reset);
 	}
 
-	if (debug > 1)
-		netdev_info(dev, "Reset %s\n",
-			    (ioread8(ioaddr + ChipCmd1) & Cmd1Reset) ?
-			    "failed" : "succeeded");
+	cmd1 = ioread8(ioaddr + ChipCmd1);
+	netif_info(rp, hw, dev, "Reset %s\n", (cmd1 & Cmd1Reset) ?
+		   "failed" : "succeeded");
 }
 
 #ifdef USE_MMIO
@@ -629,9 +659,15 @@
 {
 	struct rhine_private *rp = netdev_priv(dev);
 	void __iomem *ioaddr = rp->base;
+	int i;
 
 	outb(0x20, pioaddr + MACRegEEcsr);
-	RHINE_WAIT_FOR(!(inb(pioaddr + MACRegEEcsr) & 0x20));
+	for (i = 0; i < 1024; i++) {
+		if (!(inb(pioaddr + MACRegEEcsr) & 0x20))
+			break;
+	}
+	if (i > 512)
+		pr_info("%4d cycles used @ %s:%d\n", i, __func__, __LINE__);
 
 #ifdef USE_MMIO
 	/*
@@ -657,23 +693,127 @@
 }
 #endif
 
+static void rhine_kick_tx_threshold(struct rhine_private *rp)
+{
+	if (rp->tx_thresh < 0xe0) {
+		void __iomem *ioaddr = rp->base;
+
+		rp->tx_thresh += 0x20;
+		BYTE_REG_BITS_SET(rp->tx_thresh, 0x80, ioaddr + TxConfig);
+	}
+}
+
+static void rhine_tx_err(struct rhine_private *rp, u32 status)
+{
+	struct net_device *dev = rp->dev;
+
+	if (status & IntrTxAborted) {
+		netif_info(rp, tx_err, dev,
+			   "Abort %08x, frame dropped\n", status);
+	}
+
+	if (status & IntrTxUnderrun) {
+		rhine_kick_tx_threshold(rp);
+		netif_info(rp, tx_err ,dev, "Transmitter underrun, "
+			   "Tx threshold now %02x\n", rp->tx_thresh);
+	}
+
+	if (status & IntrTxDescRace)
+		netif_info(rp, tx_err, dev, "Tx descriptor write-back race\n");
+
+	if ((status & IntrTxError) &&
+	    (status & (IntrTxAborted | IntrTxUnderrun | IntrTxDescRace)) == 0) {
+		rhine_kick_tx_threshold(rp);
+		netif_info(rp, tx_err, dev, "Unspecified error. "
+			   "Tx threshold now %02x\n", rp->tx_thresh);
+	}
+
+	rhine_restart_tx(dev);
+}
+
+static void rhine_update_rx_crc_and_missed_errord(struct rhine_private *rp)
+{
+	void __iomem *ioaddr = rp->base;
+	struct net_device_stats *stats = &rp->dev->stats;
+
+	stats->rx_crc_errors    += ioread16(ioaddr + RxCRCErrs);
+	stats->rx_missed_errors += ioread16(ioaddr + RxMissed);
+
+	/*
+	 * Clears the "tally counters" for CRC errors and missed frames(?).
+	 * It has been reported that some chips need a write of 0 to clear
+	 * these, for others the counters are set to 1 when written to and
+	 * instead cleared when read. So we clear them both ways ...
+	 */
+	iowrite32(0, ioaddr + RxMissed);
+	ioread16(ioaddr + RxCRCErrs);
+	ioread16(ioaddr + RxMissed);
+}
+
+#define RHINE_EVENT_NAPI_RX	(IntrRxDone | \
+				 IntrRxErr | \
+				 IntrRxEmpty | \
+				 IntrRxOverflow	| \
+				 IntrRxDropped | \
+				 IntrRxNoBuf | \
+				 IntrRxWakeUp)
+
+#define RHINE_EVENT_NAPI_TX_ERR	(IntrTxError | \
+				 IntrTxAborted | \
+				 IntrTxUnderrun | \
+				 IntrTxDescRace)
+#define RHINE_EVENT_NAPI_TX	(IntrTxDone | RHINE_EVENT_NAPI_TX_ERR)
+
+#define RHINE_EVENT_NAPI	(RHINE_EVENT_NAPI_RX | \
+				 RHINE_EVENT_NAPI_TX | \
+				 IntrStatsMax)
+#define RHINE_EVENT_SLOW	(IntrPCIErr | IntrLinkChange)
+#define RHINE_EVENT		(RHINE_EVENT_NAPI | RHINE_EVENT_SLOW)
+
 static int rhine_napipoll(struct napi_struct *napi, int budget)
 {
 	struct rhine_private *rp = container_of(napi, struct rhine_private, napi);
 	struct net_device *dev = rp->dev;
 	void __iomem *ioaddr = rp->base;
-	int work_done;
+	u16 enable_mask = RHINE_EVENT & 0xffff;
+	int work_done = 0;
+	u32 status;
 
-	work_done = rhine_rx(dev, budget);
+	status = rhine_get_events(rp);
+	rhine_ack_events(rp, status & ~RHINE_EVENT_SLOW);
+
+	if (status & RHINE_EVENT_NAPI_RX)
+		work_done += rhine_rx(dev, budget);
+
+	if (status & RHINE_EVENT_NAPI_TX) {
+		if (status & RHINE_EVENT_NAPI_TX_ERR) {
+			/* Avoid scavenging before Tx engine turned off */
+			rhine_wait_bit_low(rp, ChipCmd, CmdTxOn);
+			if (ioread8(ioaddr + ChipCmd) & CmdTxOn)
+				netif_warn(rp, tx_err, dev, "Tx still on\n");
+		}
+
+		rhine_tx(dev);
+
+		if (status & RHINE_EVENT_NAPI_TX_ERR)
+			rhine_tx_err(rp, status);
+	}
+
+	if (status & IntrStatsMax) {
+		spin_lock(&rp->lock);
+		rhine_update_rx_crc_and_missed_errord(rp);
+		spin_unlock(&rp->lock);
+	}
+
+	if (status & RHINE_EVENT_SLOW) {
+		enable_mask &= ~RHINE_EVENT_SLOW;
+		schedule_work(&rp->slow_event_task);
+	}
 
 	if (work_done < budget) {
 		napi_complete(napi);
-
-		iowrite16(IntrRxDone | IntrRxErr | IntrRxEmpty| IntrRxOverflow |
-			  IntrRxDropped | IntrRxNoBuf | IntrTxAborted |
-			  IntrTxDone | IntrTxError | IntrTxUnderrun |
-			  IntrPCIErr | IntrStatsMax | IntrLinkChange,
-			  ioaddr + IntrEnable);
+		iowrite16(enable_mask, ioaddr + IntrEnable);
+		mmiowb();
 	}
 	return work_done;
 }
@@ -797,6 +937,7 @@
 	rp->quirks = quirks;
 	rp->pioaddr = pioaddr;
 	rp->pdev = pdev;
+	rp->msg_enable = netif_msg_init(debug, RHINE_MSG_DEFAULT);
 
 	rc = pci_request_regions(pdev, DRV_NAME);
 	if (rc)
@@ -856,7 +997,9 @@
 	dev->irq = pdev->irq;
 
 	spin_lock_init(&rp->lock);
+	mutex_init(&rp->task_lock);
 	INIT_WORK(&rp->reset_task, rhine_reset_task);
+	INIT_WORK(&rp->slow_event_task, rhine_slow_event_task);
 
 	rp->mii_if.dev = dev;
 	rp->mii_if.mdio_read = mdio_read;
@@ -916,8 +1059,8 @@
 		}
 	}
 	rp->mii_if.phy_id = phy_id;
-	if (debug > 1 && avoid_D3)
-		netdev_info(dev, "No D3 power state at shutdown\n");
+	if (avoid_D3)
+		netif_info(rp, probe, dev, "No D3 power state at shutdown\n");
 
 	return 0;
 
@@ -1093,7 +1236,7 @@
 	struct rhine_private *rp = netdev_priv(dev);
 	void __iomem *ioaddr = rp->base;
 
-	mii_check_media(&rp->mii_if, debug, init_media);
+	mii_check_media(&rp->mii_if, netif_msg_link(rp), init_media);
 
 	if (rp->mii_if.full_duplex)
 	    iowrite8(ioread8(ioaddr + ChipCmd1) | Cmd1FDuplex,
@@ -1101,24 +1244,26 @@
 	else
 	    iowrite8(ioread8(ioaddr + ChipCmd1) & ~Cmd1FDuplex,
 		   ioaddr + ChipCmd1);
-	if (debug > 1)
-		netdev_info(dev, "force_media %d, carrier %d\n",
-			    rp->mii_if.force_media, netif_carrier_ok(dev));
+
+	netif_info(rp, link, dev, "force_media %d, carrier %d\n",
+		   rp->mii_if.force_media, netif_carrier_ok(dev));
 }
 
 /* Called after status of force_media possibly changed */
 static void rhine_set_carrier(struct mii_if_info *mii)
 {
+	struct net_device *dev = mii->dev;
+	struct rhine_private *rp = netdev_priv(dev);
+
 	if (mii->force_media) {
 		/* autoneg is off: Link is always assumed to be up */
-		if (!netif_carrier_ok(mii->dev))
-			netif_carrier_on(mii->dev);
-	}
-	else	/* Let MMI library update carrier status */
-		rhine_check_media(mii->dev, 0);
-	if (debug > 1)
-		netdev_info(mii->dev, "force_media %d, carrier %d\n",
-			    mii->force_media, netif_carrier_ok(mii->dev));
+		if (!netif_carrier_ok(dev))
+			netif_carrier_on(dev);
+	} else	/* Let MMI library update carrier status */
+		rhine_check_media(dev, 0);
+
+	netif_info(rp, link, dev, "force_media %d, carrier %d\n",
+		   mii->force_media, netif_carrier_ok(dev));
 }
 
 /**
@@ -1266,10 +1411,10 @@
 {
 	struct rhine_private *rp = netdev_priv(dev);
 
-	spin_lock_irq(&rp->lock);
+	spin_lock_bh(&rp->lock);
 	set_bit(vid, rp->active_vlans);
 	rhine_update_vcam(dev);
-	spin_unlock_irq(&rp->lock);
+	spin_unlock_bh(&rp->lock);
 	return 0;
 }
 
@@ -1277,10 +1422,10 @@
 {
 	struct rhine_private *rp = netdev_priv(dev);
 
-	spin_lock_irq(&rp->lock);
+	spin_lock_bh(&rp->lock);
 	clear_bit(vid, rp->active_vlans);
 	rhine_update_vcam(dev);
-	spin_unlock_irq(&rp->lock);
+	spin_unlock_bh(&rp->lock);
 	return 0;
 }
 
@@ -1310,12 +1455,7 @@
 
 	napi_enable(&rp->napi);
 
-	/* Enable interrupts by setting the interrupt mask. */
-	iowrite16(IntrRxDone | IntrRxErr | IntrRxEmpty| IntrRxOverflow |
-	       IntrRxDropped | IntrRxNoBuf | IntrTxAborted |
-	       IntrTxDone | IntrTxError | IntrTxUnderrun |
-	       IntrPCIErr | IntrStatsMax | IntrLinkChange,
-	       ioaddr + IntrEnable);
+	iowrite16(RHINE_EVENT & 0xffff, ioaddr + IntrEnable);
 
 	iowrite16(CmdStart | CmdTxOn | CmdRxOn | (Cmd1NoTxPoll << 8),
 	       ioaddr + ChipCmd);
@@ -1323,23 +1463,27 @@
 }
 
 /* Enable MII link status auto-polling (required for IntrLinkChange) */
-static void rhine_enable_linkmon(void __iomem *ioaddr)
+static void rhine_enable_linkmon(struct rhine_private *rp)
 {
+	void __iomem *ioaddr = rp->base;
+
 	iowrite8(0, ioaddr + MIICmd);
 	iowrite8(MII_BMSR, ioaddr + MIIRegAddr);
 	iowrite8(0x80, ioaddr + MIICmd);
 
-	RHINE_WAIT_FOR((ioread8(ioaddr + MIIRegAddr) & 0x20));
+	rhine_wait_bit_high(rp, MIIRegAddr, 0x20);
 
 	iowrite8(MII_BMSR | 0x40, ioaddr + MIIRegAddr);
 }
 
 /* Disable MII link status auto-polling (required for MDIO access) */
-static void rhine_disable_linkmon(void __iomem *ioaddr, u32 quirks)
+static void rhine_disable_linkmon(struct rhine_private *rp)
 {
+	void __iomem *ioaddr = rp->base;
+
 	iowrite8(0, ioaddr + MIICmd);
 
-	if (quirks & rqRhineI) {
+	if (rp->quirks & rqRhineI) {
 		iowrite8(0x01, ioaddr + MIIRegAddr);	// MII_BMSR
 
 		/* Can be called from ISR. Evil. */
@@ -1348,13 +1492,13 @@
 		/* 0x80 must be set immediately before turning it off */
 		iowrite8(0x80, ioaddr + MIICmd);
 
-		RHINE_WAIT_FOR(ioread8(ioaddr + MIIRegAddr) & 0x20);
+		rhine_wait_bit_high(rp, MIIRegAddr, 0x20);
 
 		/* Heh. Now clear 0x80 again. */
 		iowrite8(0, ioaddr + MIICmd);
 	}
 	else
-		RHINE_WAIT_FOR(ioread8(ioaddr + MIIRegAddr) & 0x80);
+		rhine_wait_bit_high(rp, MIIRegAddr, 0x80);
 }
 
 /* Read and write over the MII Management Data I/O (MDIO) interface. */
@@ -1365,16 +1509,16 @@
 	void __iomem *ioaddr = rp->base;
 	int result;
 
-	rhine_disable_linkmon(ioaddr, rp->quirks);
+	rhine_disable_linkmon(rp);
 
 	/* rhine_disable_linkmon already cleared MIICmd */
 	iowrite8(phy_id, ioaddr + MIIPhyAddr);
 	iowrite8(regnum, ioaddr + MIIRegAddr);
 	iowrite8(0x40, ioaddr + MIICmd);		/* Trigger read */
-	RHINE_WAIT_FOR(!(ioread8(ioaddr + MIICmd) & 0x40));
+	rhine_wait_bit_low(rp, MIICmd, 0x40);
 	result = ioread16(ioaddr + MIIData);
 
-	rhine_enable_linkmon(ioaddr);
+	rhine_enable_linkmon(rp);
 	return result;
 }
 
@@ -1383,16 +1527,33 @@
 	struct rhine_private *rp = netdev_priv(dev);
 	void __iomem *ioaddr = rp->base;
 
-	rhine_disable_linkmon(ioaddr, rp->quirks);
+	rhine_disable_linkmon(rp);
 
 	/* rhine_disable_linkmon already cleared MIICmd */
 	iowrite8(phy_id, ioaddr + MIIPhyAddr);
 	iowrite8(regnum, ioaddr + MIIRegAddr);
 	iowrite16(value, ioaddr + MIIData);
 	iowrite8(0x20, ioaddr + MIICmd);		/* Trigger write */
-	RHINE_WAIT_FOR(!(ioread8(ioaddr + MIICmd) & 0x20));
+	rhine_wait_bit_low(rp, MIICmd, 0x20);
 
-	rhine_enable_linkmon(ioaddr);
+	rhine_enable_linkmon(rp);
+}
+
+static void rhine_task_disable(struct rhine_private *rp)
+{
+	mutex_lock(&rp->task_lock);
+	rp->task_enable = false;
+	mutex_unlock(&rp->task_lock);
+
+	cancel_work_sync(&rp->slow_event_task);
+	cancel_work_sync(&rp->reset_task);
+}
+
+static void rhine_task_enable(struct rhine_private *rp)
+{
+	mutex_lock(&rp->task_lock);
+	rp->task_enable = true;
+	mutex_unlock(&rp->task_lock);
 }
 
 static int rhine_open(struct net_device *dev)
@@ -1406,8 +1567,7 @@
 	if (rc)
 		return rc;
 
-	if (debug > 1)
-		netdev_dbg(dev, "%s() irq %d\n", __func__, rp->pdev->irq);
+	netif_dbg(rp, ifup, dev, "%s() irq %d\n", __func__, rp->pdev->irq);
 
 	rc = alloc_ring(dev);
 	if (rc) {
@@ -1417,11 +1577,12 @@
 	alloc_rbufs(dev);
 	alloc_tbufs(dev);
 	rhine_chip_reset(dev);
+	rhine_task_enable(rp);
 	init_registers(dev);
-	if (debug > 2)
-		netdev_dbg(dev, "%s() Done - status %04x MII status: %04x\n",
-			   __func__, ioread16(ioaddr + ChipCmd),
-			   mdio_read(dev, rp->mii_if.phy_id, MII_BMSR));
+
+	netif_dbg(rp, ifup, dev, "%s() Done - status %04x MII status: %04x\n",
+		  __func__, ioread16(ioaddr + ChipCmd),
+		  mdio_read(dev, rp->mii_if.phy_id, MII_BMSR));
 
 	netif_start_queue(dev);
 
@@ -1434,11 +1595,12 @@
 						reset_task);
 	struct net_device *dev = rp->dev;
 
-	/* protect against concurrent rx interrupts */
-	disable_irq(rp->pdev->irq);
+	mutex_lock(&rp->task_lock);
+
+	if (!rp->task_enable)
+		goto out_unlock;
 
 	napi_disable(&rp->napi);
-
 	spin_lock_bh(&rp->lock);
 
 	/* clear all descriptors */
@@ -1452,11 +1614,13 @@
 	init_registers(dev);
 
 	spin_unlock_bh(&rp->lock);
-	enable_irq(rp->pdev->irq);
 
 	dev->trans_start = jiffies; /* prevent tx timeout */
 	dev->stats.tx_errors++;
 	netif_wake_queue(dev);
+
+out_unlock:
+	mutex_unlock(&rp->task_lock);
 }
 
 static void rhine_tx_timeout(struct net_device *dev)
@@ -1477,7 +1641,6 @@
 	struct rhine_private *rp = netdev_priv(dev);
 	void __iomem *ioaddr = rp->base;
 	unsigned entry;
-	unsigned long flags;
 
 	/* Caution: the write order is important here, set the field
 	   with the "ownership" bits last. */
@@ -1529,7 +1692,6 @@
 		rp->tx_ring[entry].tx_status = 0;
 
 	/* lock eth irq */
-	spin_lock_irqsave(&rp->lock, flags);
 	wmb();
 	rp->tx_ring[entry].tx_status |= cpu_to_le32(DescOwn);
 	wmb();
@@ -1550,78 +1712,43 @@
 	if (rp->cur_tx == rp->dirty_tx + TX_QUEUE_LEN)
 		netif_stop_queue(dev);
 
-	spin_unlock_irqrestore(&rp->lock, flags);
+	netif_dbg(rp, tx_queued, dev, "Transmit frame #%d queued in slot %d\n",
+		  rp->cur_tx - 1, entry);
 
-	if (debug > 4) {
-		netdev_dbg(dev, "Transmit frame #%d queued in slot %d\n",
-			   rp->cur_tx-1, entry);
-	}
 	return NETDEV_TX_OK;
 }
 
+static void rhine_irq_disable(struct rhine_private *rp)
+{
+	iowrite16(0x0000, rp->base + IntrEnable);
+	mmiowb();
+}
+
 /* The interrupt handler does all of the Rx thread work and cleans up
    after the Tx thread. */
 static irqreturn_t rhine_interrupt(int irq, void *dev_instance)
 {
 	struct net_device *dev = dev_instance;
 	struct rhine_private *rp = netdev_priv(dev);
-	void __iomem *ioaddr = rp->base;
-	u32 intr_status;
-	int boguscnt = max_interrupt_work;
+	u32 status;
 	int handled = 0;
 
-	while ((intr_status = get_intr_status(dev))) {
+	status = rhine_get_events(rp);
+
+	netif_dbg(rp, intr, dev, "Interrupt, status %08x\n", status);
+
+	if (status & RHINE_EVENT) {
 		handled = 1;
 
-		/* Acknowledge all of the current interrupt sources ASAP. */
-		if (intr_status & IntrTxDescRace)
-			iowrite8(0x08, ioaddr + IntrStatus2);
-		iowrite16(intr_status & 0xffff, ioaddr + IntrStatus);
-		IOSYNC;
-
-		if (debug > 4)
-			netdev_dbg(dev, "Interrupt, status %08x\n",
-				   intr_status);
-
-		if (intr_status & (IntrRxDone | IntrRxErr | IntrRxDropped |
-				   IntrRxWakeUp | IntrRxEmpty | IntrRxNoBuf)) {
-			iowrite16(IntrTxAborted |
-				  IntrTxDone | IntrTxError | IntrTxUnderrun |
-				  IntrPCIErr | IntrStatsMax | IntrLinkChange,
-				  ioaddr + IntrEnable);
-
-			napi_schedule(&rp->napi);
-		}
-
-		if (intr_status & (IntrTxErrSummary | IntrTxDone)) {
-			if (intr_status & IntrTxErrSummary) {
-				/* Avoid scavenging before Tx engine turned off */
-				RHINE_WAIT_FOR(!(ioread8(ioaddr+ChipCmd) & CmdTxOn));
-				if (debug > 2 &&
-				    ioread8(ioaddr+ChipCmd) & CmdTxOn)
-					netdev_warn(dev,
-						    "%s: Tx engine still on\n",
-						    __func__);
-			}
-			rhine_tx(dev);
-		}
-
-		/* Abnormal error summary/uncommon events handlers. */
-		if (intr_status & (IntrPCIErr | IntrLinkChange |
-				   IntrStatsMax | IntrTxError | IntrTxAborted |
-				   IntrTxUnderrun | IntrTxDescRace))
-			rhine_error(dev, intr_status);
-
-		if (--boguscnt < 0) {
-			netdev_warn(dev, "Too much work at interrupt, status=%#08x\n",
-				    intr_status);
-			break;
-		}
+		rhine_irq_disable(rp);
+		napi_schedule(&rp->napi);
 	}
 
-	if (debug > 3)
-		netdev_dbg(dev, "exiting interrupt, status=%08x\n",
-			   ioread16(ioaddr + IntrStatus));
+	if (status & ~(IntrLinkChange | IntrStatsMax | RHINE_EVENT_NAPI)) {
+		netif_err(rp, intr, dev, "Something Wicked happened! %08x\n",
+			  status);
+	}
+
 	return IRQ_RETVAL(handled);
 }
 
@@ -1632,20 +1759,16 @@
 	struct rhine_private *rp = netdev_priv(dev);
 	int txstatus = 0, entry = rp->dirty_tx % TX_RING_SIZE;
 
-	spin_lock(&rp->lock);
-
 	/* find and cleanup dirty tx descriptors */
 	while (rp->dirty_tx != rp->cur_tx) {
 		txstatus = le32_to_cpu(rp->tx_ring[entry].tx_status);
-		if (debug > 6)
-			netdev_dbg(dev, "Tx scavenge %d status %08x\n",
-				   entry, txstatus);
+		netif_dbg(rp, tx_done, dev, "Tx scavenge %d status %08x\n",
+			  entry, txstatus);
 		if (txstatus & DescOwn)
 			break;
 		if (txstatus & 0x8000) {
-			if (debug > 1)
-				netdev_dbg(dev, "Transmit error, Tx status %08x\n",
-					   txstatus);
+			netif_dbg(rp, tx_done, dev,
+				  "Transmit error, Tx status %08x\n", txstatus);
 			dev->stats.tx_errors++;
 			if (txstatus & 0x0400)
 				dev->stats.tx_carrier_errors++;
@@ -1667,10 +1790,8 @@
 				dev->stats.collisions += (txstatus >> 3) & 0x0F;
 			else
 				dev->stats.collisions += txstatus & 0x0F;
-			if (debug > 6)
-				netdev_dbg(dev, "collisions: %1.1x:%1.1x\n",
-					   (txstatus >> 3) & 0xF,
-					   txstatus & 0xF);
+			netif_dbg(rp, tx_done, dev, "collisions: %1.1x:%1.1x\n",
+				  (txstatus >> 3) & 0xF, txstatus & 0xF);
 			dev->stats.tx_bytes += rp->tx_skbuff[entry]->len;
 			dev->stats.tx_packets++;
 		}
@@ -1687,8 +1808,6 @@
 	}
 	if ((rp->cur_tx - rp->dirty_tx) < TX_QUEUE_LEN - 4)
 		netif_wake_queue(dev);
-
-	spin_unlock(&rp->lock);
 }
 
 /**
@@ -1713,11 +1832,8 @@
 	int count;
 	int entry = rp->cur_rx % RX_RING_SIZE;
 
-	if (debug > 4) {
-		netdev_dbg(dev, "%s(), entry %d status %08x\n",
-			   __func__, entry,
-			   le32_to_cpu(rp->rx_head_desc->rx_status));
-	}
+	netif_dbg(rp, rx_status, dev, "%s(), entry %d status %08x\n", __func__,
+		  entry, le32_to_cpu(rp->rx_head_desc->rx_status));
 
 	/* If EOP is set on the next entry, it's a new packet. Send it up. */
 	for (count = 0; count < limit; ++count) {
@@ -1729,9 +1845,8 @@
 		if (desc_status & DescOwn)
 			break;
 
-		if (debug > 4)
-			netdev_dbg(dev, "%s() status is %08x\n",
-				   __func__, desc_status);
+		netif_dbg(rp, rx_status, dev, "%s() status %08x\n", __func__,
+			  desc_status);
 
 		if ((desc_status & (RxWholePkt | RxErr)) != RxWholePkt) {
 			if ((desc_status & RxWholePkt) != RxWholePkt) {
@@ -1747,9 +1862,9 @@
 				dev->stats.rx_length_errors++;
 			} else if (desc_status & RxErr) {
 				/* There was a error. */
-				if (debug > 2)
-					netdev_dbg(dev, "%s() Rx error was %08x\n",
-						   __func__, desc_status);
+				netif_dbg(rp, rx_err, dev,
+					  "%s() Rx error %08x\n", __func__,
+					  desc_status);
 				dev->stats.rx_errors++;
 				if (desc_status & 0x0030)
 					dev->stats.rx_length_errors++;
@@ -1839,19 +1954,6 @@
 	return count;
 }
 
-/*
- * Clears the "tally counters" for CRC errors and missed frames(?).
- * It has been reported that some chips need a write of 0 to clear
- * these, for others the counters are set to 1 when written to and
- * instead cleared when read. So we clear them both ways ...
- */
-static inline void clear_tally_counters(void __iomem *ioaddr)
-{
-	iowrite32(0, ioaddr + RxMissed);
-	ioread16(ioaddr + RxCRCErrs);
-	ioread16(ioaddr + RxMissed);
-}
-
 static void rhine_restart_tx(struct net_device *dev) {
 	struct rhine_private *rp = netdev_priv(dev);
 	void __iomem *ioaddr = rp->base;
@@ -1862,7 +1964,7 @@
 	 * If new errors occurred, we need to sort them out before doing Tx.
 	 * In that case the ISR will be back here RSN anyway.
 	 */
-	intr_status = get_intr_status(dev);
+	intr_status = rhine_get_events(rp);
 
 	if ((intr_status & IntrTxErrSummary) == 0) {
 
@@ -1883,79 +1985,50 @@
 	}
 	else {
 		/* This should never happen */
-		if (debug > 1)
-			netdev_warn(dev, "%s() Another error occurred %08x\n",
-				   __func__, intr_status);
+		netif_warn(rp, tx_err, dev, "another error occurred %08x\n",
+			   intr_status);
 	}
 
 }
 
-static void rhine_error(struct net_device *dev, int intr_status)
+static void rhine_slow_event_task(struct work_struct *work)
 {
-	struct rhine_private *rp = netdev_priv(dev);
-	void __iomem *ioaddr = rp->base;
+	struct rhine_private *rp =
+		container_of(work, struct rhine_private, slow_event_task);
+	struct net_device *dev = rp->dev;
+	u32 intr_status;
 
-	spin_lock(&rp->lock);
+	mutex_lock(&rp->task_lock);
+
+	if (!rp->task_enable)
+		goto out_unlock;
+
+	intr_status = rhine_get_events(rp);
+	rhine_ack_events(rp, intr_status & RHINE_EVENT_SLOW);
 
 	if (intr_status & IntrLinkChange)
 		rhine_check_media(dev, 0);
-	if (intr_status & IntrStatsMax) {
-		dev->stats.rx_crc_errors += ioread16(ioaddr + RxCRCErrs);
-		dev->stats.rx_missed_errors += ioread16(ioaddr + RxMissed);
-		clear_tally_counters(ioaddr);
-	}
-	if (intr_status & IntrTxAborted) {
-		if (debug > 1)
-			netdev_info(dev, "Abort %08x, frame dropped\n",
-				    intr_status);
-	}
-	if (intr_status & IntrTxUnderrun) {
-		if (rp->tx_thresh < 0xE0)
-			BYTE_REG_BITS_SET((rp->tx_thresh += 0x20), 0x80, ioaddr + TxConfig);
-		if (debug > 1)
-			netdev_info(dev, "Transmitter underrun, Tx threshold now %02x\n",
-				    rp->tx_thresh);
-	}
-	if (intr_status & IntrTxDescRace) {
-		if (debug > 2)
-			netdev_info(dev, "Tx descriptor write-back race\n");
-	}
-	if ((intr_status & IntrTxError) &&
-	    (intr_status & (IntrTxAborted |
-	     IntrTxUnderrun | IntrTxDescRace)) == 0) {
-		if (rp->tx_thresh < 0xE0) {
-			BYTE_REG_BITS_SET((rp->tx_thresh += 0x20), 0x80, ioaddr + TxConfig);
-		}
-		if (debug > 1)
-			netdev_info(dev, "Unspecified error. Tx threshold now %02x\n",
-				    rp->tx_thresh);
-	}
-	if (intr_status & (IntrTxAborted | IntrTxUnderrun | IntrTxDescRace |
-			   IntrTxError))
-		rhine_restart_tx(dev);
 
-	if (intr_status & ~(IntrLinkChange | IntrStatsMax | IntrTxUnderrun |
-			    IntrTxError | IntrTxAborted | IntrNormalSummary |
-			    IntrTxDescRace)) {
-		if (debug > 1)
-			netdev_err(dev, "Something Wicked happened! %08x\n",
-				   intr_status);
-	}
+	if (intr_status & IntrPCIErr)
+		netif_warn(rp, hw, dev, "PCI error\n");
 
-	spin_unlock(&rp->lock);
+	napi_disable(&rp->napi);
+	rhine_irq_disable(rp);
+	/* Slow and safe. Consider __napi_schedule as a replacement ? */
+	napi_enable(&rp->napi);
+	napi_schedule(&rp->napi);
+
+out_unlock:
+	mutex_unlock(&rp->task_lock);
 }
 
 static struct net_device_stats *rhine_get_stats(struct net_device *dev)
 {
 	struct rhine_private *rp = netdev_priv(dev);
-	void __iomem *ioaddr = rp->base;
-	unsigned long flags;
 
-	spin_lock_irqsave(&rp->lock, flags);
-	dev->stats.rx_crc_errors += ioread16(ioaddr + RxCRCErrs);
-	dev->stats.rx_missed_errors += ioread16(ioaddr + RxMissed);
-	clear_tally_counters(ioaddr);
-	spin_unlock_irqrestore(&rp->lock, flags);
+	spin_lock_bh(&rp->lock);
+	rhine_update_rx_crc_and_missed_errord(rp);
+	spin_unlock_bh(&rp->lock);
 
 	return &dev->stats;
 }
@@ -2022,9 +2095,9 @@
 	struct rhine_private *rp = netdev_priv(dev);
 	int rc;
 
-	spin_lock_irq(&rp->lock);
+	mutex_lock(&rp->task_lock);
 	rc = mii_ethtool_gset(&rp->mii_if, cmd);
-	spin_unlock_irq(&rp->lock);
+	mutex_unlock(&rp->task_lock);
 
 	return rc;
 }
@@ -2034,10 +2107,10 @@
 	struct rhine_private *rp = netdev_priv(dev);
 	int rc;
 
-	spin_lock_irq(&rp->lock);
+	mutex_lock(&rp->task_lock);
 	rc = mii_ethtool_sset(&rp->mii_if, cmd);
-	spin_unlock_irq(&rp->lock);
 	rhine_set_carrier(&rp->mii_if);
+	mutex_unlock(&rp->task_lock);
 
 	return rc;
 }
@@ -2058,12 +2131,16 @@
 
 static u32 netdev_get_msglevel(struct net_device *dev)
 {
-	return debug;
+	struct rhine_private *rp = netdev_priv(dev);
+
+	return rp->msg_enable;
 }
 
 static void netdev_set_msglevel(struct net_device *dev, u32 value)
 {
-	debug = value;
+	struct rhine_private *rp = netdev_priv(dev);
+
+	rp->msg_enable = value;
 }
 
 static void rhine_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
@@ -2119,10 +2196,10 @@
 	if (!netif_running(dev))
 		return -EINVAL;
 
-	spin_lock_irq(&rp->lock);
+	mutex_lock(&rp->task_lock);
 	rc = generic_mii_ioctl(&rp->mii_if, if_mii(rq), cmd, NULL);
-	spin_unlock_irq(&rp->lock);
 	rhine_set_carrier(&rp->mii_if);
+	mutex_unlock(&rp->task_lock);
 
 	return rc;
 }
@@ -2132,27 +2209,21 @@
 	struct rhine_private *rp = netdev_priv(dev);
 	void __iomem *ioaddr = rp->base;
 
+	rhine_task_disable(rp);
 	napi_disable(&rp->napi);
-	cancel_work_sync(&rp->reset_task);
 	netif_stop_queue(dev);
 
-	spin_lock_irq(&rp->lock);
-
-	if (debug > 1)
-		netdev_dbg(dev, "Shutting down ethercard, status was %04x\n",
-			   ioread16(ioaddr + ChipCmd));
+	netif_dbg(rp, ifdown, dev, "Shutting down ethercard, status was %04x\n",
+		  ioread16(ioaddr + ChipCmd));
 
 	/* Switch to loopback mode to avoid hardware races. */
 	iowrite8(rp->tx_thresh | 0x02, ioaddr + TxConfig);
 
-	/* Disable interrupts by clearing the interrupt mask. */
-	iowrite16(0x0000, ioaddr + IntrEnable);
+	rhine_irq_disable(rp);
 
 	/* Stop the chip's Tx and Rx processes. */
 	iowrite16(CmdStop, ioaddr + ChipCmd);
 
-	spin_unlock_irq(&rp->lock);
-
 	free_irq(rp->pdev->irq, dev);
 	free_rbufs(dev);
 	free_tbufs(dev);
@@ -2192,6 +2263,8 @@
 	if (rp->quirks & rq6patterns)
 		iowrite8(0x04, ioaddr + WOLcgClr);
 
+	spin_lock(&rp->lock);
+
 	if (rp->wolopts & WAKE_MAGIC) {
 		iowrite8(WOLmagic, ioaddr + WOLcrSet);
 		/*
@@ -2216,58 +2289,46 @@
 		iowrite8(ioread8(ioaddr + StickyHW) | 0x04, ioaddr + StickyHW);
 	}
 
-	/* Hit power state D3 (sleep) */
-	if (!avoid_D3)
+	spin_unlock(&rp->lock);
+
+	if (system_state == SYSTEM_POWER_OFF && !avoid_D3) {
 		iowrite8(ioread8(ioaddr + StickyHW) | 0x03, ioaddr + StickyHW);
 
-	/* TODO: Check use of pci_enable_wake() */
-
+		pci_wake_from_d3(pdev, true);
+		pci_set_power_state(pdev, PCI_D3hot);
+	}
 }
 
-#ifdef CONFIG_PM
-static int rhine_suspend(struct pci_dev *pdev, pm_message_t state)
+#ifdef CONFIG_PM_SLEEP
+static int rhine_suspend(struct device *device)
 {
+	struct pci_dev *pdev = to_pci_dev(device);
 	struct net_device *dev = pci_get_drvdata(pdev);
 	struct rhine_private *rp = netdev_priv(dev);
-	unsigned long flags;
 
 	if (!netif_running(dev))
 		return 0;
 
+	rhine_task_disable(rp);
+	rhine_irq_disable(rp);
 	napi_disable(&rp->napi);
 
 	netif_device_detach(dev);
-	pci_save_state(pdev);
 
-	spin_lock_irqsave(&rp->lock, flags);
 	rhine_shutdown(pdev);
-	spin_unlock_irqrestore(&rp->lock, flags);
 
-	free_irq(dev->irq, dev);
 	return 0;
 }
 
-static int rhine_resume(struct pci_dev *pdev)
+static int rhine_resume(struct device *device)
 {
+	struct pci_dev *pdev = to_pci_dev(device);
 	struct net_device *dev = pci_get_drvdata(pdev);
 	struct rhine_private *rp = netdev_priv(dev);
-	unsigned long flags;
-	int ret;
 
 	if (!netif_running(dev))
 		return 0;
 
-	if (request_irq(dev->irq, rhine_interrupt, IRQF_SHARED, dev->name, dev))
-		netdev_err(dev, "request_irq failed\n");
-
-	ret = pci_set_power_state(pdev, PCI_D0);
-	if (debug > 1)
-		netdev_info(dev, "Entering power state D0 %s (%d)\n",
-			    ret ? "failed" : "succeeded", ret);
-
-	pci_restore_state(pdev);
-
-	spin_lock_irqsave(&rp->lock, flags);
 #ifdef USE_MMIO
 	enable_mmio(rp->pioaddr, rp->quirks);
 #endif
@@ -2276,25 +2337,32 @@
 	free_rbufs(dev);
 	alloc_tbufs(dev);
 	alloc_rbufs(dev);
+	rhine_task_enable(rp);
+	spin_lock_bh(&rp->lock);
 	init_registers(dev);
-	spin_unlock_irqrestore(&rp->lock, flags);
+	spin_unlock_bh(&rp->lock);
 
 	netif_device_attach(dev);
 
 	return 0;
 }
-#endif /* CONFIG_PM */
+
+static SIMPLE_DEV_PM_OPS(rhine_pm_ops, rhine_suspend, rhine_resume);
+#define RHINE_PM_OPS	(&rhine_pm_ops)
+
+#else
+
+#define RHINE_PM_OPS	NULL
+
+#endif /* !CONFIG_PM_SLEEP */
 
 static struct pci_driver rhine_driver = {
 	.name		= DRV_NAME,
 	.id_table	= rhine_pci_tbl,
 	.probe		= rhine_init_one,
 	.remove		= __devexit_p(rhine_remove_one),
-#ifdef CONFIG_PM
-	.suspend	= rhine_suspend,
-	.resume		= rhine_resume,
-#endif /* CONFIG_PM */
-	.shutdown =	rhine_shutdown,
+	.shutdown	= rhine_shutdown,
+	.driver.pm	= RHINE_PM_OPS,
 };
 
 static struct dmi_system_id __initdata rhine_dmi_table[] = {

diff --git a/drivers/net/ethernet/xscale/ixp4xx_eth.c b/drivers/net/ethernet/xscale/ixp4xx_eth.c
index f45c85a..72a854f 100644
--- a/drivers/net/ethernet/xscale/ixp4xx_eth.c
+++ b/drivers/net/ethernet/xscale/ixp4xx_eth.c

@@ -529,7 +529,7 @@
 	mdio_bus->name = "IXP4xx MII Bus";
 	mdio_bus->read = &ixp4xx_mdio_read;
 	mdio_bus->write = &ixp4xx_mdio_write;
-	strcpy(mdio_bus->id, "0");
+	snprintf(mdio_bus->id, MII_BUS_ID_SIZE, "ixp4xx-eth-0");
 
 	if ((err = mdiobus_register(mdio_bus)))
 		mdiobus_free(mdio_bus);

diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c
index 9663e0b..ba3c591 100644
--- a/drivers/net/phy/dp83640.c
+++ b/drivers/net/phy/dp83640.c

@@ -1159,7 +1159,7 @@
 			}
 		}
 		spin_unlock_irqrestore(&dp83640->rx_lock, flags);
-		netif_rx(skb);
+		netif_rx_ni(skb);
 	}
 
 	/* Clear out expired time stamps. */

diff --git a/drivers/net/phy/fixed.c b/drivers/net/phy/fixed.c
index 1fa4d73..633680d 100644
--- a/drivers/net/phy/fixed.c
+++ b/drivers/net/phy/fixed.c

@@ -220,7 +220,7 @@
 		goto err_mdiobus_reg;
 	}
 
-	snprintf(fmb->mii_bus->id, MII_BUS_ID_SIZE, "0");
+	snprintf(fmb->mii_bus->id, MII_BUS_ID_SIZE, "fixed-0");
 	fmb->mii_bus->name = "Fixed MDIO Bus";
 	fmb->mii_bus->priv = fmb;
 	fmb->mii_bus->parent = &pdev->dev;

diff --git a/drivers/net/phy/mdio-gpio.c b/drivers/net/phy/mdio-gpio.c
index 89c5a3e..50e8e5e 100644
--- a/drivers/net/phy/mdio-gpio.c
+++ b/drivers/net/phy/mdio-gpio.c

@@ -116,7 +116,7 @@
 		if (!new_bus->irq[i])
 			new_bus->irq[i] = PHY_POLL;
 
-	snprintf(new_bus->id, MII_BUS_ID_SIZE, "%x", bus_id);
+	snprintf(new_bus->id, MII_BUS_ID_SIZE, "gpio-%x", bus_id);
 
 	if (gpio_request(bitbang->mdc, "mdc"))
 		goto out_free_bus;

diff --git a/drivers/net/phy/mdio-octeon.c b/drivers/net/phy/mdio-octeon.c
index bd12ba9..826d961 100644
--- a/drivers/net/phy/mdio-octeon.c
+++ b/drivers/net/phy/mdio-octeon.c

@@ -118,7 +118,8 @@
 	bus->mii_bus->priv = bus;
 	bus->mii_bus->irq = bus->phy_irq;
 	bus->mii_bus->name = "mdio-octeon";
-	snprintf(bus->mii_bus->id, MII_BUS_ID_SIZE, "%x", bus->unit);
+	snprintf(bus->mii_bus->id, MII_BUS_ID_SIZE, "%s-%x",
+		bus->mii_bus->name, bus->unit);
 	bus->mii_bus->parent = &pdev->dev;
 
 	bus->mii_bus->read = octeon_mdiobus_read;

diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
index 6c58da2..88cc5db 100644
--- a/drivers/net/phy/mdio_bus.c
+++ b/drivers/net/phy/mdio_bus.c

@@ -37,22 +37,36 @@
 #include <asm/uaccess.h>
 
 /**
- * mdiobus_alloc - allocate a mii_bus structure
+ * mdiobus_alloc_size - allocate a mii_bus structure
  *
  * Description: called by a bus driver to allocate an mii_bus
  * structure to fill in.
+ *
+ * 'size' is an extra amount of memory to allocate for private storage.
+ * If non-zero, then bus->priv points to that memory.
  */
-struct mii_bus *mdiobus_alloc(void)
+struct mii_bus *mdiobus_alloc_size(size_t size)
 {
 	struct mii_bus *bus;
+	size_t aligned_size = ALIGN(sizeof(*bus), NETDEV_ALIGN);
+	size_t alloc_size;
 
-	bus = kzalloc(sizeof(*bus), GFP_KERNEL);
-	if (bus != NULL)
+	/* If we alloc extra space, it should be aligned */
+	if (size)
+		alloc_size = aligned_size + size;
+	else
+		alloc_size = sizeof(*bus);
+
+	bus = kzalloc(alloc_size, GFP_KERNEL);
+	if (bus) {
 		bus->state = MDIOBUS_ALLOCATED;
+		if (size)
+			bus->priv = (void *)bus + aligned_size;
+	}
 
 	return bus;
 }
-EXPORT_SYMBOL(mdiobus_alloc);
+EXPORT_SYMBOL(mdiobus_alloc_size);
 
 /**
  * mdiobus_release - mii_bus device release callback

diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c
index c1c9293..df884dd 100644
--- a/drivers/net/ppp/pptp.c
+++ b/drivers/net/ppp/pptp.c

@@ -585,8 +585,8 @@
 	po = pppox_sk(sk);
 	opt = &po->proto.pptp;
 
-	opt->seq_sent = 0; opt->seq_recv = 0;
-	opt->ack_recv = 0; opt->ack_sent = 0;
+	opt->seq_sent = 0; opt->seq_recv = 0xffffffff;
+	opt->ack_recv = 0; opt->ack_sent = 0xffffffff;
 
 	error = 0;
 out:

diff --git a/drivers/net/usb/asix.c b/drivers/net/usb/asix.c
index d0937c4..8e84f5b 100644
--- a/drivers/net/usb/asix.c
+++ b/drivers/net/usb/asix.c

@@ -978,6 +978,7 @@
 
 static int ax88772_reset(struct usbnet *dev)
 {
+	struct asix_data *data = (struct asix_data *)&dev->data;
 	int ret, embd_phy;
 	u16 rx_ctl;
 
@@ -1055,6 +1056,13 @@
 		goto out;
 	}
 
+	/* Rewrite MAC address */
+	memcpy(data->mac_addr, dev->net->dev_addr, ETH_ALEN);
+	ret = asix_write_cmd(dev, AX_CMD_WRITE_NODE_ID, 0, 0, ETH_ALEN,
+							data->mac_addr);
+	if (ret < 0)
+		goto out;
+
 	/* Set RX_CTL to default values with 2k buffer, and enable cactus */
 	ret = asix_write_rx_ctl(dev, AX_DEFAULT_RX_CTL);
 	if (ret < 0)
@@ -1320,6 +1328,13 @@
 	if (ret < 0)
 		return ret;
 
+	/* Rewrite MAC address */
+	memcpy(data->mac_addr, dev->net->dev_addr, ETH_ALEN);
+	ret = asix_write_cmd(dev, AX_CMD_WRITE_NODE_ID, 0, 0, ETH_ALEN,
+							data->mac_addr);
+	if (ret < 0)
+		return ret;
+
 	ret = asix_write_rx_ctl(dev, AX_DEFAULT_RX_CTL);
 	if (ret < 0)
 		return ret;

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 76fe14e..4880aa8 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c

@@ -370,7 +370,7 @@
 
 	skb_to_sgvec(skb, vi->rx_sg + 1, 0, skb->len);
 
-	err = virtqueue_add_buf_gfp(vi->rvq, vi->rx_sg, 0, 2, skb, gfp);
+	err = virtqueue_add_buf(vi->rvq, vi->rx_sg, 0, 2, skb, gfp);
 	if (err < 0)
 		dev_kfree_skb(skb);
 
@@ -415,8 +415,8 @@
 
 	/* chain first in list head */
 	first->private = (unsigned long)list;
-	err = virtqueue_add_buf_gfp(vi->rvq, vi->rx_sg, 0, MAX_SKB_FRAGS + 2,
-				    first, gfp);
+	err = virtqueue_add_buf(vi->rvq, vi->rx_sg, 0, MAX_SKB_FRAGS + 2,
+				first, gfp);
 	if (err < 0)
 		give_pages(vi, first);
 
@@ -434,7 +434,7 @@
 
 	sg_init_one(vi->rx_sg, page_address(page), PAGE_SIZE);
 
-	err = virtqueue_add_buf_gfp(vi->rvq, vi->rx_sg, 0, 1, page, gfp);
+	err = virtqueue_add_buf(vi->rvq, vi->rx_sg, 0, 1, page, gfp);
 	if (err < 0)
 		give_pages(vi, page);
 
@@ -609,7 +609,7 @@
 
 	hdr->num_sg = skb_to_sgvec(skb, vi->tx_sg + 1, 0, skb->len) + 1;
 	return virtqueue_add_buf(vi->svq, vi->tx_sg, hdr->num_sg,
-					0, skb);
+				 0, skb, GFP_ATOMIC);
 }
 
 static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
@@ -767,7 +767,7 @@
 		sg_set_buf(&sg[i + 1], sg_virt(s), s->length);
 	sg_set_buf(&sg[out + in - 1], &status, sizeof(status));
 
-	BUG_ON(virtqueue_add_buf(vi->cvq, sg, out, in, vi) < 0);
+	BUG_ON(virtqueue_add_buf(vi->cvq, sg, out, in, vi, GFP_ATOMIC) < 0);
 
 	virtqueue_kick(vi->cvq);
 
@@ -985,15 +985,38 @@
 	virtnet_update_status(vi);
 }
 
+static int init_vqs(struct virtnet_info *vi)
+{
+	struct virtqueue *vqs[3];
+	vq_callback_t *callbacks[] = { skb_recv_done, skb_xmit_done, NULL};
+	const char *names[] = { "input", "output", "control" };
+	int nvqs, err;
+
+	/* We expect two virtqueues, receive then send,
+	 * and optionally control. */
+	nvqs = virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ) ? 3 : 2;
+
+	err = vi->vdev->config->find_vqs(vi->vdev, nvqs, vqs, callbacks, names);
+	if (err)
+		return err;
+
+	vi->rvq = vqs[0];
+	vi->svq = vqs[1];
+
+	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)) {
+		vi->cvq = vqs[2];
+
+		if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN))
+			vi->dev->features |= NETIF_F_HW_VLAN_FILTER;
+	}
+	return 0;
+}
+
 static int virtnet_probe(struct virtio_device *vdev)
 {
 	int err;
 	struct net_device *dev;
 	struct virtnet_info *vi;
-	struct virtqueue *vqs[3];
-	vq_callback_t *callbacks[] = { skb_recv_done, skb_xmit_done, NULL};
-	const char *names[] = { "input", "output", "control" };
-	int nvqs;
 
 	/* Allocate ourselves a network device with room for our info */
 	dev = alloc_etherdev(sizeof(struct virtnet_info));
@@ -1065,24 +1088,10 @@
 	if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
 		vi->mergeable_rx_bufs = true;
 
-	/* We expect two virtqueues, receive then send,
-	 * and optionally control. */
-	nvqs = virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ) ? 3 : 2;
-
-	err = vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names);
+	err = init_vqs(vi);
 	if (err)
 		goto free_stats;
 
-	vi->rvq = vqs[0];
-	vi->svq = vqs[1];
-
-	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)) {
-		vi->cvq = vqs[2];
-
-		if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN))
-			dev->features |= NETIF_F_HW_VLAN_FILTER;
-	}
-
 	err = register_netdev(dev);
 	if (err) {
 		pr_debug("virtio_net: registering device failed\n");
@@ -1144,27 +1153,73 @@
 	BUG_ON(vi->num != 0);
 }
 
-static void __devexit virtnet_remove(struct virtio_device *vdev)
+static void remove_vq_common(struct virtnet_info *vi)
 {
-	struct virtnet_info *vi = vdev->priv;
-
-	/* Stop all the virtqueues. */
-	vdev->config->reset(vdev);
-
-	unregister_netdev(vi->dev);
+	vi->vdev->config->reset(vi->vdev);
 
 	/* Free unused buffers in both send and recv, if any. */
 	free_unused_bufs(vi);
 
-	vdev->config->del_vqs(vi->vdev);
+	vi->vdev->config->del_vqs(vi->vdev);
 
 	while (vi->pages)
 		__free_pages(get_a_page(vi, GFP_KERNEL), 0);
+}
+
+static void __devexit virtnet_remove(struct virtio_device *vdev)
+{
+	struct virtnet_info *vi = vdev->priv;
+
+	unregister_netdev(vi->dev);
+
+	remove_vq_common(vi);
 
 	free_percpu(vi->stats);
 	free_netdev(vi->dev);
 }
 
+#ifdef CONFIG_PM
+static int virtnet_freeze(struct virtio_device *vdev)
+{
+	struct virtnet_info *vi = vdev->priv;
+
+	virtqueue_disable_cb(vi->rvq);
+	virtqueue_disable_cb(vi->svq);
+	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ))
+		virtqueue_disable_cb(vi->cvq);
+
+	netif_device_detach(vi->dev);
+	cancel_delayed_work_sync(&vi->refill);
+
+	if (netif_running(vi->dev))
+		napi_disable(&vi->napi);
+
+	remove_vq_common(vi);
+
+	return 0;
+}
+
+static int virtnet_restore(struct virtio_device *vdev)
+{
+	struct virtnet_info *vi = vdev->priv;
+	int err;
+
+	err = init_vqs(vi);
+	if (err)
+		return err;
+
+	if (netif_running(vi->dev))
+		virtnet_napi_enable(vi);
+
+	netif_device_attach(vi->dev);
+
+	if (!try_fill_recv(vi, GFP_KERNEL))
+		queue_delayed_work(system_nrt_wq, &vi->refill, 0);
+
+	return 0;
+}
+#endif
+
 static struct virtio_device_id id_table[] = {
 	{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
 	{ 0 },
@@ -1189,6 +1244,10 @@
 	.probe =	virtnet_probe,
 	.remove =	__devexit_p(virtnet_remove),
 	.config_changed = virtnet_config_changed,
+#ifdef CONFIG_PM
+	.freeze =	virtnet_freeze,
+	.restore =	virtnet_restore,
+#endif
 };
 
 static int __init init(void)

diff --git a/drivers/net/wireless/ath/ath9k/ar9003_phy.c b/drivers/net/wireless/ath/ath9k/ar9003_phy.c
index 2589b38..2b0bfb8 100644
--- a/drivers/net/wireless/ath/ath9k/ar9003_phy.c
+++ b/drivers/net/wireless/ath/ath9k/ar9003_phy.c

@@ -46,7 +46,7 @@
  * @chan:
  *
  * This is the function to change channel on single-chip devices, that is
- * all devices after ar9280.
+ * for AR9300 family of chipsets.
  *
  * This function takes the channel value in MHz and sets
  * hardware channel value. Assumes writes have been enabled to analog bus.

diff --git a/drivers/net/wireless/ath/ath9k/ath9k.h b/drivers/net/wireless/ath/ath9k/ath9k.h
index b30e9fc..171ccf7 100644
--- a/drivers/net/wireless/ath/ath9k/ath9k.h
+++ b/drivers/net/wireless/ath/ath9k/ath9k.h

@@ -679,7 +679,6 @@
 void ath9k_set_hw_capab(struct ath_softc *sc, struct ieee80211_hw *hw);
 void ath9k_reload_chainmask_settings(struct ath_softc *sc);
 
-void ath_radio_disable(struct ath_softc *sc, struct ieee80211_hw *hw);
 bool ath9k_uses_beacons(int type);
 
 #ifdef CONFIG_ATH9K_PCI

diff --git a/drivers/net/wireless/ath/ath9k/calib.c b/drivers/net/wireless/ath/ath9k/calib.c
index 172e33d..2f4b48e 100644
--- a/drivers/net/wireless/ath/ath9k/calib.c
+++ b/drivers/net/wireless/ath/ath9k/calib.c

@@ -400,6 +400,7 @@
 	ah->noise = ath9k_hw_getchan_noise(ah, chan);
 	return true;
 }
+EXPORT_SYMBOL(ath9k_hw_getnf);
 
 void ath9k_init_nfcal_hist_buffer(struct ath_hw *ah,
 				  struct ath9k_channel *chan)

diff --git a/drivers/net/wireless/ath/ath9k/calib.h b/drivers/net/wireless/ath/ath9k/calib.h
index 05b9dbf..3b33996 100644
--- a/drivers/net/wireless/ath/ath9k/calib.h
+++ b/drivers/net/wireless/ath/ath9k/calib.h

@@ -19,7 +19,6 @@
 
 #include "hw.h"
 
-#define AR_PHY_CCA_FILTERWINDOW_LENGTH_INIT     3
 #define AR_PHY_CCA_FILTERWINDOW_LENGTH          5
 
 #define NUM_NF_READINGS       6

diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c
index e267c92..4a00806 100644
--- a/drivers/net/wireless/ath/ath9k/main.c
+++ b/drivers/net/wireless/ath/ath9k/main.c

@@ -1629,7 +1629,6 @@
 
 	if (changed & IEEE80211_CONF_CHANGE_CHANNEL) {
 		struct ieee80211_channel *curchan = hw->conf.channel;
-		struct ath9k_channel old_chan;
 		int pos = curchan->hw_value;
 		int old_pos = -1;
 		unsigned long flags;
@@ -1654,11 +1653,8 @@
 		 * Preserve the current channel values, before updating
 		 * the same channel
 		 */
-		if (old_pos == pos) {
-			memcpy(&old_chan, &sc->sc_ah->channels[pos],
-				sizeof(struct ath9k_channel));
-			ah->curchan = &old_chan;
-		}
+		if (ah->curchan && (old_pos == pos))
+			ath9k_hw_getnf(ah, ah->curchan);
 
 		ath9k_cmn_update_ichannel(&sc->sc_ah->channels[pos],
 					  curchan, conf->channel_type);

diff --git a/drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c b/drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c
index 5a002a2..f7eeee1 100644
--- a/drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c
+++ b/drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c

@@ -3119,8 +3119,10 @@
 		/* Verify NVRAM bytes */
 		brcmf_dbg(INFO, "Compare NVRAM dl & ul; varsize=%d\n", varsize);
 		nvram_ularray = kmalloc(varsize, GFP_ATOMIC);
-		if (!nvram_ularray)
+		if (!nvram_ularray) {
+			kfree(vbuffer);
 			return -ENOMEM;
+		}
 
 		/* Upload image to verify downloaded contents. */
 		memset(nvram_ularray, 0xaa, varsize);

diff --git a/drivers/net/wireless/brcm80211/brcmsmac/srom.c b/drivers/net/wireless/brcm80211/brcmsmac/srom.c
index 6109215..5637436 100644
--- a/drivers/net/wireless/brcm80211/brcmsmac/srom.c
+++ b/drivers/net/wireless/brcm80211/brcmsmac/srom.c

@@ -764,6 +764,22 @@
 }
 
 /*
+ * The crc check is done on a little-endian array, we need
+ * to switch the bytes around before checking crc (and
+ * then switch it back).
+ */
+static int do_crc_check(u16 *buf, unsigned nwords)
+{
+	u8 crc;
+
+	cpu_to_le16_buf(buf, nwords);
+	crc = crc8(brcms_srom_crc8_table, (void *)buf, nwords << 1, CRC8_INIT_VALUE);
+	le16_to_cpu_buf(buf, nwords);
+
+	return crc == CRC8_GOOD_VALUE(brcms_srom_crc8_table);
+}
+
+/*
  * Read in and validate sprom.
  * Return 0 on success, nonzero on error.
  */
@@ -772,8 +788,6 @@
 {
 	int err = 0;
 	uint i;
-	u8 *bbuf = (u8 *)buf; /* byte buffer */
-	uint nbytes = nwords << 1;
 	struct bcma_device *core;
 	uint sprom_offset;
 
@@ -786,9 +800,9 @@
 		sprom_offset = CHIPCREGOFFS(sromotp);
 	}
 
-	/* read the sprom in bytes */
-	for (i = 0; i < nbytes; i++)
-		bbuf[i] = bcma_read8(core, sprom_offset+i);
+	/* read the sprom */
+	for (i = 0; i < nwords; i++)
+		buf[i] = bcma_read16(core, sprom_offset+i*2);
 
 	if (buf[0] == 0xffff)
 		/*
@@ -798,13 +812,8 @@
 		 */
 		return -ENODATA;
 
-	if (check_crc &&
-	    crc8(brcms_srom_crc8_table, bbuf, nbytes, CRC8_INIT_VALUE) !=
-		 CRC8_GOOD_VALUE(brcms_srom_crc8_table))
+	if (check_crc && !do_crc_check(buf, nwords))
 		err = -EIO;
-	else
-		/* now correct the endianness of the byte array */
-		le16_to_cpu_buf(buf, nwords);
 
 	return err;
 }

diff --git a/drivers/net/wireless/rtlwifi/rtl8192se/fw.c b/drivers/net/wireless/rtlwifi/rtl8192se/fw.c
index 6f91a14..3fda6b1 100644
--- a/drivers/net/wireless/rtlwifi/rtl8192se/fw.c
+++ b/drivers/net/wireless/rtlwifi/rtl8192se/fw.c

@@ -196,6 +196,8 @@
 		/* Allocate skb buffer to contain firmware */
 		/* info and tx descriptor info. */
 		skb = dev_alloc_skb(frag_length);
+		if (!skb)
+			return false;
 		skb_reserve(skb, extra_descoffset);
 		seg_ptr = (u8 *)skb_put(skb, (u32)(frag_length -
 					extra_descoffset));
@@ -573,6 +575,8 @@
 
 	len = _rtl92s_get_h2c_cmdlen(MAX_TRANSMIT_BUFFER_SIZE, 1, &cmd_len);
 	skb = dev_alloc_skb(len);
+	if (!skb)
+		return false;
 	cb_desc = (struct rtl_tcb_desc *)(skb->cb);
 	cb_desc->queue_index = TXCMD_QUEUE;
 	cb_desc->cmd_or_init = DESC_PACKET_TYPE_NORMAL;

diff --git a/drivers/parport/parport_pc.c b/drivers/parport/parport_pc.c
index d0b597b..0cb64f5 100644
--- a/drivers/parport/parport_pc.c
+++ b/drivers/parport/parport_pc.c

@@ -3404,8 +3404,8 @@
 #endif
 
 #ifdef MODULE
-static const char *irq[PARPORT_PC_MAX_PORTS];
-static const char *dma[PARPORT_PC_MAX_PORTS];
+static char *irq[PARPORT_PC_MAX_PORTS];
+static char *dma[PARPORT_PC_MAX_PORTS];
 
 MODULE_PARM_DESC(io, "Base I/O address (SPP regs)");
 module_param_array(io, int, NULL, 0);

diff --git a/drivers/regulator/ab8500.c b/drivers/regulator/ab8500.c
index e91b8dd..c9b9253 100644
--- a/drivers/regulator/ab8500.c
+++ b/drivers/regulator/ab8500.c

@@ -16,8 +16,8 @@
 #include <linux/module.h>
 #include <linux/err.h>
 #include <linux/platform_device.h>
-#include <linux/mfd/ab8500.h>
 #include <linux/mfd/abx500.h>
+#include <linux/mfd/abx500/ab8500.h>
 #include <linux/regulator/driver.h>
 #include <linux/regulator/machine.h>
 #include <linux/regulator/ab8500.h>

diff --git a/drivers/rtc/rtc-ab8500.c b/drivers/rtc/rtc-ab8500.c
index a0a9810..4bcf9ca 100644
--- a/drivers/rtc/rtc-ab8500.c
+++ b/drivers/rtc/rtc-ab8500.c

@@ -15,7 +15,7 @@
 #include <linux/platform_device.h>
 #include <linux/rtc.h>
 #include <linux/mfd/abx500.h>
-#include <linux/mfd/ab8500.h>
+#include <linux/mfd/abx500/ab8500.h>
 #include <linux/delay.h>
 
 #define AB8500_RTC_SOFF_STAT_REG	0x00

diff --git a/drivers/rtc/rtc-max8925.c b/drivers/rtc/rtc-max8925.c
index 4a55293..2d71943 100644
--- a/drivers/rtc/rtc-max8925.c
+++ b/drivers/rtc/rtc-max8925.c

@@ -261,6 +261,8 @@
 	/* XXX - isn't this redundant? */
 	platform_set_drvdata(pdev, info);
 
+	device_init_wakeup(&pdev->dev, 1);
+
 	info->rtc_dev = rtc_device_register("max8925-rtc", &pdev->dev,
 					&max8925_rtc_ops, THIS_MODULE);
 	ret = PTR_ERR(info->rtc_dev);
@@ -290,10 +292,34 @@
 	return 0;
 }
 
+#ifdef CONFIG_PM_SLEEP
+static int max8925_rtc_suspend(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct max8925_chip *chip = dev_get_drvdata(pdev->dev.parent);
+
+	if (device_may_wakeup(dev))
+		chip->wakeup_flag |= 1 << MAX8925_IRQ_RTC_ALARM0;
+	return 0;
+}
+static int max8925_rtc_resume(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct max8925_chip *chip = dev_get_drvdata(pdev->dev.parent);
+
+	if (device_may_wakeup(dev))
+		chip->wakeup_flag &= ~(1 << MAX8925_IRQ_RTC_ALARM0);
+	return 0;
+}
+#endif
+
+static SIMPLE_DEV_PM_OPS(max8925_rtc_pm_ops, max8925_rtc_suspend, max8925_rtc_resume);
+
 static struct platform_driver max8925_rtc_driver = {
 	.driver		= {
 		.name	= "max8925-rtc",
 		.owner	= THIS_MODULE,
+		.pm     = &max8925_rtc_pm_ops,
 	},
 	.probe		= max8925_rtc_probe,
 	.remove		= __devexit_p(max8925_rtc_remove),

diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c
index 8af868b..7bc1955 100644
--- a/drivers/s390/kvm/kvm_virtio.c
+++ b/drivers/s390/kvm/kvm_virtio.c

@@ -198,7 +198,7 @@
 		goto out;
 
 	vq = vring_new_virtqueue(config->num, KVM_S390_VIRTIO_RING_ALIGN,
-				 vdev, (void *) config->address,
+				 vdev, true, (void *) config->address,
 				 kvm_notify, callback, name);
 	if (!vq) {
 		err = -ENOMEM;

diff --git a/drivers/usb/otg/ab8500-usb.c b/drivers/usb/otg/ab8500-usb.c
index 07ccea9..74fe6e6 100644
--- a/drivers/usb/otg/ab8500-usb.c
+++ b/drivers/usb/otg/ab8500-usb.c

@@ -30,7 +30,7 @@
 #include <linux/interrupt.h>
 #include <linux/delay.h>
 #include <linux/mfd/abx500.h>
-#include <linux/mfd/ab8500.h>
+#include <linux/mfd/abx500/ab8500.h>
 
 #define AB8500_MAIN_WD_CTRL_REG 0x01
 #define AB8500_USB_LINE_STAT_REG 0x80

diff --git a/drivers/video/nvidia/nvidia.c b/drivers/video/nvidia/nvidia.c
index 081dc47..fe13ac5 100644
--- a/drivers/video/nvidia/nvidia.c
+++ b/drivers/video/nvidia/nvidia.c

@@ -81,7 +81,7 @@
 static int bpp __devinitdata = 8;
 static int reverse_i2c __devinitdata;
 #ifdef CONFIG_MTRR
-static int nomtrr __devinitdata = 0;
+static bool nomtrr __devinitdata = false;
 #endif
 #ifdef CONFIG_PMAC_BACKLIGHT
 static int backlight __devinitdata = 1;
@@ -1509,7 +1509,7 @@
 			backlight = simple_strtoul(this_opt+10, NULL, 0);
 #ifdef CONFIG_MTRR
 		} else if (!strncmp(this_opt, "nomtrr", 6)) {
-			nomtrr = 1;
+			nomtrr = true;
 #endif
 		} else if (!strncmp(this_opt, "fpdither:", 9)) {
 			fpdither = simple_strtol(this_opt+9, NULL, 0);
@@ -1599,7 +1599,7 @@
 module_param(reverse_i2c, int, 0);
 MODULE_PARM_DESC(reverse_i2c, "reverse port assignment of the i2c bus");
 #ifdef CONFIG_MTRR
-module_param(nomtrr, bool, 0);
+module_param(nomtrr, bool, false);
 MODULE_PARM_DESC(nomtrr, "Disables MTRR support (0 or 1=disabled) "
 		 "(default=0)");
 #endif

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 94fd738..95aeedf 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c

@@ -1,4 +1,5 @@
-/* Virtio balloon implementation, inspired by Dor Loar and Marcelo
+/*
+ * Virtio balloon implementation, inspired by Dor Laor and Marcelo
  * Tosatti's implementations.
  *
  *  Copyright 2008 Rusty Russell IBM Corporation
@@ -17,7 +18,7 @@
  *  along with this program; if not, write to the Free Software
  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-//#define DEBUG
+
 #include <linux/virtio.h>
 #include <linux/virtio_balloon.h>
 #include <linux/swap.h>
@@ -87,7 +88,7 @@
 	init_completion(&vb->acked);
 
 	/* We should always be able to add one buffer to an empty queue. */
-	if (virtqueue_add_buf(vq, &sg, 1, 0, vb) < 0)
+	if (virtqueue_add_buf(vq, &sg, 1, 0, vb, GFP_KERNEL) < 0)
 		BUG();
 	virtqueue_kick(vq);
 
@@ -149,7 +150,6 @@
 		vb->num_pages--;
 	}
 
-
 	/*
 	 * Note that if
 	 * virtio_has_feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST);
@@ -220,7 +220,7 @@
 
 	vq = vb->stats_vq;
 	sg_init_one(&sg, vb->stats, sizeof(vb->stats));
-	if (virtqueue_add_buf(vq, &sg, 1, 0, vb) < 0)
+	if (virtqueue_add_buf(vq, &sg, 1, 0, vb, GFP_KERNEL) < 0)
 		BUG();
 	virtqueue_kick(vq);
 }
@@ -275,14 +275,46 @@
 	return 0;
 }
 
-static int virtballoon_probe(struct virtio_device *vdev)
+static int init_vqs(struct virtio_balloon *vb)
 {
-	struct virtio_balloon *vb;
 	struct virtqueue *vqs[3];
 	vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request };
 	const char *names[] = { "inflate", "deflate", "stats" };
 	int err, nvqs;
 
+	/*
+	 * We expect two virtqueues: inflate and deflate, and
+	 * optionally stat.
+	 */
+	nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 3 : 2;
+	err = vb->vdev->config->find_vqs(vb->vdev, nvqs, vqs, callbacks, names);
+	if (err)
+		return err;
+
+	vb->inflate_vq = vqs[0];
+	vb->deflate_vq = vqs[1];
+	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
+		struct scatterlist sg;
+		vb->stats_vq = vqs[2];
+
+		/*
+		 * Prime this virtqueue with one buffer so the hypervisor can
+		 * use it to signal us later.
+		 */
+		sg_init_one(&sg, vb->stats, sizeof vb->stats);
+		if (virtqueue_add_buf(vb->stats_vq, &sg, 1, 0, vb, GFP_KERNEL)
+		    < 0)
+			BUG();
+		virtqueue_kick(vb->stats_vq);
+	}
+	return 0;
+}
+
+static int virtballoon_probe(struct virtio_device *vdev)
+{
+	struct virtio_balloon *vb;
+	int err;
+
 	vdev->priv = vb = kmalloc(sizeof(*vb), GFP_KERNEL);
 	if (!vb) {
 		err = -ENOMEM;
@@ -295,29 +327,10 @@
 	vb->vdev = vdev;
 	vb->need_stats_update = 0;
 
-	/* We expect two virtqueues: inflate and deflate,
-	 * and optionally stat. */
-	nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 3 : 2;
-	err = vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names);
+	err = init_vqs(vb);
 	if (err)
 		goto out_free_vb;
 
-	vb->inflate_vq = vqs[0];
-	vb->deflate_vq = vqs[1];
-	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
-		struct scatterlist sg;
-		vb->stats_vq = vqs[2];
-
-		/*
-		 * Prime this virtqueue with one buffer so the hypervisor can
-		 * use it to signal us later.
-		 */
-		sg_init_one(&sg, vb->stats, sizeof vb->stats);
-		if (virtqueue_add_buf(vb->stats_vq, &sg, 1, 0, vb) < 0)
-			BUG();
-		virtqueue_kick(vb->stats_vq);
-	}
-
 	vb->thread = kthread_run(balloon, vb, "vballoon");
 	if (IS_ERR(vb->thread)) {
 		err = PTR_ERR(vb->thread);
@@ -351,6 +364,48 @@
 	kfree(vb);
 }
 
+#ifdef CONFIG_PM
+static int virtballoon_freeze(struct virtio_device *vdev)
+{
+	/*
+	 * The kthread is already frozen by the PM core before this
+	 * function is called.
+	 */
+
+	/* Ensure we don't get any more requests from the host */
+	vdev->config->reset(vdev);
+	vdev->config->del_vqs(vdev);
+	return 0;
+}
+
+static int virtballoon_thaw(struct virtio_device *vdev)
+{
+	return init_vqs(vdev->priv);
+}
+
+static int virtballoon_restore(struct virtio_device *vdev)
+{
+	struct virtio_balloon *vb = vdev->priv;
+	struct page *page, *page2;
+
+	/* We're starting from a clean slate */
+	vb->num_pages = 0;
+
+	/*
+	 * If a request wasn't complete at the time of freezing, this
+	 * could have been set.
+	 */
+	vb->need_stats_update = 0;
+
+	/* We don't have these pages in the balloon anymore! */
+	list_for_each_entry_safe(page, page2, &vb->pages, lru) {
+		list_del(&page->lru);
+		totalram_pages++;
+	}
+	return init_vqs(vdev->priv);
+}
+#endif
+
 static unsigned int features[] = {
 	VIRTIO_BALLOON_F_MUST_TELL_HOST,
 	VIRTIO_BALLOON_F_STATS_VQ,
@@ -365,6 +420,11 @@
 	.probe =	virtballoon_probe,
 	.remove =	__devexit_p(virtballoon_remove),
 	.config_changed = virtballoon_changed,
+#ifdef CONFIG_PM
+	.freeze	=	virtballoon_freeze,
+	.restore =	virtballoon_restore,
+	.thaw =		virtballoon_thaw,
+#endif
 };
 
 static int __init init(void)

diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
index 0269717..01d6dc2 100644
--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c

@@ -310,8 +310,8 @@
 			vm_dev->base + VIRTIO_MMIO_QUEUE_PFN);
 
 	/* Create the vring */
-	vq = vring_new_virtqueue(info->num, VIRTIO_MMIO_VRING_ALIGN,
-				 vdev, info->queue, vm_notify, callback, name);
+	vq = vring_new_virtqueue(info->num, VIRTIO_MMIO_VRING_ALIGN, vdev,
+				 true, info->queue, vm_notify, callback, name);
 	if (!vq) {
 		err = -ENOMEM;
 		goto error_new_virtqueue;

diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index baabb79..635e1ef 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c

@@ -55,6 +55,10 @@
 	unsigned msix_vectors;
 	/* Vectors allocated, excluding per-vq vectors if any */
 	unsigned msix_used_vectors;
+
+	/* Status saved during hibernate/restore */
+	u8 saved_status;
+
 	/* Whether we have vector per vq */
 	bool per_vq_vectors;
 };
@@ -414,8 +418,8 @@
 		  vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
 
 	/* create the vring */
-	vq = vring_new_virtqueue(info->num, VIRTIO_PCI_VRING_ALIGN,
-				 vdev, info->queue, vp_notify, callback, name);
+	vq = vring_new_virtqueue(info->num, VIRTIO_PCI_VRING_ALIGN, vdev,
+				 true, info->queue, vp_notify, callback, name);
 	if (!vq) {
 		err = -ENOMEM;
 		goto out_activate_queue;
@@ -716,19 +720,114 @@
 }
 
 #ifdef CONFIG_PM
-static int virtio_pci_suspend(struct pci_dev *pci_dev, pm_message_t state)
+static int virtio_pci_suspend(struct device *dev)
 {
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+
 	pci_save_state(pci_dev);
 	pci_set_power_state(pci_dev, PCI_D3hot);
 	return 0;
 }
 
-static int virtio_pci_resume(struct pci_dev *pci_dev)
+static int virtio_pci_resume(struct device *dev)
 {
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+
 	pci_restore_state(pci_dev);
 	pci_set_power_state(pci_dev, PCI_D0);
 	return 0;
 }
+
+static int virtio_pci_freeze(struct device *dev)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
+	struct virtio_driver *drv;
+	int ret;
+
+	drv = container_of(vp_dev->vdev.dev.driver,
+			   struct virtio_driver, driver);
+
+	ret = 0;
+	vp_dev->saved_status = vp_get_status(&vp_dev->vdev);
+	if (drv && drv->freeze)
+		ret = drv->freeze(&vp_dev->vdev);
+
+	if (!ret)
+		pci_disable_device(pci_dev);
+	return ret;
+}
+
+static int restore_common(struct device *dev)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
+	int ret;
+
+	ret = pci_enable_device(pci_dev);
+	if (ret)
+		return ret;
+	pci_set_master(pci_dev);
+	vp_finalize_features(&vp_dev->vdev);
+
+	return ret;
+}
+
+static int virtio_pci_thaw(struct device *dev)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
+	struct virtio_driver *drv;
+	int ret;
+
+	ret = restore_common(dev);
+	if (ret)
+		return ret;
+
+	drv = container_of(vp_dev->vdev.dev.driver,
+			   struct virtio_driver, driver);
+
+	if (drv && drv->thaw)
+		ret = drv->thaw(&vp_dev->vdev);
+	else if (drv && drv->restore)
+		ret = drv->restore(&vp_dev->vdev);
+
+	/* Finally, tell the device we're all set */
+	if (!ret)
+		vp_set_status(&vp_dev->vdev, vp_dev->saved_status);
+
+	return ret;
+}
+
+static int virtio_pci_restore(struct device *dev)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
+	struct virtio_driver *drv;
+	int ret;
+
+	drv = container_of(vp_dev->vdev.dev.driver,
+			   struct virtio_driver, driver);
+
+	ret = restore_common(dev);
+	if (!ret && drv && drv->restore)
+		ret = drv->restore(&vp_dev->vdev);
+
+	/* Finally, tell the device we're all set */
+	if (!ret)
+		vp_set_status(&vp_dev->vdev, vp_dev->saved_status);
+
+	return ret;
+}
+
+static const struct dev_pm_ops virtio_pci_pm_ops = {
+	.suspend	= virtio_pci_suspend,
+	.resume		= virtio_pci_resume,
+	.freeze		= virtio_pci_freeze,
+	.thaw		= virtio_pci_thaw,
+	.restore	= virtio_pci_restore,
+	.poweroff	= virtio_pci_suspend,
+};
 #endif
 
 static struct pci_driver virtio_pci_driver = {
@@ -737,8 +836,7 @@
 	.probe		= virtio_pci_probe,
 	.remove		= __devexit_p(virtio_pci_remove),
 #ifdef CONFIG_PM
-	.suspend	= virtio_pci_suspend,
-	.resume		= virtio_pci_resume,
+	.driver.pm	= &virtio_pci_pm_ops,
 #endif
 };
 

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index c7a2c20..79e1b29 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c

@@ -22,23 +22,27 @@
 #include <linux/device.h>
 #include <linux/slab.h>
 #include <linux/module.h>
+#include <linux/hrtimer.h>
 
 /* virtio guest is communicating with a virtual "device" that actually runs on
  * a host processor.  Memory barriers are used to control SMP effects. */
 #ifdef CONFIG_SMP
 /* Where possible, use SMP barriers which are more lightweight than mandatory
  * barriers, because mandatory barriers control MMIO effects on accesses
- * through relaxed memory I/O windows (which virtio does not use). */
-#define virtio_mb() smp_mb()
-#define virtio_rmb() smp_rmb()
-#define virtio_wmb() smp_wmb()
+ * through relaxed memory I/O windows (which virtio-pci does not use). */
+#define virtio_mb(vq) \
+	do { if ((vq)->weak_barriers) smp_mb(); else mb(); } while(0)
+#define virtio_rmb(vq) \
+	do { if ((vq)->weak_barriers) smp_rmb(); else rmb(); } while(0)
+#define virtio_wmb(vq) \
+	do { if ((vq)->weak_barriers) smp_wmb(); else wmb(); } while(0)
 #else
 /* We must force memory ordering even if guest is UP since host could be
  * running on another CPU, but SMP barriers are defined to barrier() in that
  * configuration. So fall back to mandatory barriers instead. */
-#define virtio_mb() mb()
-#define virtio_rmb() rmb()
-#define virtio_wmb() wmb()
+#define virtio_mb(vq) mb()
+#define virtio_rmb(vq) rmb()
+#define virtio_wmb(vq) wmb()
 #endif
 
 #ifdef DEBUG
@@ -77,6 +81,9 @@
 	/* Actual memory layout for this queue */
 	struct vring vring;
 
+	/* Can we use weak barriers? */
+	bool weak_barriers;
+
 	/* Other side has made a mess, don't try any more. */
 	bool broken;
 
@@ -102,6 +109,10 @@
 #ifdef DEBUG
 	/* They're supposed to lock for us. */
 	unsigned int in_use;
+
+	/* Figure out if their kicks are too delayed. */
+	bool last_add_time_valid;
+	ktime_t last_add_time;
 #endif
 
 	/* Tokens for callbacks. */
@@ -160,12 +171,29 @@
 	return head;
 }
 
-int virtqueue_add_buf_gfp(struct virtqueue *_vq,
-			  struct scatterlist sg[],
-			  unsigned int out,
-			  unsigned int in,
-			  void *data,
-			  gfp_t gfp)
+/**
+ * virtqueue_add_buf - expose buffer to other end
+ * @vq: the struct virtqueue we're talking about.
+ * @sg: the description of the buffer(s).
+ * @out_num: the number of sg readable by other side
+ * @in_num: the number of sg which are writable (after readable ones)
+ * @data: the token identifying the buffer.
+ * @gfp: how to do memory allocations (if necessary).
+ *
+ * Caller must ensure we don't call this with other virtqueue operations
+ * at the same time (except where noted).
+ *
+ * Returns remaining capacity of queue or a negative error
+ * (ie. ENOSPC).  Note that it only really makes sense to treat all
+ * positive return values as "available": indirect buffers mean that
+ * we can put an entire sg[] array inside a single queue entry.
+ */
+int virtqueue_add_buf(struct virtqueue *_vq,
+		      struct scatterlist sg[],
+		      unsigned int out,
+		      unsigned int in,
+		      void *data,
+		      gfp_t gfp)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
 	unsigned int i, avail, uninitialized_var(prev);
@@ -175,6 +203,19 @@
 
 	BUG_ON(data == NULL);
 
+#ifdef DEBUG
+	{
+		ktime_t now = ktime_get();
+
+		/* No kick or get, with .1 second between?  Warn. */
+		if (vq->last_add_time_valid)
+			WARN_ON(ktime_to_ms(ktime_sub(now, vq->last_add_time))
+					    > 100);
+		vq->last_add_time = now;
+		vq->last_add_time_valid = true;
+	}
+#endif
+
 	/* If the host supports indirect descriptor tables, and we have multiple
 	 * buffers, then go indirect. FIXME: tune this threshold */
 	if (vq->indirect && (out + in) > 1 && vq->num_free) {
@@ -227,40 +268,102 @@
 	vq->data[head] = data;
 
 	/* Put entry in available array (but don't update avail->idx until they
-	 * do sync).  FIXME: avoid modulus here? */
-	avail = (vq->vring.avail->idx + vq->num_added++) % vq->vring.num;
+	 * do sync). */
+	avail = (vq->vring.avail->idx & (vq->vring.num-1));
 	vq->vring.avail->ring[avail] = head;
 
+	/* Descriptors and available array need to be set before we expose the
+	 * new available array entries. */
+	virtio_wmb(vq);
+	vq->vring.avail->idx++;
+	vq->num_added++;
+
+	/* This is very unlikely, but theoretically possible.  Kick
+	 * just in case. */
+	if (unlikely(vq->num_added == (1 << 16) - 1))
+		virtqueue_kick(_vq);
+
 	pr_debug("Added buffer head %i to %p\n", head, vq);
 	END_USE(vq);
 
 	return vq->num_free;
 }
-EXPORT_SYMBOL_GPL(virtqueue_add_buf_gfp);
+EXPORT_SYMBOL_GPL(virtqueue_add_buf);
 
-void virtqueue_kick(struct virtqueue *_vq)
+/**
+ * virtqueue_kick_prepare - first half of split virtqueue_kick call.
+ * @vq: the struct virtqueue
+ *
+ * Instead of virtqueue_kick(), you can do:
+ *	if (virtqueue_kick_prepare(vq))
+ *		virtqueue_notify(vq);
+ *
+ * This is sometimes useful because the virtqueue_kick_prepare() needs
+ * to be serialized, but the actual virtqueue_notify() call does not.
+ */
+bool virtqueue_kick_prepare(struct virtqueue *_vq)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
 	u16 new, old;
+	bool needs_kick;
+
 	START_USE(vq);
 	/* Descriptors and available array need to be set before we expose the
 	 * new available array entries. */
-	virtio_wmb();
+	virtio_wmb(vq);
 
-	old = vq->vring.avail->idx;
-	new = vq->vring.avail->idx = old + vq->num_added;
+	old = vq->vring.avail->idx - vq->num_added;
+	new = vq->vring.avail->idx;
 	vq->num_added = 0;
 
-	/* Need to update avail index before checking if we should notify */
-	virtio_mb();
+#ifdef DEBUG
+	if (vq->last_add_time_valid) {
+		WARN_ON(ktime_to_ms(ktime_sub(ktime_get(),
+					      vq->last_add_time)) > 100);
+	}
+	vq->last_add_time_valid = false;
+#endif
 
-	if (vq->event ?
-	    vring_need_event(vring_avail_event(&vq->vring), new, old) :
-	    !(vq->vring.used->flags & VRING_USED_F_NO_NOTIFY))
-		/* Prod other side to tell it about changes. */
-		vq->notify(&vq->vq);
-
+	if (vq->event) {
+		needs_kick = vring_need_event(vring_avail_event(&vq->vring),
+					      new, old);
+	} else {
+		needs_kick = !(vq->vring.used->flags & VRING_USED_F_NO_NOTIFY);
+	}
 	END_USE(vq);
+	return needs_kick;
+}
+EXPORT_SYMBOL_GPL(virtqueue_kick_prepare);
+
+/**
+ * virtqueue_notify - second half of split virtqueue_kick call.
+ * @vq: the struct virtqueue
+ *
+ * This does not need to be serialized.
+ */
+void virtqueue_notify(struct virtqueue *_vq)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+
+	/* Prod other side to tell it about changes. */
+	vq->notify(_vq);
+}
+EXPORT_SYMBOL_GPL(virtqueue_notify);
+
+/**
+ * virtqueue_kick - update after add_buf
+ * @vq: the struct virtqueue
+ *
+ * After one or more virtqueue_add_buf calls, invoke this to kick
+ * the other side.
+ *
+ * Caller must ensure we don't call this with other virtqueue
+ * operations at the same time (except where noted).
+ */
+void virtqueue_kick(struct virtqueue *vq)
+{
+	if (virtqueue_kick_prepare(vq))
+		virtqueue_notify(vq);
 }
 EXPORT_SYMBOL_GPL(virtqueue_kick);
 
@@ -294,11 +397,28 @@
 	return vq->last_used_idx != vq->vring.used->idx;
 }
 
+/**
+ * virtqueue_get_buf - get the next used buffer
+ * @vq: the struct virtqueue we're talking about.
+ * @len: the length written into the buffer
+ *
+ * If the other side wrote data into the buffer, @len will be set to the
+ * amount written.  This means you don't need to clear the buffer
+ * beforehand to ensure there's no data leakage in the case of short
+ * writes.
+ *
+ * Caller must ensure we don't call this with other virtqueue
+ * operations at the same time (except where noted).
+ *
+ * Returns NULL if there are no used buffers, or the "data" token
+ * handed to virtqueue_add_buf().
+ */
 void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
 	void *ret;
 	unsigned int i;
+	u16 last_used;
 
 	START_USE(vq);
 
@@ -314,10 +434,11 @@
 	}
 
 	/* Only get used array entries after they have been exposed by host. */
-	virtio_rmb();
+	virtio_rmb(vq);
 
-	i = vq->vring.used->ring[vq->last_used_idx%vq->vring.num].id;
-	*len = vq->vring.used->ring[vq->last_used_idx%vq->vring.num].len;
+	last_used = (vq->last_used_idx & (vq->vring.num - 1));
+	i = vq->vring.used->ring[last_used].id;
+	*len = vq->vring.used->ring[last_used].len;
 
 	if (unlikely(i >= vq->vring.num)) {
 		BAD_RING(vq, "id %u out of range\n", i);
@@ -337,14 +458,27 @@
 	 * the read in the next get_buf call. */
 	if (!(vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) {
 		vring_used_event(&vq->vring) = vq->last_used_idx;
-		virtio_mb();
+		virtio_mb(vq);
 	}
 
+#ifdef DEBUG
+	vq->last_add_time_valid = false;
+#endif
+
 	END_USE(vq);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(virtqueue_get_buf);
 
+/**
+ * virtqueue_disable_cb - disable callbacks
+ * @vq: the struct virtqueue we're talking about.
+ *
+ * Note that this is not necessarily synchronous, hence unreliable and only
+ * useful as an optimization.
+ *
+ * Unlike other operations, this need not be serialized.
+ */
 void virtqueue_disable_cb(struct virtqueue *_vq)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
@@ -353,6 +487,17 @@
 }
 EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
 
+/**
+ * virtqueue_enable_cb - restart callbacks after disable_cb.
+ * @vq: the struct virtqueue we're talking about.
+ *
+ * This re-enables callbacks; it returns "false" if there are pending
+ * buffers in the queue, to detect a possible race between the driver
+ * checking for more work, and enabling callbacks.
+ *
+ * Caller must ensure we don't call this with other virtqueue
+ * operations at the same time (except where noted).
+ */
 bool virtqueue_enable_cb(struct virtqueue *_vq)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
@@ -366,7 +511,7 @@
 	 * entry. Always do both to keep code simple. */
 	vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
 	vring_used_event(&vq->vring) = vq->last_used_idx;
-	virtio_mb();
+	virtio_mb(vq);
 	if (unlikely(more_used(vq))) {
 		END_USE(vq);
 		return false;
@@ -377,6 +522,19 @@
 }
 EXPORT_SYMBOL_GPL(virtqueue_enable_cb);
 
+/**
+ * virtqueue_enable_cb_delayed - restart callbacks after disable_cb.
+ * @vq: the struct virtqueue we're talking about.
+ *
+ * This re-enables callbacks but hints to the other side to delay
+ * interrupts until most of the available buffers have been processed;
+ * it returns "false" if there are many pending buffers in the queue,
+ * to detect a possible race between the driver checking for more work,
+ * and enabling callbacks.
+ *
+ * Caller must ensure we don't call this with other virtqueue
+ * operations at the same time (except where noted).
+ */
 bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
@@ -393,7 +551,7 @@
 	/* TODO: tune this threshold */
 	bufs = (u16)(vq->vring.avail->idx - vq->last_used_idx) * 3 / 4;
 	vring_used_event(&vq->vring) = vq->last_used_idx + bufs;
-	virtio_mb();
+	virtio_mb(vq);
 	if (unlikely((u16)(vq->vring.used->idx - vq->last_used_idx) > bufs)) {
 		END_USE(vq);
 		return false;
@@ -404,6 +562,14 @@
 }
 EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
 
+/**
+ * virtqueue_detach_unused_buf - detach first unused buffer
+ * @vq: the struct virtqueue we're talking about.
+ *
+ * Returns NULL or the "data" token handed to virtqueue_add_buf().
+ * This is not valid on an active queue; it is useful only for device
+ * shutdown.
+ */
 void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
@@ -453,6 +619,7 @@
 struct virtqueue *vring_new_virtqueue(unsigned int num,
 				      unsigned int vring_align,
 				      struct virtio_device *vdev,
+				      bool weak_barriers,
 				      void *pages,
 				      void (*notify)(struct virtqueue *),
 				      void (*callback)(struct virtqueue *),
@@ -476,12 +643,14 @@
 	vq->vq.vdev = vdev;
 	vq->vq.name = name;
 	vq->notify = notify;
+	vq->weak_barriers = weak_barriers;
 	vq->broken = false;
 	vq->last_used_idx = 0;
 	vq->num_added = 0;
 	list_add_tail(&vq->vq.list, &vdev->vqs);
 #ifdef DEBUG
 	vq->in_use = false;
+	vq->last_add_time_valid = false;
 #endif
 
 	vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC);
@@ -530,7 +699,13 @@
 }
 EXPORT_SYMBOL_GPL(vring_transport_features);
 
-/* return the size of the vring within the virtqueue */
+/**
+ * virtqueue_get_vring_size - return the size of the virtqueue's vring
+ * @vq: the struct virtqueue containing the vring of interest.
+ *
+ * Returns the size of the vring.  This is mainly used for boasting to
+ * userspace.  Unlike other operations, this need not be serialized.
+ */
 unsigned int virtqueue_get_vring_size(struct virtqueue *_vq)
 {
 

diff --git a/fs/aio.c b/fs/aio.c
index 78c514c..969beb0 100644
--- a/fs/aio.c
+++ b/fs/aio.c

@@ -476,14 +476,21 @@
 	batch->count = total;
 }
 
-static void kiocb_batch_free(struct kiocb_batch *batch)
+static void kiocb_batch_free(struct kioctx *ctx, struct kiocb_batch *batch)
 {
 	struct kiocb *req, *n;
 
+	if (list_empty(&batch->head))
+		return;
+
+	spin_lock_irq(&ctx->ctx_lock);
 	list_for_each_entry_safe(req, n, &batch->head, ki_batch) {
 		list_del(&req->ki_batch);
+		list_del(&req->ki_list);
 		kmem_cache_free(kiocb_cachep, req);
+		ctx->reqs_active--;
 	}
+	spin_unlock_irq(&ctx->ctx_lock);
 }
 
 /*
@@ -1742,7 +1749,7 @@
 	}
 	blk_finish_plug(&plug);
 
-	kiocb_batch_free(&batch);
+	kiocb_batch_free(ctx, &batch);
 	put_ioctx(ctx);
 	return i ? i : ret;
 }

diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 9ef5b29..da8876d 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c

@@ -76,7 +76,7 @@
 		data += wr;
 		bytes -= wr;
 	}
-	mutex_lock(&sbi->pipe_mutex);
+	mutex_unlock(&sbi->pipe_mutex);
 
 	set_fs(fs);
 

diff --git a/fs/block_dev.c b/fs/block_dev.c
index afe74dd..0e575d1 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c

@@ -1139,6 +1139,7 @@
 	mutex_lock_nested(&bdev->bd_mutex, for_part);
 	if (!bdev->bd_openers) {
 		bdev->bd_disk = disk;
+		bdev->bd_queue = disk->queue;
 		bdev->bd_contains = bdev;
 		if (!partno) {
 			struct backing_dev_info *bdi;
@@ -1159,6 +1160,7 @@
 					disk_put_part(bdev->bd_part);
 					bdev->bd_part = NULL;
 					bdev->bd_disk = NULL;
+					bdev->bd_queue = NULL;
 					mutex_unlock(&bdev->bd_mutex);
 					disk_unblock_events(disk);
 					put_disk(disk);
@@ -1232,6 +1234,7 @@
 	disk_put_part(bdev->bd_part);
 	bdev->bd_disk = NULL;
 	bdev->bd_part = NULL;
+	bdev->bd_queue = NULL;
 	bdev_inode_switch_bdi(bdev->bd_inode, &default_backing_dev_info);
 	if (bdev != bdev->bd_contains)
 		__blkdev_put(bdev->bd_contains, mode, 1);

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index f99a099..d852566 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c

@@ -872,7 +872,8 @@
 
 #ifdef CONFIG_MIGRATION
 static int btree_migratepage(struct address_space *mapping,
-			struct page *newpage, struct page *page)
+			struct page *newpage, struct page *page,
+			enum migrate_mode mode)
 {
 	/*
 	 * we can't safely write a btree page from here,
@@ -887,7 +888,7 @@
 	if (page_has_private(page) &&
 	    !try_to_release_page(page, GFP_KERNEL))
 		return -EAGAIN;
-	return migrate_page(mapping, newpage, page);
+	return migrate_page(mapping, newpage, page, mode);
 }
 #endif
 

diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 74fd747..618246b 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c

@@ -973,7 +973,7 @@
 
 	spin_lock(&dentry->d_lock);
 	di = ceph_dentry(dentry);
-	if (di && di->lease_session) {
+	if (di->lease_session) {
 		s = di->lease_session;
 		spin_lock(&s->s_cap_lock);
 		gen = s->s_cap_gen;
@@ -1072,13 +1072,11 @@
 	struct ceph_dentry_info *di = ceph_dentry(dentry);
 
 	dout("d_release %p\n", dentry);
-	if (di) {
-		ceph_dentry_lru_del(dentry);
-		if (di->lease_session)
-			ceph_put_mds_session(di->lease_session);
-		kmem_cache_free(ceph_dentry_cachep, di);
-		dentry->d_fsdata = NULL;
-	}
+	ceph_dentry_lru_del(dentry);
+	if (di->lease_session)
+		ceph_put_mds_session(di->lease_session);
+	kmem_cache_free(ceph_dentry_cachep, di);
+	dentry->d_fsdata = NULL;
 }
 
 static int ceph_snapdir_d_revalidate(struct dentry *dentry,
@@ -1096,17 +1094,36 @@
  */
 void ceph_dir_set_complete(struct inode *inode)
 {
-	/* not yet implemented */
+	struct dentry *dentry = d_find_any_alias(inode);
+	
+	if (dentry && ceph_dentry(dentry) &&
+	    ceph_test_mount_opt(ceph_sb_to_client(dentry->d_sb), DCACHE)) {
+		dout(" marking %p (%p) complete\n", inode, dentry);
+		set_bit(CEPH_D_COMPLETE, &ceph_dentry(dentry)->flags);
+	}
+	dput(dentry);
 }
 
 void ceph_dir_clear_complete(struct inode *inode)
 {
-	/* not yet implemented */
+	struct dentry *dentry = d_find_any_alias(inode);
+	/* Drop CEPH_D_COMPLETE on an alias dentry of this inode, if any. */
+	if (dentry && ceph_dentry(dentry)) {
+		dout(" marking %p (%p) NOT complete\n", inode, dentry);
+		clear_bit(CEPH_D_COMPLETE, &ceph_dentry(dentry)->flags);
+	}
+	dput(dentry);
 }
 
 bool ceph_dir_test_complete(struct inode *inode)
 {
-	/* not yet implemented */
+	struct dentry *dentry = d_find_any_alias(inode);
+	/* Pure query: read CEPH_D_COMPLETE before dput, never modify it. */
+	bool complete = dentry && ceph_dentry(dentry) &&
+		test_bit(CEPH_D_COMPLETE, &ceph_dentry(dentry)->flags);
+	dput(dentry);
+	if (complete)
+		return true;
 	return false;
 }
 
@@ -1220,6 +1237,7 @@
 	do {
 		ceph_mdsc_get_request(req);
 		spin_unlock(&ci->i_unsafe_lock);
+
 		dout("dir_fsync %p wait on tid %llu (until %llu)\n",
 		     inode, req->r_tid, last_tid);
 		if (req->r_timeout) {
@@ -1232,9 +1250,9 @@
 		} else {
 			wait_for_completion(&req->r_safe_completion);
 		}
-		spin_lock(&ci->i_unsafe_lock);
 		ceph_mdsc_put_request(req);
 
+		spin_lock(&ci->i_unsafe_lock);
 		if (ret || list_empty(head))
 			break;
 		req = list_entry(head->next,
@@ -1259,13 +1277,11 @@
 
 	dout("dentry_lru_add %p %p '%.*s'\n", di, dn,
 	     dn->d_name.len, dn->d_name.name);
-	if (di) {
-		mdsc = ceph_sb_to_client(dn->d_sb)->mdsc;
-		spin_lock(&mdsc->dentry_lru_lock);
-		list_add_tail(&di->lru, &mdsc->dentry_lru);
-		mdsc->num_dentry++;
-		spin_unlock(&mdsc->dentry_lru_lock);
-	}
+	mdsc = ceph_sb_to_client(dn->d_sb)->mdsc;
+	spin_lock(&mdsc->dentry_lru_lock);
+	list_add_tail(&di->lru, &mdsc->dentry_lru);
+	mdsc->num_dentry++;
+	spin_unlock(&mdsc->dentry_lru_lock);
 }
 
 void ceph_dentry_lru_touch(struct dentry *dn)
@@ -1275,12 +1291,10 @@
 
 	dout("dentry_lru_touch %p %p '%.*s' (offset %lld)\n", di, dn,
 	     dn->d_name.len, dn->d_name.name, di->offset);
-	if (di) {
-		mdsc = ceph_sb_to_client(dn->d_sb)->mdsc;
-		spin_lock(&mdsc->dentry_lru_lock);
-		list_move_tail(&di->lru, &mdsc->dentry_lru);
-		spin_unlock(&mdsc->dentry_lru_lock);
-	}
+	mdsc = ceph_sb_to_client(dn->d_sb)->mdsc;
+	spin_lock(&mdsc->dentry_lru_lock);
+	list_move_tail(&di->lru, &mdsc->dentry_lru);
+	spin_unlock(&mdsc->dentry_lru_lock);
 }
 
 void ceph_dentry_lru_del(struct dentry *dn)
@@ -1290,13 +1304,11 @@
 
 	dout("dentry_lru_del %p %p '%.*s'\n", di, dn,
 	     dn->d_name.len, dn->d_name.name);
-	if (di) {
-		mdsc = ceph_sb_to_client(dn->d_sb)->mdsc;
-		spin_lock(&mdsc->dentry_lru_lock);
-		list_del_init(&di->lru);
-		mdsc->num_dentry--;
-		spin_unlock(&mdsc->dentry_lru_lock);
-	}
+	mdsc = ceph_sb_to_client(dn->d_sb)->mdsc;
+	spin_lock(&mdsc->dentry_lru_lock);
+	list_del_init(&di->lru);
+	mdsc->num_dentry--;
+	spin_unlock(&mdsc->dentry_lru_lock);
 }
 
 /*

diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index 9fbcdec..fbb2a64 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c

@@ -56,9 +56,7 @@
 		return -EINVAL;
 
 	spin_lock(&dentry->d_lock);
-	parent = dget(dentry->d_parent);
-	spin_unlock(&dentry->d_lock);
-
+	parent = dentry->d_parent;
 	if (*max_len >= connected_handle_length) {
 		dout("encode_fh %p connectable\n", dentry);
 		cfh->ino = ceph_ino(dentry->d_inode);
@@ -81,7 +79,7 @@
 		*max_len = handle_length;
 		type = 255;
 	}
-	dput(parent);
+	spin_unlock(&dentry->d_lock);
 	return type;
 }
 

diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 25283e7..2c48937 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c

@@ -850,11 +850,12 @@
 {
 	struct dentry *dir = dn->d_parent;
 	struct inode *inode = dir->d_inode;
-	struct ceph_inode_info *ci = ceph_inode(inode);
+	struct ceph_inode_info *ci;
 	struct ceph_dentry_info *di;
 
 	BUG_ON(!inode);
 
+	ci = ceph_inode(inode);
 	di = ceph_dentry(dn);
 
 	spin_lock(&ci->i_ceph_lock);

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 6203d80..23ab6a3 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c

@@ -2772,7 +2772,7 @@
 	di = ceph_dentry(dentry);
 	switch (h->action) {
 	case CEPH_MDS_LEASE_REVOKE:
-		if (di && di->lease_session == session) {
+		if (di->lease_session == session) {
 			if (ceph_seq_cmp(di->lease_seq, seq) > 0)
 				h->seq = cpu_to_le32(di->lease_seq);
 			__ceph_mdsc_drop_dentry_lease(dentry);
@@ -2781,7 +2781,7 @@
 		break;
 
 	case CEPH_MDS_LEASE_RENEW:
-		if (di && di->lease_session == session &&
+		if (di->lease_session == session &&
 		    di->lease_gen == session->s_cap_gen &&
 		    di->lease_renew_from &&
 		    di->lease_renew_after == 0) {

diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 48f61a1..00de2c9 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c

@@ -131,6 +131,8 @@
 	Opt_rbytes,
 	Opt_norbytes,
 	Opt_noasyncreaddir,
+	Opt_dcache,
+	Opt_nodcache,
 	Opt_ino32,
 };
 
@@ -152,6 +154,8 @@
 	{Opt_rbytes, "rbytes"},
 	{Opt_norbytes, "norbytes"},
 	{Opt_noasyncreaddir, "noasyncreaddir"},
+	{Opt_dcache, "dcache"},
+	{Opt_nodcache, "nodcache"},
 	{Opt_ino32, "ino32"},
 	{-1, NULL}
 };
@@ -231,6 +235,12 @@
 	case Opt_noasyncreaddir:
 		fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR;
 		break;
+	case Opt_dcache:
+		fsopt->flags |= CEPH_MOUNT_OPT_DCACHE;
+		break;
+	case Opt_nodcache:
+		fsopt->flags &= ~CEPH_MOUNT_OPT_DCACHE;
+		break;
 	case Opt_ino32:
 		fsopt->flags |= CEPH_MOUNT_OPT_INO32;
 		break;
@@ -377,6 +387,10 @@
 		seq_puts(m, ",norbytes");
 	if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR)
 		seq_puts(m, ",noasyncreaddir");
+	if (fsopt->flags & CEPH_MOUNT_OPT_DCACHE)
+		seq_puts(m, ",dcache");
+	else
+		seq_puts(m, ",nodcache");
 
 	if (fsopt->wsize)
 		seq_printf(m, ",wsize=%d", fsopt->wsize);
@@ -647,10 +661,10 @@
 				root = ERR_PTR(-ENOMEM);
 				goto out;
 			}
-			ceph_init_dentry(root);
 		} else {
 			root = d_obtain_alias(inode);
 		}
+		ceph_init_dentry(root);
 		dout("open_root_inode success, root dentry is %p\n", root);
 	} else {
 		root = ERR_PTR(err);

diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index cb3652b..1421f3d 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h

@@ -28,6 +28,7 @@
 #define CEPH_MOUNT_OPT_RBYTES          (1<<5) /* dir st_bytes = rbytes */
 #define CEPH_MOUNT_OPT_NOASYNCREADDIR  (1<<7) /* no dcache readdir */
 #define CEPH_MOUNT_OPT_INO32           (1<<8) /* 32 bit inos */
+#define CEPH_MOUNT_OPT_DCACHE          (1<<9) /* use dcache for readdir etc */
 
 #define CEPH_MOUNT_OPT_DEFAULT    (CEPH_MOUNT_OPT_RBYTES)
 

diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index a5e36e4..857214a 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c

@@ -818,6 +818,7 @@
 	struct ceph_vxattr_cb *vxattrs = ceph_inode_vxattrs(inode);
 	int issued;
 	int err;
+	int required_blob_size;
 	int dirty;
 
 	if (ceph_snap(inode) != CEPH_NOSNAP)
@@ -833,14 +834,34 @@
 			return -EOPNOTSUPP;
 	}
 
+	err = -ENOMEM;
 	spin_lock(&ci->i_ceph_lock);
 	__build_xattrs(inode);
+retry:
 	issued = __ceph_caps_issued(ci, NULL);
 	dout("removexattr %p issued %s\n", inode, ceph_cap_string(issued));
 
 	if (!(issued & CEPH_CAP_XATTR_EXCL))
 		goto do_sync;
 
+	required_blob_size = __get_required_blob_size(ci, 0, 0);
+
+	if (!ci->i_xattrs.prealloc_blob ||
+	    required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
+		struct ceph_buffer *blob;
+
+		spin_unlock(&ci->i_ceph_lock);
+		dout(" preaallocating new blob size=%d\n", required_blob_size);
+		blob = ceph_buffer_new(required_blob_size, GFP_NOFS);
+		if (!blob)
+			goto out;
+		spin_lock(&ci->i_ceph_lock);
+		if (ci->i_xattrs.prealloc_blob)
+			ceph_buffer_put(ci->i_xattrs.prealloc_blob);
+		ci->i_xattrs.prealloc_blob = blob;
+		goto retry;
+	}
+
 	err = __remove_xattr_by_name(ceph_inode(inode), name);
 	dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL);
 	ci->i_xattrs.dirty = true;
@@ -853,6 +874,7 @@
 do_sync:
 	spin_unlock(&ci->i_ceph_lock);
 	err = ceph_send_removexattr(dentry, name);
+out:
 	return err;
 }
 

diff --git a/fs/dcache.c b/fs/dcache.c
index 616fedf..16a53cc 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c

@@ -1475,7 +1475,14 @@
 	return alias;
 }
 
-static struct dentry * d_find_any_alias(struct inode *inode)
+/**
+ * d_find_any_alias - find any alias for a given inode
+ * @inode: inode to find an alias for
+ *
+ * If any aliases exist for the given inode, take and return a
+ * reference for one of them.  If no aliases exist, return %NULL.
+ */
+struct dentry *d_find_any_alias(struct inode *inode)
 {
 	struct dentry *de;
 
@@ -1484,7 +1491,7 @@
 	spin_unlock(&inode->i_lock);
 	return de;
 }
-
+EXPORT_SYMBOL(d_find_any_alias);
 
 /**
  * d_obtain_alias - find or allocate a dentry for a given inode

diff --git a/fs/direct-io.c b/fs/direct-io.c
index d740ab6..4a588db 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c

@@ -36,6 +36,7 @@
 #include <linux/rwsem.h>
 #include <linux/uio.h>
 #include <linux/atomic.h>
+#include <linux/prefetch.h>
 
 /*
  * How many user pages to map in one call to get_user_pages().  This determines
@@ -580,9 +581,8 @@
 {
 	int ret;
 	sector_t fs_startblk;	/* Into file, in filesystem-sized blocks */
+	sector_t fs_endblk;	/* Into file, in filesystem-sized blocks */
 	unsigned long fs_count;	/* Number of filesystem-sized blocks */
-	unsigned long dio_count;/* Number of dio_block-sized blocks */
-	unsigned long blkmask;
 	int create;
 
 	/*
@@ -593,11 +593,9 @@
 	if (ret == 0) {
 		BUG_ON(sdio->block_in_file >= sdio->final_block_in_request);
 		fs_startblk = sdio->block_in_file >> sdio->blkfactor;
-		dio_count = sdio->final_block_in_request - sdio->block_in_file;
-		fs_count = dio_count >> sdio->blkfactor;
-		blkmask = (1 << sdio->blkfactor) - 1;
-		if (dio_count & blkmask)	
-			fs_count++;
+		fs_endblk = (sdio->final_block_in_request - 1) >>
+					sdio->blkfactor;
+		fs_count = fs_endblk - fs_startblk + 1;
 
 		map_bh->b_state = 0;
 		map_bh->b_size = fs_count << dio->inode->i_blkbits;
@@ -1090,8 +1088,8 @@
  * individual fields and will generate much worse code. This is important
  * for the whole file.
  */
-ssize_t
-__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
+static inline ssize_t
+do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	struct block_device *bdev, const struct iovec *iov, loff_t offset, 
 	unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
 	dio_submit_t submit_io,	int flags)
@@ -1100,7 +1098,6 @@
 	size_t size;
 	unsigned long addr;
 	unsigned blkbits = inode->i_blkbits;
-	unsigned bdev_blkbits = 0;
 	unsigned blocksize_mask = (1 << blkbits) - 1;
 	ssize_t retval = -EINVAL;
 	loff_t end = offset;
@@ -1113,12 +1110,14 @@
 	if (rw & WRITE)
 		rw = WRITE_ODIRECT;
 
-	if (bdev)
-		bdev_blkbits = blksize_bits(bdev_logical_block_size(bdev));
+	/*
+	 * Avoid references to bdev if not absolutely needed to give
+	 * the early prefetch in the caller enough time.
+	 */
 
 	if (offset & blocksize_mask) {
 		if (bdev)
-			 blkbits = bdev_blkbits;
+			blkbits = blksize_bits(bdev_logical_block_size(bdev));
 		blocksize_mask = (1 << blkbits) - 1;
 		if (offset & blocksize_mask)
 			goto out;
@@ -1129,11 +1128,13 @@
 		addr = (unsigned long)iov[seg].iov_base;
 		size = iov[seg].iov_len;
 		end += size;
-		if ((addr & blocksize_mask) || (size & blocksize_mask))  {
+		if (unlikely((addr & blocksize_mask) ||
+			     (size & blocksize_mask))) {
 			if (bdev)
-				 blkbits = bdev_blkbits;
+				blkbits = blksize_bits(
+					 bdev_logical_block_size(bdev));
 			blocksize_mask = (1 << blkbits) - 1;
-			if ((addr & blocksize_mask) || (size & blocksize_mask))  
+			if ((addr & blocksize_mask) || (size & blocksize_mask))
 				goto out;
 		}
 	}
@@ -1316,6 +1317,30 @@
 out:
 	return retval;
 }
+
+ssize_t
+__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
+	struct block_device *bdev, const struct iovec *iov, loff_t offset,
+	unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
+	dio_submit_t submit_io,	int flags)
+{
+	/*
+	 * The block device state is needed in the end to finally
+	 * submit everything.  Since it's likely to be cache cold
+	 * prefetch it here as first thing to hide some of the
+	 * latency.
+	 *
+	 * Attempt to prefetch the pieces we likely need later.
+	 */
+	prefetch(&bdev->bd_disk->part_tbl);
+	prefetch(bdev->bd_queue);
+	prefetch((char *)bdev->bd_queue + SMP_CACHE_BYTES);
+
+	return do_blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset,
+				     nr_segs, get_block, end_io,
+				     submit_io, flags);
+}
+
 EXPORT_SYMBOL(__blockdev_direct_IO);
 
 static __init int dio_init(void)

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 828e750..aabdfc3 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c

@@ -197,6 +197,12 @@
 
 	/* The user that created the eventpoll descriptor */
 	struct user_struct *user;
+
+	struct file *file;
+
+	/* used to optimize loop detection check */
+	int visited;
+	struct list_head visited_list_link;
 };
 
 /* Wait structure used by the poll hooks */
@@ -255,6 +261,15 @@
 /* Slab cache used to allocate "struct eppoll_entry" */
 static struct kmem_cache *pwq_cache __read_mostly;
 
+/* Visited nodes during ep_loop_check(), so we can unset them when we finish */
+static LIST_HEAD(visited_list);
+
+/*
+ * List of files with newly added links, where we may need to limit the number
+ * of emanating paths. Protected by the epmutex.
+ */
+static LIST_HEAD(tfile_check_list);
+
 #ifdef CONFIG_SYSCTL
 
 #include <linux/sysctl.h>
@@ -276,6 +291,12 @@
 };
 #endif /* CONFIG_SYSCTL */
 
+static const struct file_operations eventpoll_fops;
+
+static inline int is_file_epoll(struct file *f)
+{
+	return f->f_op == &eventpoll_fops;
+}
 
 /* Setup the structure that is used as key for the RB tree */
 static inline void ep_set_ffd(struct epoll_filefd *ffd,
@@ -711,12 +732,6 @@
 	.llseek		= noop_llseek,
 };
 
-/* Fast test to see if the file is an eventpoll file */
-static inline int is_file_epoll(struct file *f)
-{
-	return f->f_op == &eventpoll_fops;
-}
-
 /*
  * This is called from eventpoll_release() to unlink files from the eventpoll
  * interface. We need to have this facility to cleanup correctly files that are
@@ -926,6 +941,99 @@
 	rb_insert_color(&epi->rbn, &ep->rbr);
 }
 
+
+
+#define PATH_ARR_SIZE 5
+/*
+ * These are the number of paths of length 1 to 5 that we allow to emanate
+ * from a single file of interest. For example, we allow 1000 paths of length
+ * 1, to emanate from each file of interest. This essentially represents the
+ * potential wakeup paths, which need to be limited in order to avoid massive
+ * uncontrolled wakeup storms. The common use case should be a single ep which
+ * is connected to n file sources. In this case each file source has 1 path
+ * of length 1. Thus, the numbers below should be more than sufficient. These
+ * path limits are enforced during an EPOLL_CTL_ADD operation, since a modify
+ * and delete can't add additional paths. Protected by the epmutex.
+ */
+static const int path_limits[PATH_ARR_SIZE] = { 1000, 500, 100, 50, 10 };
+static int path_count[PATH_ARR_SIZE];
+
+static int path_count_inc(int nests)
+{
+	if (++path_count[nests] > path_limits[nests])
+		return -1;
+	return 0;
+}
+
+static void path_count_init(void)
+{
+	int i;
+
+	for (i = 0; i < PATH_ARR_SIZE; i++)
+		path_count[i] = 0;
+}
+
+static int reverse_path_check_proc(void *priv, void *cookie, int call_nests)
+{
+	int error = 0;
+	struct file *file = priv;
+	struct file *child_file;
+	struct epitem *epi;
+
+	list_for_each_entry(epi, &file->f_ep_links, fllink) {
+		child_file = epi->ep->file;
+		if (is_file_epoll(child_file)) {
+			if (list_empty(&child_file->f_ep_links)) {
+				if (path_count_inc(call_nests)) {
+					error = -1;
+					break;
+				}
+			} else {
+				error = ep_call_nested(&poll_loop_ncalls,
+							EP_MAX_NESTS,
+							reverse_path_check_proc,
+							child_file, child_file,
+							current);
+			}
+			if (error != 0)
+				break;
+		} else {
+			printk(KERN_ERR "reverse_path_check_proc: "
+				"file is not an ep!\n");
+		}
+	}
+	return error;
+}
+
+/**
+ * reverse_path_check - The tfile_check_list is a list of file *, which have
+ *                      links that are proposed to be newly added. We need to
+ *                      make sure that those added links don't add too many
+ *                      paths such that we will spend all our time waking up
+ *                      eventpoll objects.
+ *
+ * Returns: Returns zero if the proposed links don't create too many paths,
+ *	    -1 otherwise.
+ */
+static int reverse_path_check(void)
+{
+	int length = 0;
+	int error = 0;
+	struct file *current_file;
+
+	/* let's call this for all tfiles */
+	list_for_each_entry(current_file, &tfile_check_list, f_tfile_llink) {
+		length++;
+		path_count_init();
+		error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
+					reverse_path_check_proc, current_file,
+					current_file, current);
+		if (error)
+			break;
+	}
+	return error;
+}
+
 /*
  * Must be called with "mtx" held.
  */
@@ -987,6 +1095,11 @@
 	 */
 	ep_rbtree_insert(ep, epi);
 
+	/* now check if we've created too many backpaths */
+	error = -EINVAL;
+	if (reverse_path_check())
+		goto error_remove_epi;
+
 	/* We have to drop the new item inside our item list to keep track of it */
 	spin_lock_irqsave(&ep->lock, flags);
 
@@ -1011,6 +1124,14 @@
 
 	return 0;
 
+error_remove_epi:
+	spin_lock(&tfile->f_lock);
+	if (ep_is_linked(&epi->fllink))
+		list_del_init(&epi->fllink);
+	spin_unlock(&tfile->f_lock);
+
+	rb_erase(&epi->rbn, &ep->rbr);
+
 error_unregister:
 	ep_unregister_pollwait(ep, epi);
 
@@ -1275,18 +1396,36 @@
 	int error = 0;
 	struct file *file = priv;
 	struct eventpoll *ep = file->private_data;
+	struct eventpoll *ep_tovisit;
 	struct rb_node *rbp;
 	struct epitem *epi;
 
 	mutex_lock_nested(&ep->mtx, call_nests + 1);
+	ep->visited = 1;
+	list_add(&ep->visited_list_link, &visited_list);
 	for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
 		epi = rb_entry(rbp, struct epitem, rbn);
 		if (unlikely(is_file_epoll(epi->ffd.file))) {
+			ep_tovisit = epi->ffd.file->private_data;
+			if (ep_tovisit->visited)
+				continue;
 			error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
-					       ep_loop_check_proc, epi->ffd.file,
-					       epi->ffd.file->private_data, current);
+					ep_loop_check_proc, epi->ffd.file,
+					ep_tovisit, current);
 			if (error != 0)
 				break;
+		} else {
+			/*
+			 * If we've reached a file that is not associated with
+			 * an ep, then we need to check if the newly added
+			 * links are going to add too many wakeup paths. We do
+			 * this by adding it to the tfile_check_list, if it's
+			 * not already there, and calling reverse_path_check()
+			 * during ep_insert().
+			 */
+			if (list_empty(&epi->ffd.file->f_tfile_llink))
+				list_add(&epi->ffd.file->f_tfile_llink,
+					 &tfile_check_list);
 		}
 	}
 	mutex_unlock(&ep->mtx);
@@ -1307,8 +1446,31 @@
  */
 static int ep_loop_check(struct eventpoll *ep, struct file *file)
 {
-	return ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
+	int ret;
+	struct eventpoll *ep_cur, *ep_next;
+
+	ret = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
 			      ep_loop_check_proc, file, ep, current);
+	/* clear visited list */
+	list_for_each_entry_safe(ep_cur, ep_next, &visited_list,
+							visited_list_link) {
+		ep_cur->visited = 0;
+		list_del(&ep_cur->visited_list_link);
+	}
+	return ret;
+}
+
+static void clear_tfile_check_list(void)
+{
+	struct file *file;
+
+	/* first clear the tfile_check_list */
+	while (!list_empty(&tfile_check_list)) {
+		file = list_first_entry(&tfile_check_list, struct file,
+					f_tfile_llink);
+		list_del_init(&file->f_tfile_llink);
+	}
+	INIT_LIST_HEAD(&tfile_check_list);
 }
 
 /*
@@ -1316,8 +1478,9 @@
  */
 SYSCALL_DEFINE1(epoll_create1, int, flags)
 {
-	int error;
+	int error, fd;
 	struct eventpoll *ep = NULL;
+	struct file *file;
 
 	/* Check the EPOLL_* constant for consistency.  */
 	BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC);
@@ -1334,11 +1497,25 @@
 	 * Creates all the items needed to setup an eventpoll file. That is,
 	 * a file structure and a free file descriptor.
 	 */
-	error = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep,
+	fd = get_unused_fd_flags(O_RDWR | (flags & O_CLOEXEC));
+	if (fd < 0) {
+		error = fd;
+		goto out_free_ep;
+	}
+	file = anon_inode_getfile("[eventpoll]", &eventpoll_fops, ep,
 				 O_RDWR | (flags & O_CLOEXEC));
-	if (error < 0)
-		ep_free(ep);
+	if (IS_ERR(file)) {
+		error = PTR_ERR(file);
+		goto out_free_fd;
+	}
+	fd_install(fd, file);
+	ep->file = file;
+	return fd;
 
+out_free_fd:
+	put_unused_fd(fd);
+out_free_ep:
+	ep_free(ep);
 	return error;
 }
 
@@ -1404,21 +1581,27 @@
 	/*
 	 * When we insert an epoll file descriptor, inside another epoll file
 	 * descriptor, there is the change of creating closed loops, which are
-	 * better be handled here, than in more critical paths.
+	 * better be handled here, than in more critical paths. While we are
+	 * checking for loops we also determine the list of files reachable
+	 * and hang them on the tfile_check_list, so we can check that we
+	 * haven't created too many possible wakeup paths.
 	 *
-	 * We hold epmutex across the loop check and the insert in this case, in
-	 * order to prevent two separate inserts from racing and each doing the
-	 * insert "at the same time" such that ep_loop_check passes on both
-	 * before either one does the insert, thereby creating a cycle.
+	 * We need to hold the epmutex across both ep_insert and ep_remove
+	 * because we want to make sure we are looking at a coherent view of
+	 * the epoll network.
 	 */
-	if (unlikely(is_file_epoll(tfile) && op == EPOLL_CTL_ADD)) {
+	if (op == EPOLL_CTL_ADD || op == EPOLL_CTL_DEL) {
 		mutex_lock(&epmutex);
 		did_lock_epmutex = 1;
-		error = -ELOOP;
-		if (ep_loop_check(ep, tfile) != 0)
-			goto error_tgt_fput;
 	}
-
+	if (op == EPOLL_CTL_ADD) {
+		if (is_file_epoll(tfile)) {
+			error = -ELOOP;
+			if (ep_loop_check(ep, tfile) != 0)
+				goto error_tgt_fput;
+		} else
+			list_add(&tfile->f_tfile_llink, &tfile_check_list);
+	}
 
 	mutex_lock_nested(&ep->mtx, 0);
 
@@ -1437,6 +1620,7 @@
 			error = ep_insert(ep, &epds, tfile, fd);
 		} else
 			error = -EEXIST;
+		clear_tfile_check_list();
 		break;
 	case EPOLL_CTL_DEL:
 		if (epi)
@@ -1455,7 +1639,7 @@
 	mutex_unlock(&ep->mtx);
 
 error_tgt_fput:
-	if (unlikely(did_lock_epmutex))
+	if (did_lock_epmutex)
 		mutex_unlock(&epmutex);
 
 	fput(tfile);

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 2aaf3ea..5f3368a 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c

@@ -1378,7 +1378,59 @@
 	down_read(&fc->killsb);
 	err = -ENOENT;
 	if (fc->sb)
-		err = fuse_reverse_inval_entry(fc->sb, outarg.parent, &name);
+		err = fuse_reverse_inval_entry(fc->sb, outarg.parent, 0, &name);
+	up_read(&fc->killsb);
+	kfree(buf);
+	return err;
+
+err:
+	kfree(buf);
+	fuse_copy_finish(cs);
+	return err;
+}
+
+static int fuse_notify_delete(struct fuse_conn *fc, unsigned int size,
+			      struct fuse_copy_state *cs)
+{
+	struct fuse_notify_delete_out outarg;
+	int err = -ENOMEM;
+	char *buf;
+	struct qstr name;
+
+	buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL);
+	if (!buf)
+		goto err;
+
+	err = -EINVAL;
+	if (size < sizeof(outarg))
+		goto err;
+
+	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
+	if (err)
+		goto err;
+
+	err = -ENAMETOOLONG;
+	if (outarg.namelen > FUSE_NAME_MAX)
+		goto err;
+
+	err = -EINVAL;
+	if (size != sizeof(outarg) + outarg.namelen + 1)
+		goto err;
+
+	name.name = buf;
+	name.len = outarg.namelen;
+	err = fuse_copy_one(cs, buf, outarg.namelen + 1);
+	if (err)
+		goto err;
+	fuse_copy_finish(cs);
+	buf[outarg.namelen] = 0;
+	name.hash = full_name_hash(name.name, name.len);
+
+	down_read(&fc->killsb);
+	err = -ENOENT;
+	if (fc->sb)
+		err = fuse_reverse_inval_entry(fc->sb, outarg.parent,
+					       outarg.child, &name);
 	up_read(&fc->killsb);
 	kfree(buf);
 	return err;
@@ -1597,6 +1649,9 @@
 	case FUSE_NOTIFY_RETRIEVE:
 		return fuse_notify_retrieve(fc, size, cs);
 
+	case FUSE_NOTIFY_DELETE:
+		return fuse_notify_delete(fc, size, cs);
+
 	default:
 		fuse_copy_finish(cs);
 		return -EINVAL;

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 5ddd6ea..2066328 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c

@@ -868,7 +868,7 @@
 }
 
 int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
-			     struct qstr *name)
+			     u64 child_nodeid, struct qstr *name)
 {
 	int err = -ENOTDIR;
 	struct inode *parent;
@@ -895,8 +895,36 @@
 
 	fuse_invalidate_attr(parent);
 	fuse_invalidate_entry(entry);
+
+	if (child_nodeid != 0 && entry->d_inode) {
+		mutex_lock(&entry->d_inode->i_mutex);
+		if (get_node_id(entry->d_inode) != child_nodeid) {
+			err = -ENOENT;
+			goto badentry;
+		}
+		if (d_mountpoint(entry)) {
+			err = -EBUSY;
+			goto badentry;
+		}
+		if (S_ISDIR(entry->d_inode->i_mode)) {
+			shrink_dcache_parent(entry);
+			if (!simple_empty(entry)) {
+				err = -ENOTEMPTY;
+				goto badentry;
+			}
+			entry->d_inode->i_flags |= S_DEAD;
+		}
+		dont_mount(entry);
+		clear_nlink(entry->d_inode);
+		err = 0;
+ badentry:
+		mutex_unlock(&entry->d_inode->i_mutex);
+		if (!err)
+			d_delete(entry);
+	} else {
+		err = 0;
+	}
 	dput(entry);
-	err = 0;
 
  unlock:
 	mutex_unlock(&parent->i_mutex);
@@ -1182,6 +1210,30 @@
 	return fuse_fsync_common(file, start, end, datasync, 1);
 }
 
+static long fuse_dir_ioctl(struct file *file, unsigned int cmd,
+			    unsigned long arg)
+{
+	struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
+
+	/* FUSE_IOCTL_DIR only supported for API version >= 7.18 */
+	if (fc->minor < 18)
+		return -ENOTTY;
+
+	return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_DIR);
+}
+
+static long fuse_dir_compat_ioctl(struct file *file, unsigned int cmd,
+				   unsigned long arg)
+{
+	struct fuse_conn *fc = get_fuse_conn(file->f_mapping->host);
+
+	if (fc->minor < 18)
+		return -ENOTTY;
+
+	return fuse_ioctl_common(file, cmd, arg,
+				 FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR);
+}
+
 static bool update_mtime(unsigned ivalid)
 {
 	/* Always update if mtime is explicitly set  */
@@ -1596,6 +1648,8 @@
 	.open		= fuse_dir_open,
 	.release	= fuse_dir_release,
 	.fsync		= fuse_dir_fsync,
+	.unlocked_ioctl	= fuse_dir_ioctl,
+	.compat_ioctl	= fuse_dir_compat_ioctl,
 };
 
 static const struct inode_operations fuse_common_inode_operations = {

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 0c84100..4a199fd 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c

@@ -1555,48 +1555,16 @@
 	loff_t retval;
 	struct inode *inode = file->f_path.dentry->d_inode;
 
-	mutex_lock(&inode->i_mutex);
-	if (origin != SEEK_CUR && origin != SEEK_SET) {
-		retval = fuse_update_attributes(inode, NULL, file, NULL);
-		if (retval)
-			goto exit;
-	}
+	/* No i_mutex protection necessary for SEEK_CUR and SEEK_SET */
+	if (origin == SEEK_CUR || origin == SEEK_SET)
+		return generic_file_llseek(file, offset, origin);
 
-	switch (origin) {
-	case SEEK_END:
-		offset += i_size_read(inode);
-		break;
-	case SEEK_CUR:
-		if (offset == 0) {
-			retval = file->f_pos;
-			goto exit;
-		}
-		offset += file->f_pos;
-		break;
-	case SEEK_DATA:
-		if (offset >= i_size_read(inode)) {
-			retval = -ENXIO;
-			goto exit;
-		}
-		break;
-	case SEEK_HOLE:
-		if (offset >= i_size_read(inode)) {
-			retval = -ENXIO;
-			goto exit;
-		}
-		offset = i_size_read(inode);
-		break;
-	}
-	retval = -EINVAL;
-	if (offset >= 0 && offset <= inode->i_sb->s_maxbytes) {
-		if (offset != file->f_pos) {
-			file->f_pos = offset;
-			file->f_version = 0;
-		}
-		retval = offset;
-	}
-exit:
+	mutex_lock(&inode->i_mutex);
+	retval = fuse_update_attributes(inode, NULL, file, NULL);
+	if (!retval)
+		retval = generic_file_llseek(file, offset, origin);
 	mutex_unlock(&inode->i_mutex);
+
 	return retval;
 }
 
@@ -1808,7 +1776,7 @@
 	BUILD_BUG_ON(sizeof(struct fuse_ioctl_iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE);
 
 	err = -ENOMEM;
-	pages = kzalloc(sizeof(pages[0]) * FUSE_MAX_PAGES_PER_REQ, GFP_KERNEL);
+	pages = kcalloc(FUSE_MAX_PAGES_PER_REQ, sizeof(pages[0]), GFP_KERNEL);
 	iov_page = (struct iovec *) __get_free_page(GFP_KERNEL);
 	if (!pages || !iov_page)
 		goto out;
@@ -1958,8 +1926,8 @@
 }
 EXPORT_SYMBOL_GPL(fuse_do_ioctl);
 
-static long fuse_file_ioctl_common(struct file *file, unsigned int cmd,
-				   unsigned long arg, unsigned int flags)
+long fuse_ioctl_common(struct file *file, unsigned int cmd,
+		       unsigned long arg, unsigned int flags)
 {
 	struct inode *inode = file->f_dentry->d_inode;
 	struct fuse_conn *fc = get_fuse_conn(inode);
@@ -1976,13 +1944,13 @@
 static long fuse_file_ioctl(struct file *file, unsigned int cmd,
 			    unsigned long arg)
 {
-	return fuse_file_ioctl_common(file, cmd, arg, 0);
+	return fuse_ioctl_common(file, cmd, arg, 0);
 }
 
 static long fuse_file_compat_ioctl(struct file *file, unsigned int cmd,
 				   unsigned long arg)
 {
-	return fuse_file_ioctl_common(file, cmd, arg, FUSE_IOCTL_COMPAT);
+	return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_COMPAT);
 }
 
 /*

diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 1964da0..572cefc 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h

@@ -755,9 +755,15 @@
 /**
  * File-system tells the kernel to invalidate parent attributes and
  * the dentry matching parent/name.
+ *
+ * If the child_nodeid is non-zero and:
+ *    - matches the inode number for the dentry matching parent/name,
+ *    - is not a mount point
+ *    - is a file or an empty directory
+ * then the dentry is unhashed (d_delete()).
  */
 int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
-			     struct qstr *name);
+			     u64 child_nodeid, struct qstr *name);
 
 int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
 		 bool isdir);
@@ -765,6 +771,8 @@
 		       size_t count, loff_t *ppos, int write);
 long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
 		   unsigned int flags);
+long fuse_ioctl_common(struct file *file, unsigned int cmd,
+		       unsigned long arg, unsigned int flags);
 unsigned fuse_file_poll(struct file *file, poll_table *wait);
 int fuse_dev_release(struct inode *inode, struct file *file);
 

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 88e8a23..376816f 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c

@@ -1353,7 +1353,7 @@
 	spin_lock(&gl->gl_spin);
 	gl->gl_reply = ret;
 
-	if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_flags))) {
+	if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags))) {
 		if (gfs2_should_freeze(gl)) {
 			set_bit(GLF_FROZEN, &gl->gl_flags);
 			spin_unlock(&gl->gl_spin);

diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 2553b85..307ac31 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h

@@ -121,8 +121,11 @@
 
 struct lm_lockops {
 	const char *lm_proto_name;
-	int (*lm_mount) (struct gfs2_sbd *sdp, const char *fsname);
- 	void (*lm_unmount) (struct gfs2_sbd *sdp);
+	int (*lm_mount) (struct gfs2_sbd *sdp, const char *table);
+	void (*lm_first_done) (struct gfs2_sbd *sdp);
+	void (*lm_recovery_result) (struct gfs2_sbd *sdp, unsigned int jid,
+				    unsigned int result);
+	void (*lm_unmount) (struct gfs2_sbd *sdp);
 	void (*lm_withdraw) (struct gfs2_sbd *sdp);
 	void (*lm_put_lock) (struct gfs2_glock *gl);
 	int (*lm_lock) (struct gfs2_glock *gl, unsigned int req_state,

diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index e1d3bb5..97742a7 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h

@@ -139,8 +139,45 @@
 #define GDLM_STRNAME_BYTES	25
 #define GDLM_LVB_SIZE		32
 
+/*
+ * ls_recover_flags:
+ *
+ * DFL_BLOCK_LOCKS: dlm is in recovery and will grant locks that had been
+ * held by failed nodes whose journals need recovery.  Those locks should
+ * only be used for journal recovery until the journal recovery is done.
+ * This is set by the dlm recover_prep callback and cleared by the
+ * gfs2_control thread when journal recovery is complete.  To avoid
+ * races between recover_prep setting and gfs2_control clearing, recover_spin
+ * is held while changing this bit and reading/writing recover_block
+ * and recover_start.
+ *
+ * DFL_NO_DLM_OPS: dlm lockspace ops/callbacks are not being used.
+ *
+ * DFL_FIRST_MOUNT: this node is the first to mount this fs and is doing
+ * recovery of all journals before allowing other nodes to mount the fs.
+ * This is cleared when FIRST_MOUNT_DONE is set.
+ *
+ * DFL_FIRST_MOUNT_DONE: this node was the first mounter, and has finished
+ * recovery of all journals, and now allows other nodes to mount the fs.
+ *
+ * DFL_MOUNT_DONE: gdlm_mount has completed successfully and cleared
+ * BLOCK_LOCKS for the first time.  The gfs2_control thread should now
+ * control clearing BLOCK_LOCKS for further recoveries.
+ *
+ * DFL_UNMOUNT: gdlm_unmount sets to keep sdp off gfs2_control_wq.
+ *
+ * DFL_DLM_RECOVERY: set while dlm is in recovery, between recover_prep()
+ * and recover_done(), i.e. set while recover_block == recover_start.
+ */
+
 enum {
 	DFL_BLOCK_LOCKS		= 0,
+	DFL_NO_DLM_OPS		= 1,
+	DFL_FIRST_MOUNT		= 2,
+	DFL_FIRST_MOUNT_DONE	= 3,
+	DFL_MOUNT_DONE		= 4,
+	DFL_UNMOUNT		= 5,
+	DFL_DLM_RECOVERY	= 6,
 };
 
 struct lm_lockname {
@@ -392,6 +429,7 @@
 #define JDF_RECOVERY 1
 	unsigned int jd_jid;
 	unsigned int jd_blocks;
+	int jd_recover_error;
 };
 
 struct gfs2_statfs_change_host {
@@ -461,6 +499,7 @@
 	SDF_NORECOVERY		= 4,
 	SDF_DEMOTE		= 5,
 	SDF_NOJOURNALID		= 6,
+	SDF_RORECOVERY		= 7, /* read only recovery */
 };
 
 #define GFS2_FSNAME_LEN		256
@@ -499,14 +538,26 @@
 struct lm_lockstruct {
 	int ls_jid;
 	unsigned int ls_first;
-	unsigned int ls_first_done;
 	unsigned int ls_nodir;
 	const struct lm_lockops *ls_ops;
-	unsigned long ls_flags;
 	dlm_lockspace_t *ls_dlm;
 
-	int ls_recover_jid_done;
-	int ls_recover_jid_status;
+	int ls_recover_jid_done;   /* These two are deprecated, */
+	int ls_recover_jid_status; /* used previously by gfs_controld */
+
+	struct dlm_lksb ls_mounted_lksb; /* mounted_lock */
+	struct dlm_lksb ls_control_lksb; /* control_lock */
+	char ls_control_lvb[GDLM_LVB_SIZE]; /* control_lock lvb */
+	struct completion ls_sync_wait; /* {control,mounted}_{lock,unlock} */
+
+	spinlock_t ls_recover_spin; /* protects following fields */
+	unsigned long ls_recover_flags; /* DFL_ */
+	uint32_t ls_recover_mount; /* gen in first recover_done cb */
+	uint32_t ls_recover_start; /* gen in last recover_done cb */
+	uint32_t ls_recover_block; /* copy recover_start in last recover_prep */
+	uint32_t ls_recover_size; /* size of recover_submit, recover_result */
+	uint32_t *ls_recover_submit; /* gen in last recover_slot cb per jid */
+	uint32_t *ls_recover_result; /* result of last jid recovery */
 };
 
 struct gfs2_sbd {
@@ -544,6 +595,7 @@
 	wait_queue_head_t sd_glock_wait;
 	atomic_t sd_glock_disposal;
 	struct completion sd_locking_init;
+	struct delayed_work sd_control_work;
 
 	/* Inode Stuff */
 

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 017960c..a7d611b 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c

@@ -599,9 +599,7 @@
 	error = gfs2_meta_inode_buffer(ip, &dibh);
 	if (error)
 		goto fail_end_trans;
-	inc_nlink(&ip->i_inode);
-	if (S_ISDIR(ip->i_inode.i_mode))
-		inc_nlink(&ip->i_inode);
+	set_nlink(&ip->i_inode, S_ISDIR(ip->i_inode.i_mode) ? 2 : 1);
 	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 	gfs2_dinode_out(ip, dibh->b_data);
 	brelse(dibh);

diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index ce85b62..8944d1e 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c

@@ -1,6 +1,6 @@
 /*
  * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
- * Copyright (C) 2004-2009 Red Hat, Inc.  All rights reserved.
+ * Copyright 2004-2011 Red Hat, Inc.
  *
  * This copyrighted material is made available to anyone wishing to use,
  * modify, copy, or redistribute it subject to the terms and conditions
@@ -11,12 +11,15 @@
 #include <linux/dlm.h>
 #include <linux/slab.h>
 #include <linux/types.h>
+#include <linux/delay.h>
 #include <linux/gfs2_ondisk.h>
 
 #include "incore.h"
 #include "glock.h"
 #include "util.h"
+#include "sys.h"
 
+extern struct workqueue_struct *gfs2_control_wq;
 
 static void gdlm_ast(void *arg)
 {
@@ -185,34 +188,1002 @@
 	dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_CANCEL, NULL, gl);
 }
 
-static int gdlm_mount(struct gfs2_sbd *sdp, const char *fsname)
+/*
+ * dlm/gfs2 recovery coordination using dlm_recover callbacks
+ *
+ *  1. dlm_controld sees lockspace members change
+ *  2. dlm_controld blocks dlm-kernel locking activity
+ *  3. dlm_controld within dlm-kernel notifies gfs2 (recover_prep)
+ *  4. dlm_controld starts and finishes its own user level recovery
+ *  5. dlm_controld starts dlm-kernel dlm_recoverd to do kernel recovery
+ *  6. dlm_recoverd notifies gfs2 of failed nodes (recover_slot)
+ *  7. dlm_recoverd does its own lock recovery
+ *  8. dlm_recoverd unblocks dlm-kernel locking activity
+ *  9. dlm_recoverd notifies gfs2 when done (recover_done with new generation)
+ * 10. gfs2_control updates control_lock lvb with new generation and jid bits
+ * 11. gfs2_control enqueues journals for gfs2_recover to recover (maybe none)
+ * 12. gfs2_recover dequeues and recovers journals of failed nodes
+ * 13. gfs2_recover provides recovery results to gfs2_control (recovery_result)
+ * 14. gfs2_control updates control_lock lvb jid bits for recovered journals
+ * 15. gfs2_control unblocks normal locking when all journals are recovered
+ *
+ * - failures during recovery
+ *
+ * recover_prep() may set BLOCK_LOCKS (step 3) again before gfs2_control
+ * clears BLOCK_LOCKS (step 15), e.g. another node fails while still
+ * recovering for a prior failure.  gfs2_control needs a way to detect
+ * this so it can leave BLOCK_LOCKS set in step 15.  This is managed using
+ * the recover_block and recover_start values.
+ *
+ * recover_done() provides a new lockspace generation number each time it
+ * is called (step 9).  This generation number is saved as recover_start.
+ * When recover_prep() is called, it sets BLOCK_LOCKS and sets
+ * recover_block = recover_start.  So, while recover_block is equal to
+ * recover_start, BLOCK_LOCKS should remain set.  (recover_spin must
+ * be held around the BLOCK_LOCKS/recover_block/recover_start logic.)
+ *
+ * - more specific gfs2 steps in sequence above
+ *
+ *  3. recover_prep sets BLOCK_LOCKS and sets recover_block = recover_start
+ *  6. recover_slot records any failed jids (maybe none)
+ *  9. recover_done sets recover_start = new generation number
+ * 10. gfs2_control sets control_lock lvb = new gen + bits for failed jids
+ * 12. gfs2_recover does journal recoveries for failed jids identified above
+ * 14. gfs2_control clears control_lock lvb bits for recovered jids
+ * 15. gfs2_control checks if recover_block == recover_start (step 3 occurred
+ *     again) then do nothing, otherwise if recover_start > recover_block
+ *     then clear BLOCK_LOCKS.
+ *
+ * - parallel recovery steps across all nodes
+ *
+ * All nodes attempt to update the control_lock lvb with the new generation
+ * number and jid bits, but only the first to get the control_lock EX will
+ * do so; others will see that it's already done (lvb already contains new
+ * generation number.)
+ *
+ * . All nodes get the same recover_prep/recover_slot/recover_done callbacks
+ * . All nodes attempt to set control_lock lvb gen + bits for the new gen
+ * . One node gets control_lock first and writes the lvb, others see it's done
+ * . All nodes attempt to recover jids for which they see control_lock bits set
+ * . One node succeeds for a jid, and that one clears the jid bit in the lvb
+ * . All nodes will eventually see all lvb bits clear and unblock locks
+ *
+ * - is there a problem with clearing an lvb bit that should be set
+ *   and missing a journal recovery?
+ *
+ * 1. jid fails
+ * 2. lvb bit set for step 1
+ * 3. jid recovered for step 1
+ * 4. jid taken again (new mount)
+ * 5. jid fails (for step 4)
+ * 6. lvb bit set for step 5 (will already be set)
+ * 7. lvb bit cleared for step 3
+ *
+ * This is not a problem because the failure in step 5 does not
+ * require recovery, because the mount in step 4 could not have
+ * progressed far enough to unblock locks and access the fs.  The
+ * control_mount() function waits for all recoveries to be complete
+ * for the latest lockspace generation before ever unblocking locks
+ * and returning.  The mount in step 4 waits until the recovery in
+ * step 1 is done.
+ *
+ * - special case of first mounter: first node to mount the fs
+ *
+ * The first node to mount a gfs2 fs needs to check all the journals
+ * and recover any that need recovery before other nodes are allowed
+ * to mount the fs.  (Others may begin mounting, but they must wait
+ * for the first mounter to be done before taking locks on the fs
+ * or accessing the fs.)  This has two parts:
+ *
+ * 1. The mounted_lock tells a node it's the first to mount the fs.
+ * Each node holds the mounted_lock in PR while it's mounted.
+ * Each node tries to acquire the mounted_lock in EX when it mounts.
+ * If a node is granted the mounted_lock EX it means there are no
+ * other mounted nodes (no PR locks exist), and it is the first mounter.
+ * The mounted_lock is demoted to PR when first recovery is done, so
+ * others will fail to get an EX lock, but will get a PR lock.
+ *
+ * 2. The control_lock blocks others in control_mount() while the first
+ * mounter is doing first mount recovery of all journals.
+ * A mounting node needs to acquire control_lock in EX mode before
+ * it can proceed.  The first mounter holds control_lock in EX while doing
+ * the first mount recovery, blocking mounts from other nodes, then demotes
+ * control_lock to NL when it's done (others_may_mount/first_done),
+ * allowing other nodes to continue mounting.
+ *
+ * first mounter:
+ * control_lock EX/NOQUEUE success
+ * mounted_lock EX/NOQUEUE success (no other PR, so no other mounters)
+ * set first=1
+ * do first mounter recovery
+ * mounted_lock EX->PR
+ * control_lock EX->NL, write lvb generation
+ *
+ * other mounter:
+ * control_lock EX/NOQUEUE success (if fail -EAGAIN, retry)
+ * mounted_lock EX/NOQUEUE fail -EAGAIN (expected due to other mounters PR)
+ * mounted_lock PR/NOQUEUE success
+ * read lvb generation
+ * control_lock EX->NL
+ * set first=0
+ *
+ * - mount during recovery
+ *
+ * If a node mounts while others are doing recovery (not first mounter),
+ * the mounting node will get its initial recover_done() callback without
+ * having seen any previous failures/callbacks.
+ *
+ * It must wait for all recoveries preceding its mount to be finished
+ * before it unblocks locks.  It does this by repeating the "other mounter"
+ * steps above until the lvb generation number is >= its mount generation
+ * number (from initial recover_done) and all lvb bits are clear.
+ *
+ * - control_lock lvb format
+ *
+ * 4 bytes generation number: the latest dlm lockspace generation number
+ * from recover_done callback.  Indicates the jid bitmap has been updated
+ * to reflect all slot failures through that generation.
+ * 4 bytes unused.
+ * GDLM_LVB_SIZE-8 bytes of jid bit map. If bit N is set, it indicates
+ * that jid N needs recovery.
+ */
+
+#define JID_BITMAP_OFFSET 8 /* 4 byte generation number + 4 byte unused */
+
+static void control_lvb_read(struct lm_lockstruct *ls, uint32_t *lvb_gen,
+			     char *lvb_bits)
+{
+	uint32_t gen;
+	memcpy(lvb_bits, ls->ls_control_lvb, GDLM_LVB_SIZE);
+	memcpy(&gen, lvb_bits, sizeof(uint32_t));
+	*lvb_gen = le32_to_cpu(gen);
+}
+
+static void control_lvb_write(struct lm_lockstruct *ls, uint32_t lvb_gen,
+			      char *lvb_bits)
+{
+	uint32_t gen;
+	memcpy(ls->ls_control_lvb, lvb_bits, GDLM_LVB_SIZE);
+	gen = cpu_to_le32(lvb_gen);
+	memcpy(ls->ls_control_lvb, &gen, sizeof(uint32_t));
+}
+
+static int all_jid_bits_clear(char *lvb)
+{
+	int i;
+	for (i = JID_BITMAP_OFFSET; i < GDLM_LVB_SIZE; i++) {
+		if (lvb[i])
+			return 0;
+	}
+	return 1;
+}
+
+static void sync_wait_cb(void *arg)
+{
+	struct lm_lockstruct *ls = arg;
+	complete(&ls->ls_sync_wait);
+}
+
+static int sync_unlock(struct gfs2_sbd *sdp, struct dlm_lksb *lksb, char *name)
 {
 	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
 	int error;
 
-	if (fsname == NULL) {
-		fs_info(sdp, "no fsname found\n");
-		return -EINVAL;
+	error = dlm_unlock(ls->ls_dlm, lksb->sb_lkid, 0, lksb, ls);
+	if (error) {
+		fs_err(sdp, "%s lkid %x error %d\n",
+		       name, lksb->sb_lkid, error);
+		return error;
 	}
 
-	error = dlm_new_lockspace(fsname, NULL, 
-				  DLM_LSFL_FS | DLM_LSFL_NEWEXCL |
-				  (ls->ls_nodir ? DLM_LSFL_NODIR : 0),
-				  GDLM_LVB_SIZE, NULL, NULL, NULL, &ls->ls_dlm);
+	wait_for_completion(&ls->ls_sync_wait);
+
+	if (lksb->sb_status != -DLM_EUNLOCK) {
+		fs_err(sdp, "%s lkid %x status %d\n",
+		       name, lksb->sb_lkid, lksb->sb_status);
+		return -1;
+	}
+	return 0;
+}
+
+static int sync_lock(struct gfs2_sbd *sdp, int mode, uint32_t flags,
+		     unsigned int num, struct dlm_lksb *lksb, char *name)
+{
+	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+	char strname[GDLM_STRNAME_BYTES];
+	int error, status;
+
+	memset(strname, 0, GDLM_STRNAME_BYTES);
+	snprintf(strname, GDLM_STRNAME_BYTES, "%8x%16x", LM_TYPE_NONDISK, num);
+
+	error = dlm_lock(ls->ls_dlm, mode, lksb, flags,
+			 strname, GDLM_STRNAME_BYTES - 1,
+			 0, sync_wait_cb, ls, NULL);
+	if (error) {
+		fs_err(sdp, "%s lkid %x flags %x mode %d error %d\n",
+		       name, lksb->sb_lkid, flags, mode, error);
+		return error;
+	}
+
+	wait_for_completion(&ls->ls_sync_wait);
+
+	status = lksb->sb_status;
+
+	if (status && status != -EAGAIN) {
+		fs_err(sdp, "%s lkid %x flags %x mode %d status %d\n",
+		       name, lksb->sb_lkid, flags, mode, status);
+	}
+
+	return status;
+}
+
+static int mounted_unlock(struct gfs2_sbd *sdp)
+{
+	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+	return sync_unlock(sdp, &ls->ls_mounted_lksb, "mounted_lock");
+}
+
+static int mounted_lock(struct gfs2_sbd *sdp, int mode, uint32_t flags)
+{
+	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+	return sync_lock(sdp, mode, flags, GFS2_MOUNTED_LOCK,
+			 &ls->ls_mounted_lksb, "mounted_lock");
+}
+
+static int control_unlock(struct gfs2_sbd *sdp)
+{
+	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+	return sync_unlock(sdp, &ls->ls_control_lksb, "control_lock");
+}
+
+static int control_lock(struct gfs2_sbd *sdp, int mode, uint32_t flags)
+{
+	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+	return sync_lock(sdp, mode, flags, GFS2_CONTROL_LOCK,
+			 &ls->ls_control_lksb, "control_lock");
+}
+
+static void gfs2_control_func(struct work_struct *work)
+{
+	struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd, sd_control_work.work);
+	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+	char lvb_bits[GDLM_LVB_SIZE];
+	uint32_t block_gen, start_gen, lvb_gen, flags;
+	int recover_set = 0;
+	int write_lvb = 0;
+	int recover_size;
+	int i, error;
+
+	spin_lock(&ls->ls_recover_spin);
+	/*
+	 * No MOUNT_DONE means we're still mounting; control_mount()
+	 * will set this flag, after which this thread will take over
+	 * all further clearing of BLOCK_LOCKS.
+	 *
+	 * FIRST_MOUNT means this node is doing first mounter recovery,
+	 * for which recovery control is handled by
+	 * control_mount()/control_first_done(), not this thread.
+	 */
+	if (!test_bit(DFL_MOUNT_DONE, &ls->ls_recover_flags) ||
+	     test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags)) {
+		spin_unlock(&ls->ls_recover_spin);
+		return;
+	}
+	block_gen = ls->ls_recover_block;
+	start_gen = ls->ls_recover_start;
+	spin_unlock(&ls->ls_recover_spin);
+
+	/*
+	 * Equal block_gen and start_gen implies we are between
+	 * recover_prep and recover_done callbacks, which means
+	 * dlm recovery is in progress and dlm locking is blocked.
+	 * There's no point trying to do any work until recover_done.
+	 */
+
+	if (block_gen == start_gen)
+		return;
+
+	/*
+	 * Propagate recover_submit[] and recover_result[] to lvb:
+	 * dlm_recoverd adds to recover_submit[] jids needing recovery
+	 * gfs2_recover adds to recover_result[] journal recovery results
+	 *
+	 * set lvb bit for jids in recover_submit[] if the lvb has not
+	 * yet been updated for the generation of the failure
+	 *
+	 * clear lvb bit for jids in recover_result[] if the result of
+	 * the journal recovery is SUCCESS
+	 */
+
+	error = control_lock(sdp, DLM_LOCK_EX, DLM_LKF_CONVERT|DLM_LKF_VALBLK);
+	if (error) {
+		fs_err(sdp, "control lock EX error %d\n", error);
+		return;
+	}
+
+	control_lvb_read(ls, &lvb_gen, lvb_bits);
+
+	spin_lock(&ls->ls_recover_spin);
+	if (block_gen != ls->ls_recover_block ||
+	    start_gen != ls->ls_recover_start) {
+		fs_info(sdp, "recover generation %u block1 %u %u\n",
+			start_gen, block_gen, ls->ls_recover_block);
+		spin_unlock(&ls->ls_recover_spin);
+		control_lock(sdp, DLM_LOCK_NL, DLM_LKF_CONVERT);
+		return;
+	}
+
+	recover_size = ls->ls_recover_size;
+
+	if (lvb_gen <= start_gen) {
+		/*
+		 * Clear lvb bits for jids we've successfully recovered.
+		 * Because all nodes attempt to recover failed journals,
+		 * a journal can be recovered multiple times successfully
+		 * in succession.  Only the first will really do recovery,
+		 * the others find it clean, but still report a successful
+		 * recovery.  So, another node may have already recovered
+		 * the jid and cleared the lvb bit for it.
+		 */
+		for (i = 0; i < recover_size; i++) {
+			if (ls->ls_recover_result[i] != LM_RD_SUCCESS)
+				continue;
+
+			ls->ls_recover_result[i] = 0;
+
+			if (!test_bit_le(i, lvb_bits + JID_BITMAP_OFFSET))
+				continue;
+
+			__clear_bit_le(i, lvb_bits + JID_BITMAP_OFFSET);
+			write_lvb = 1;
+		}
+	}
+
+	if (lvb_gen == start_gen) {
+		/*
+		 * Failed slots before start_gen are already set in lvb.
+		 */
+		for (i = 0; i < recover_size; i++) {
+			if (!ls->ls_recover_submit[i])
+				continue;
+			if (ls->ls_recover_submit[i] < lvb_gen)
+				ls->ls_recover_submit[i] = 0;
+		}
+	} else if (lvb_gen < start_gen) {
+		/*
+		 * Failed slots before start_gen are not yet set in lvb.
+		 */
+		for (i = 0; i < recover_size; i++) {
+			if (!ls->ls_recover_submit[i])
+				continue;
+			if (ls->ls_recover_submit[i] < start_gen) {
+				ls->ls_recover_submit[i] = 0;
+				__set_bit_le(i, lvb_bits + JID_BITMAP_OFFSET);
+			}
+		}
+		/* even if there are no bits to set, we need to write the
+		   latest generation to the lvb */
+		write_lvb = 1;
+	} else {
+		/*
+		 * we should be getting a recover_done() for lvb_gen soon
+		 */
+	}
+	spin_unlock(&ls->ls_recover_spin);
+
+	if (write_lvb) {
+		control_lvb_write(ls, start_gen, lvb_bits);
+		flags = DLM_LKF_CONVERT | DLM_LKF_VALBLK;
+	} else {
+		flags = DLM_LKF_CONVERT;
+	}
+
+	error = control_lock(sdp, DLM_LOCK_NL, flags);
+	if (error) {
+		fs_err(sdp, "control lock NL error %d\n", error);
+		return;
+	}
+
+	/*
+	 * Everyone will see jid bits set in the lvb, run gfs2_recover_set(),
+	 * and clear a jid bit in the lvb if the recovery is a success.
+	 * Eventually all journals will be recovered, all jid bits will
+	 * be cleared in the lvb, and everyone will clear BLOCK_LOCKS.
+	 */
+
+	for (i = 0; i < recover_size; i++) {
+		if (test_bit_le(i, lvb_bits + JID_BITMAP_OFFSET)) {
+			fs_info(sdp, "recover generation %u jid %d\n",
+				start_gen, i);
+			gfs2_recover_set(sdp, i);
+			recover_set++;
+		}
+	}
+	if (recover_set)
+		return;
+
+	/*
+	 * No more jid bits set in lvb, all recovery is done, unblock locks
+	 * (unless a new recover_prep callback has occurred blocking locks
+	 * again while working above)
+	 */
+
+	spin_lock(&ls->ls_recover_spin);
+	if (ls->ls_recover_block == block_gen &&
+	    ls->ls_recover_start == start_gen) {
+		clear_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);
+		spin_unlock(&ls->ls_recover_spin);
+		fs_info(sdp, "recover generation %u done\n", start_gen);
+		gfs2_glock_thaw(sdp);
+	} else {
+		fs_info(sdp, "recover generation %u block2 %u %u\n",
+			start_gen, block_gen, ls->ls_recover_block);
+		spin_unlock(&ls->ls_recover_spin);
+	}
+}
+
+static int control_mount(struct gfs2_sbd *sdp)
+{
+	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+	char lvb_bits[GDLM_LVB_SIZE];
+	uint32_t start_gen, block_gen, mount_gen, lvb_gen;
+	int mounted_mode;
+	int retries = 0;
+	int error;
+
+	memset(&ls->ls_mounted_lksb, 0, sizeof(struct dlm_lksb));
+	memset(&ls->ls_control_lksb, 0, sizeof(struct dlm_lksb));
+	memset(&ls->ls_control_lvb, 0, GDLM_LVB_SIZE);
+	ls->ls_control_lksb.sb_lvbptr = ls->ls_control_lvb;
+	init_completion(&ls->ls_sync_wait);
+
+	set_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);
+
+	error = control_lock(sdp, DLM_LOCK_NL, DLM_LKF_VALBLK);
+	if (error) {
+		fs_err(sdp, "control_mount control_lock NL error %d\n", error);
+		return error;
+	}
+
+	error = mounted_lock(sdp, DLM_LOCK_NL, 0);
+	if (error) {
+		fs_err(sdp, "control_mount mounted_lock NL error %d\n", error);
+		control_unlock(sdp);
+		return error;
+	}
+	mounted_mode = DLM_LOCK_NL;
+
+restart:
+	if (retries++ && signal_pending(current)) {
+		error = -EINTR;
+		goto fail;
+	}
+
+	/*
+	 * We always start with both locks in NL. control_lock is
+	 * demoted to NL below so we don't need to do it here.
+	 */
+
+	if (mounted_mode != DLM_LOCK_NL) {
+		error = mounted_lock(sdp, DLM_LOCK_NL, DLM_LKF_CONVERT);
+		if (error)
+			goto fail;
+		mounted_mode = DLM_LOCK_NL;
+	}
+
+	/*
+	 * Other nodes need to do some work in dlm recovery and gfs2_control
+	 * before the recover_done and control_lock will be ready for us below.
+	 * A delay here is not required but often avoids having to retry.
+	 */
+
+	msleep_interruptible(500);
+
+	/*
+	 * Acquire control_lock in EX and mounted_lock in either EX or PR.
+	 * control_lock lvb keeps track of any pending journal recoveries.
+	 * mounted_lock indicates if any other nodes have the fs mounted.
+	 */
+
+	error = control_lock(sdp, DLM_LOCK_EX, DLM_LKF_CONVERT|DLM_LKF_NOQUEUE|DLM_LKF_VALBLK);
+	if (error == -EAGAIN) {
+		goto restart;
+	} else if (error) {
+		fs_err(sdp, "control_mount control_lock EX error %d\n", error);
+		goto fail;
+	}
+
+	error = mounted_lock(sdp, DLM_LOCK_EX, DLM_LKF_CONVERT|DLM_LKF_NOQUEUE);
+	if (!error) {
+		mounted_mode = DLM_LOCK_EX;
+		goto locks_done;
+	} else if (error != -EAGAIN) {
+		fs_err(sdp, "control_mount mounted_lock EX error %d\n", error);
+		goto fail;
+	}
+
+	error = mounted_lock(sdp, DLM_LOCK_PR, DLM_LKF_CONVERT|DLM_LKF_NOQUEUE);
+	if (!error) {
+		mounted_mode = DLM_LOCK_PR;
+		goto locks_done;
+	} else {
+		/* not even -EAGAIN should happen here */
+		fs_err(sdp, "control_mount mounted_lock PR error %d\n", error);
+		goto fail;
+	}
+
+locks_done:
+	/*
+	 * If we got both locks above in EX, then we're the first mounter.
+	 * If not, then we need to wait for the control_lock lvb to be
+	 * updated by other mounted nodes to reflect our mount generation.
+	 *
+	 * In simple first mounter cases, first mounter will see zero lvb_gen,
+	 * but in cases where all existing nodes leave/fail before mounting
+	 * nodes finish control_mount, then all nodes will be mounting and
+	 * lvb_gen will be non-zero.
+	 */
+
+	control_lvb_read(ls, &lvb_gen, lvb_bits);
+
+	if (lvb_gen == 0xFFFFFFFF) {
+		/* special value to force mount attempts to fail */
+		fs_err(sdp, "control_mount control_lock disabled\n");
+		error = -EINVAL;
+		goto fail;
+	}
+
+	if (mounted_mode == DLM_LOCK_EX) {
+		/* first mounter, keep both EX while doing first recovery */
+		spin_lock(&ls->ls_recover_spin);
+		clear_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);
+		set_bit(DFL_MOUNT_DONE, &ls->ls_recover_flags);
+		set_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags);
+		spin_unlock(&ls->ls_recover_spin);
+		fs_info(sdp, "first mounter control generation %u\n", lvb_gen);
+		return 0;
+	}
+
+	error = control_lock(sdp, DLM_LOCK_NL, DLM_LKF_CONVERT);
 	if (error)
-		printk(KERN_ERR "dlm_new_lockspace error %d", error);
+		goto fail;
+
+	/*
+	 * We are not first mounter, now we need to wait for the control_lock
+	 * lvb generation to be >= the generation from our first recover_done
+	 * and all lvb bits to be clear (no pending journal recoveries.)
+	 */
+
+	if (!all_jid_bits_clear(lvb_bits)) {
+		/* journals need recovery, wait until all are clear */
+		fs_info(sdp, "control_mount wait for journal recovery\n");
+		goto restart;
+	}
+
+	spin_lock(&ls->ls_recover_spin);
+	block_gen = ls->ls_recover_block;
+	start_gen = ls->ls_recover_start;
+	mount_gen = ls->ls_recover_mount;
+
+	if (lvb_gen < mount_gen) {
+		/* wait for mounted nodes to update control_lock lvb to our
+		   generation, which might include new recovery bits set */
+		fs_info(sdp, "control_mount wait1 block %u start %u mount %u "
+			"lvb %u flags %lx\n", block_gen, start_gen, mount_gen,
+			lvb_gen, ls->ls_recover_flags);
+		spin_unlock(&ls->ls_recover_spin);
+		goto restart;
+	}
+
+	if (lvb_gen != start_gen) {
+		/* wait for mounted nodes to update control_lock lvb to the
+		   latest recovery generation */
+		fs_info(sdp, "control_mount wait2 block %u start %u mount %u "
+			"lvb %u flags %lx\n", block_gen, start_gen, mount_gen,
+			lvb_gen, ls->ls_recover_flags);
+		spin_unlock(&ls->ls_recover_spin);
+		goto restart;
+	}
+
+	if (block_gen == start_gen) {
+		/* dlm recovery in progress, wait for it to finish */
+		fs_info(sdp, "control_mount wait3 block %u start %u mount %u "
+			"lvb %u flags %lx\n", block_gen, start_gen, mount_gen,
+			lvb_gen, ls->ls_recover_flags);
+		spin_unlock(&ls->ls_recover_spin);
+		goto restart;
+	}
+
+	clear_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);
+	set_bit(DFL_MOUNT_DONE, &ls->ls_recover_flags);
+	memset(ls->ls_recover_submit, 0, ls->ls_recover_size*sizeof(uint32_t));
+	memset(ls->ls_recover_result, 0, ls->ls_recover_size*sizeof(uint32_t));
+	spin_unlock(&ls->ls_recover_spin);
+	return 0;
+
+fail:
+	mounted_unlock(sdp);
+	control_unlock(sdp);
+	return error;
+}
+
+static int dlm_recovery_wait(void *word)
+{
+	schedule();
+	return 0;
+}
+
+static int control_first_done(struct gfs2_sbd *sdp)
+{
+	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+	char lvb_bits[GDLM_LVB_SIZE];
+	uint32_t start_gen, block_gen;
+	int error;
+
+restart:
+	spin_lock(&ls->ls_recover_spin);
+	start_gen = ls->ls_recover_start;
+	block_gen = ls->ls_recover_block;
+
+	if (test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags) ||
+	    !test_bit(DFL_MOUNT_DONE, &ls->ls_recover_flags) ||
+	    !test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags)) {
+		/* sanity check, should not happen */
+		fs_err(sdp, "control_first_done start %u block %u flags %lx\n",
+		       start_gen, block_gen, ls->ls_recover_flags);
+		spin_unlock(&ls->ls_recover_spin);
+		control_unlock(sdp);
+		return -1;
+	}
+
+	if (start_gen == block_gen) {
+		/*
+		 * Wait for the end of a dlm recovery cycle to switch from
+		 * first mounter recovery.  We can ignore any recover_slot
+		 * callbacks between the recover_prep and next recover_done
+		 * because we are still the first mounter and any failed nodes
+		 * have not fully mounted, so they don't need recovery.
+		 */
+		spin_unlock(&ls->ls_recover_spin);
+		fs_info(sdp, "control_first_done wait gen %u\n", start_gen);
+
+		wait_on_bit(&ls->ls_recover_flags, DFL_DLM_RECOVERY,
+			    dlm_recovery_wait, TASK_UNINTERRUPTIBLE);
+		goto restart;
+	}
+
+	clear_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags);
+	set_bit(DFL_FIRST_MOUNT_DONE, &ls->ls_recover_flags);
+	memset(ls->ls_recover_submit, 0, ls->ls_recover_size*sizeof(uint32_t));
+	memset(ls->ls_recover_result, 0, ls->ls_recover_size*sizeof(uint32_t));
+	spin_unlock(&ls->ls_recover_spin);
+
+	memset(lvb_bits, 0, sizeof(lvb_bits));
+	control_lvb_write(ls, start_gen, lvb_bits);
+
+	error = mounted_lock(sdp, DLM_LOCK_PR, DLM_LKF_CONVERT);
+	if (error)
+		fs_err(sdp, "control_first_done mounted PR error %d\n", error);
+
+	error = control_lock(sdp, DLM_LOCK_NL, DLM_LKF_CONVERT|DLM_LKF_VALBLK);
+	if (error)
+		fs_err(sdp, "control_first_done control NL error %d\n", error);
 
 	return error;
 }
 
+/*
+ * Expand static jid arrays if necessary (by increments of RECOVER_SIZE_INC)
+ * to accommodate the largest slot number.  (NB dlm slot numbers start at 1,
+ * gfs2 jids start at 0, so jid = slot - 1)
+ */
+
+#define RECOVER_SIZE_INC 16
+
+static int set_recover_size(struct gfs2_sbd *sdp, struct dlm_slot *slots,
+			    int num_slots)
+{
+	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+	uint32_t *submit = NULL;
+	uint32_t *result = NULL;
+	uint32_t old_size, new_size;
+	int i, max_jid;
+
+	max_jid = 0;
+	for (i = 0; i < num_slots; i++) {
+		if (max_jid < slots[i].slot - 1)
+			max_jid = slots[i].slot - 1;
+	}
+
+	old_size = ls->ls_recover_size;
+
+	if (old_size >= max_jid + 1)
+		return 0;
+
+	new_size = old_size + RECOVER_SIZE_INC;
+
+	submit = kzalloc(new_size * sizeof(uint32_t), GFP_NOFS);
+	result = kzalloc(new_size * sizeof(uint32_t), GFP_NOFS);
+	if (!submit || !result) {
+		kfree(submit);
+		kfree(result);
+		return -ENOMEM;
+	}
+
+	spin_lock(&ls->ls_recover_spin);
+	memcpy(submit, ls->ls_recover_submit, old_size * sizeof(uint32_t));
+	memcpy(result, ls->ls_recover_result, old_size * sizeof(uint32_t));
+	kfree(ls->ls_recover_submit);
+	kfree(ls->ls_recover_result);
+	ls->ls_recover_submit = submit;
+	ls->ls_recover_result = result;
+	ls->ls_recover_size = new_size;
+	spin_unlock(&ls->ls_recover_spin);
+	return 0;
+}
+
+static void free_recover_size(struct lm_lockstruct *ls)
+{
+	kfree(ls->ls_recover_submit);
+	kfree(ls->ls_recover_result);
+	ls->ls_recover_submit = NULL;
+	ls->ls_recover_result = NULL;
+	ls->ls_recover_size = 0;
+}
+
+/* dlm calls before it does lock recovery */
+
+static void gdlm_recover_prep(void *arg)
+{
+	struct gfs2_sbd *sdp = arg;
+	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+
+	spin_lock(&ls->ls_recover_spin);
+	ls->ls_recover_block = ls->ls_recover_start;
+	set_bit(DFL_DLM_RECOVERY, &ls->ls_recover_flags);
+
+	if (!test_bit(DFL_MOUNT_DONE, &ls->ls_recover_flags) ||
+	     test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags)) {
+		spin_unlock(&ls->ls_recover_spin);
+		return;
+	}
+	set_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);
+	spin_unlock(&ls->ls_recover_spin);
+}
+
+/* dlm calls after recover_prep has been completed on all lockspace members;
+   identifies slot/jid of failed member */
+
+static void gdlm_recover_slot(void *arg, struct dlm_slot *slot)
+{
+	struct gfs2_sbd *sdp = arg;
+	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+	int jid = slot->slot - 1;
+
+	spin_lock(&ls->ls_recover_spin);
+	if (ls->ls_recover_size < jid + 1) {
+		fs_err(sdp, "recover_slot jid %d gen %u short size %d",
+		       jid, ls->ls_recover_block, ls->ls_recover_size);
+		spin_unlock(&ls->ls_recover_spin);
+		return;
+	}
+
+	if (ls->ls_recover_submit[jid]) {
+		fs_info(sdp, "recover_slot jid %d gen %u prev %u",
+			jid, ls->ls_recover_block, ls->ls_recover_submit[jid]);
+	}
+	ls->ls_recover_submit[jid] = ls->ls_recover_block;
+	spin_unlock(&ls->ls_recover_spin);
+}
+
+/* dlm calls after recover_slot and after it completes lock recovery */
+
+static void gdlm_recover_done(void *arg, struct dlm_slot *slots, int num_slots,
+			      int our_slot, uint32_t generation)
+{
+	struct gfs2_sbd *sdp = arg;
+	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+
+	/* ensure the ls jid arrays are large enough */
+	set_recover_size(sdp, slots, num_slots);
+
+	spin_lock(&ls->ls_recover_spin);
+	ls->ls_recover_start = generation;
+
+	if (!ls->ls_recover_mount) {
+		ls->ls_recover_mount = generation;
+		ls->ls_jid = our_slot - 1;
+	}
+
+	if (!test_bit(DFL_UNMOUNT, &ls->ls_recover_flags))
+		queue_delayed_work(gfs2_control_wq, &sdp->sd_control_work, 0);
+
+	clear_bit(DFL_DLM_RECOVERY, &ls->ls_recover_flags);
+	smp_mb__after_clear_bit();
+	wake_up_bit(&ls->ls_recover_flags, DFL_DLM_RECOVERY);
+	spin_unlock(&ls->ls_recover_spin);
+}
+
+/* gfs2_recover thread has a journal recovery result */
+
+static void gdlm_recovery_result(struct gfs2_sbd *sdp, unsigned int jid,
+				 unsigned int result)
+{
+	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+
+	if (test_bit(DFL_NO_DLM_OPS, &ls->ls_recover_flags))
+		return;
+
+	/* don't care about the recovery of own journal during mount */
+	if (jid == ls->ls_jid)
+		return;
+
+	spin_lock(&ls->ls_recover_spin);
+	if (test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags)) {
+		spin_unlock(&ls->ls_recover_spin);
+		return;
+	}
+	if (ls->ls_recover_size < jid + 1) {
+		fs_err(sdp, "recovery_result jid %d short size %d",
+		       jid, ls->ls_recover_size);
+		spin_unlock(&ls->ls_recover_spin);
+		return;
+	}
+
+	fs_info(sdp, "recover jid %d result %s\n", jid,
+		result == LM_RD_GAVEUP ? "busy" : "success");
+
+	ls->ls_recover_result[jid] = result;
+
+	/* GAVEUP means another node is recovering the journal; delay our
+	   next attempt to recover it, to give the other node a chance to
+	   finish before trying again */
+
+	if (!test_bit(DFL_UNMOUNT, &ls->ls_recover_flags))
+		queue_delayed_work(gfs2_control_wq, &sdp->sd_control_work,
+				   result == LM_RD_GAVEUP ? HZ : 0);
+	spin_unlock(&ls->ls_recover_spin);
+}
+
+const struct dlm_lockspace_ops gdlm_lockspace_ops = {
+	.recover_prep = gdlm_recover_prep,
+	.recover_slot = gdlm_recover_slot,
+	.recover_done = gdlm_recover_done,
+};
+
+static int gdlm_mount(struct gfs2_sbd *sdp, const char *table)
+{
+	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+	char cluster[GFS2_LOCKNAME_LEN];
+	const char *fsname;
+	uint32_t flags;
+	int error, ops_result;
+
+	/*
+	 * initialize everything
+	 */
+
+	INIT_DELAYED_WORK(&sdp->sd_control_work, gfs2_control_func);
+	spin_lock_init(&ls->ls_recover_spin);
+	ls->ls_recover_flags = 0;
+	ls->ls_recover_mount = 0;
+	ls->ls_recover_start = 0;
+	ls->ls_recover_block = 0;
+	ls->ls_recover_size = 0;
+	ls->ls_recover_submit = NULL;
+	ls->ls_recover_result = NULL;
+
+	error = set_recover_size(sdp, NULL, 0);
+	if (error)
+		goto fail;
+
+	/*
+	 * prepare dlm_new_lockspace args
+	 */
+
+	fsname = strchr(table, ':');
+	if (!fsname) {
+		fs_info(sdp, "no fsname found\n");
+		error = -EINVAL;
+		goto fail_free;
+	}
+	memset(cluster, 0, sizeof(cluster));
+	memcpy(cluster, table, strlen(table) - strlen(fsname));
+	fsname++;
+
+	flags = DLM_LSFL_FS | DLM_LSFL_NEWEXCL;
+	if (ls->ls_nodir)
+		flags |= DLM_LSFL_NODIR;
+
+	/*
+	 * create/join lockspace
+	 */
+
+	error = dlm_new_lockspace(fsname, cluster, flags, GDLM_LVB_SIZE,
+				  &gdlm_lockspace_ops, sdp, &ops_result,
+				  &ls->ls_dlm);
+	if (error) {
+		fs_err(sdp, "dlm_new_lockspace error %d\n", error);
+		goto fail_free;
+	}
+
+	if (ops_result < 0) {
+		/*
+		 * dlm does not support ops callbacks,
+		 * old dlm_controld/gfs_controld are used, try without ops.
+		 */
+		fs_info(sdp, "dlm lockspace ops not used\n");
+		free_recover_size(ls);
+		set_bit(DFL_NO_DLM_OPS, &ls->ls_recover_flags);
+		return 0;
+	}
+
+	if (!test_bit(SDF_NOJOURNALID, &sdp->sd_flags)) {
+		fs_err(sdp, "dlm lockspace ops disallow jid preset\n");
+		error = -EINVAL;
+		goto fail_release;
+	}
+
+	/*
+	 * control_mount() uses control_lock to determine first mounter,
+	 * and for later mounts, waits for any recoveries to be cleared.
+	 */
+
+	error = control_mount(sdp);
+	if (error) {
+		fs_err(sdp, "mount control error %d\n", error);
+		goto fail_release;
+	}
+
+	ls->ls_first = !!test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags);
+	clear_bit(SDF_NOJOURNALID, &sdp->sd_flags);
+	smp_mb__after_clear_bit();
+	wake_up_bit(&sdp->sd_flags, SDF_NOJOURNALID);
+	return 0;
+
+fail_release:
+	dlm_release_lockspace(ls->ls_dlm, 2);
+fail_free:
+	free_recover_size(ls);
+fail:
+	return error;
+}
+
+static void gdlm_first_done(struct gfs2_sbd *sdp)
+{
+	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+	int error;
+
+	if (test_bit(DFL_NO_DLM_OPS, &ls->ls_recover_flags))
+		return;
+
+	error = control_first_done(sdp);
+	if (error)
+		fs_err(sdp, "mount first_done error %d\n", error);
+}
+
 static void gdlm_unmount(struct gfs2_sbd *sdp)
 {
 	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
 
+	if (test_bit(DFL_NO_DLM_OPS, &ls->ls_recover_flags))
+		goto release;
+
+	/* wait for gfs2_control_wq to be done with this mount */
+
+	spin_lock(&ls->ls_recover_spin);
+	set_bit(DFL_UNMOUNT, &ls->ls_recover_flags);
+	spin_unlock(&ls->ls_recover_spin);
+	flush_delayed_work_sync(&sdp->sd_control_work);
+
+	/* mounted_lock and control_lock will be purged in dlm recovery */
+release:
 	if (ls->ls_dlm) {
 		dlm_release_lockspace(ls->ls_dlm, 2);
 		ls->ls_dlm = NULL;
 	}
+
+	free_recover_size(ls);
 }
 
 static const match_table_t dlm_tokens = {
@@ -226,6 +1197,8 @@
 const struct lm_lockops gfs2_dlm_ops = {
 	.lm_proto_name = "lock_dlm",
 	.lm_mount = gdlm_mount,
+	.lm_first_done = gdlm_first_done,
+	.lm_recovery_result = gdlm_recovery_result,
 	.lm_unmount = gdlm_unmount,
 	.lm_put_lock = gdlm_put_lock,
 	.lm_lock = gdlm_lock,

diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index c150298..a8d9bcd 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c

@@ -28,6 +28,8 @@
 #include "recovery.h"
 #include "dir.h"
 
+struct workqueue_struct *gfs2_control_wq;
+
 static struct shrinker qd_shrinker = {
 	.shrink = gfs2_shrink_qd_memory,
 	.seeks = DEFAULT_SEEKS,
@@ -146,12 +148,19 @@
 	if (!gfs_recovery_wq)
 		goto fail_wq;
 
+	gfs2_control_wq = alloc_workqueue("gfs2_control",
+			       WQ_NON_REENTRANT | WQ_UNBOUND | WQ_FREEZABLE, 0);
+	if (!gfs2_control_wq)
+		goto fail_control;
+
 	gfs2_register_debugfs();
 
 	printk("GFS2 installed\n");
 
 	return 0;
 
+fail_control:
+	destroy_workqueue(gfs_recovery_wq);
 fail_wq:
 	unregister_filesystem(&gfs2meta_fs_type);
 fail_unregister:
@@ -195,6 +204,7 @@
 	unregister_filesystem(&gfs2_fs_type);
 	unregister_filesystem(&gfs2meta_fs_type);
 	destroy_workqueue(gfs_recovery_wq);
+	destroy_workqueue(gfs2_control_wq);
 
 	rcu_barrier();
 

diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index fe72e79..6aacf3f 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c

@@ -562,8 +562,12 @@
 {
 	char *message = "FIRSTMOUNT=Done";
 	char *envp[] = { message, NULL };
-	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
-	ls->ls_first_done = 1;
+
+	fs_info(sdp, "first mount done, others may mount\n");
+
+	if (sdp->sd_lockstruct.ls_ops->lm_first_done)
+		sdp->sd_lockstruct.ls_ops->lm_first_done(sdp);
+
 	kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp);
 }
 
@@ -944,7 +948,6 @@
 	struct gfs2_args *args = &sdp->sd_args;
 	const char *proto = sdp->sd_proto_name;
 	const char *table = sdp->sd_table_name;
-	const char *fsname;
 	char *o, *options;
 	int ret;
 
@@ -1004,21 +1007,12 @@
 		}
 	}
 
-	if (sdp->sd_args.ar_spectator)
-		snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.s", table);
-	else
-		snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.%u", table,
-			 sdp->sd_lockstruct.ls_jid);
-
-	fsname = strchr(table, ':');
-	if (fsname)
-		fsname++;
 	if (lm->lm_mount == NULL) {
 		fs_info(sdp, "Now mounting FS...\n");
 		complete_all(&sdp->sd_locking_init);
 		return 0;
 	}
-	ret = lm->lm_mount(sdp, fsname);
+	ret = lm->lm_mount(sdp, table);
 	if (ret == 0)
 		fs_info(sdp, "Joined cluster. Now mounting FS...\n");
 	complete_all(&sdp->sd_locking_init);
@@ -1084,7 +1078,7 @@
 
 	if (sdp->sd_args.ar_spectator) {
                 sb->s_flags |= MS_RDONLY;
-		set_bit(SDF_NORECOVERY, &sdp->sd_flags);
+		set_bit(SDF_RORECOVERY, &sdp->sd_flags);
 	}
 	if (sdp->sd_args.ar_posix_acl)
 		sb->s_flags |= MS_POSIXACL;
@@ -1124,6 +1118,8 @@
 	if (error)
 		goto fail;
 
+	snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s", sdp->sd_table_name);
+
 	gfs2_create_debugfs_file(sdp);
 
 	error = gfs2_sys_fs_add(sdp);
@@ -1160,6 +1156,13 @@
 		goto fail_sb;
 	}
 
+	if (sdp->sd_args.ar_spectator)
+		snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.s",
+			 sdp->sd_table_name);
+	else
+		snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.%u",
+			 sdp->sd_table_name, sdp->sd_lockstruct.ls_jid);
+
 	error = init_inodes(sdp, DO);
 	if (error)
 		goto fail_sb;

diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index f2a02ed..963b2d7 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c

@@ -436,12 +436,16 @@
 	char env_status[20];
 	char *envp[] = { env_jid, env_status, NULL };
 	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+
         ls->ls_recover_jid_done = jid;
         ls->ls_recover_jid_status = message;
 	sprintf(env_jid, "JID=%d", jid);
 	sprintf(env_status, "RECOVERY=%s",
 		message == LM_RD_SUCCESS ? "Done" : "Failed");
         kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp);
+
+	if (sdp->sd_lockstruct.ls_ops->lm_recovery_result)
+		sdp->sd_lockstruct.ls_ops->lm_recovery_result(sdp, jid, message);
 }
 
 void gfs2_recover_func(struct work_struct *work)
@@ -512,7 +516,9 @@
 		if (error)
 			goto fail_gunlock_ji;
 
-		if (test_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags)) {
+		if (test_bit(SDF_RORECOVERY, &sdp->sd_flags)) {
+			ro = 1;
+		} else if (test_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags)) {
 			if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
 				ro = 1;
 		} else {
@@ -577,6 +583,7 @@
 
 	fs_info(sdp, "jid=%u: %s\n", jd->jd_jid, (error) ? "Failed" : "Done");
 fail:
+	jd->jd_recover_error = error;
 	gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP);
 done:
 	clear_bit(JDF_RECOVERY, &jd->jd_flags);
@@ -605,6 +612,6 @@
 		wait_on_bit(&jd->jd_flags, JDF_RECOVERY, gfs2_recovery_wait,
 			    TASK_UNINTERRUPTIBLE);
 
-	return 0;
+	return wait ? jd->jd_recover_error : 0;
 }
 

diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 2223462..981bfa3 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c

@@ -1108,9 +1108,9 @@
 {
 	struct gfs2_blkreserv *rs = ip->i_res;
 
-	gfs2_blkrsv_put(ip);
 	if (rs->rs_rgd_gh.gh_gl)
 		gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
+	gfs2_blkrsv_put(ip);
 }
 
 /**

diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 443cabc..d33172c 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c

@@ -298,7 +298,7 @@
 	ssize_t ret;
 	int val = 0;
 
-	if (test_bit(DFL_BLOCK_LOCKS, &ls->ls_flags))
+	if (test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags))
 		val = 1;
 	ret = sprintf(buf, "%d\n", val);
 	return ret;
@@ -313,9 +313,9 @@
 	val = simple_strtol(buf, NULL, 0);
 
 	if (val == 1)
-		set_bit(DFL_BLOCK_LOCKS, &ls->ls_flags);
+		set_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);
 	else if (val == 0) {
-		clear_bit(DFL_BLOCK_LOCKS, &ls->ls_flags);
+		clear_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);
 		smp_mb__after_clear_bit();
 		gfs2_glock_thaw(sdp);
 	} else {
@@ -350,8 +350,8 @@
 		goto out;
 	if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL)
 		goto out;
-        sdp->sd_lockstruct.ls_first = first;
-        rv = 0;
+	sdp->sd_lockstruct.ls_first = first;
+	rv = 0;
 out:
         spin_unlock(&sdp->sd_jindex_spin);
         return rv ? rv : len;
@@ -360,19 +360,14 @@
 static ssize_t first_done_show(struct gfs2_sbd *sdp, char *buf)
 {
 	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
-	return sprintf(buf, "%d\n", ls->ls_first_done);
+	return sprintf(buf, "%d\n", !!test_bit(DFL_FIRST_MOUNT_DONE, &ls->ls_recover_flags));
 }
 
-static ssize_t recover_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
+int gfs2_recover_set(struct gfs2_sbd *sdp, unsigned jid)
 {
-	unsigned jid;
 	struct gfs2_jdesc *jd;
 	int rv;
 
-	rv = sscanf(buf, "%u", &jid);
-	if (rv != 1)
-		return -EINVAL;
-
 	rv = -ESHUTDOWN;
 	spin_lock(&sdp->sd_jindex_spin);
 	if (test_bit(SDF_NORECOVERY, &sdp->sd_flags))
@@ -389,6 +384,20 @@
 	}
 out:
 	spin_unlock(&sdp->sd_jindex_spin);
+	return rv;
+}
+
+static ssize_t recover_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
+{
+	unsigned jid;
+	int rv;
+	/* parse one unsigned jid from buf and hand it to gfs2_recover_set() */
+	rv = sscanf(buf, "%u", &jid);
+	if (rv != 1)
+		return -EINVAL;
+
+	rv = gfs2_recover_set(sdp, jid);
+	/* a nonzero rv is returned as the error; otherwise the full input length */
+	return rv ? rv : len;
+}
 

diff --git a/fs/gfs2/sys.h b/fs/gfs2/sys.h
index e94560e..79182d6 100644
--- a/fs/gfs2/sys.h
+++ b/fs/gfs2/sys.h

@@ -19,5 +19,7 @@
 int gfs2_sys_init(void);
 void gfs2_sys_uninit(void);
 
+int gfs2_recover_set(struct gfs2_sbd *sdp, unsigned jid);
+
 #endif /* __SYS_DOT_H__ */
 

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index e425ad9..1e85a7a 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c

@@ -583,7 +583,8 @@
 }
 
 static int hugetlbfs_migrate_page(struct address_space *mapping,
-				struct page *newpage, struct page *page)
+				struct page *newpage, struct page *page,
+				enum migrate_mode mode)
 {
 	int rc;
 

diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 5ee9253..8102db9 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h

@@ -332,7 +332,7 @@
 
 #ifdef CONFIG_MIGRATION
 extern int nfs_migrate_page(struct address_space *,
-		struct page *, struct page *);
+		struct page *, struct page *, enum migrate_mode);
 #else
 #define nfs_migrate_page NULL
 #endif

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 0c38852..834f0fe 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c

@@ -1688,7 +1688,7 @@
 
 #ifdef CONFIG_MIGRATION
 int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
-		struct page *page)
+		struct page *page, enum migrate_mode mode)
 {
 	/*
 	 * If PagePrivate is set, then the page is currently associated with
@@ -1703,7 +1703,7 @@
 
 	nfs_fscache_release_page(page, GFP_KERNEL);
 
-	return migrate_page(mapping, newpage, page);
+	return migrate_page(mapping, newpage, page, mode);
 }
 #endif
 

diff --git a/fs/pipe.c b/fs/pipe.c
index f0e485d..a932ced 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c

@@ -1137,7 +1137,7 @@
 	if (nr_pages < pipe->nrbufs)
 		return -EBUSY;
 
-	bufs = kcalloc(nr_pages, sizeof(struct pipe_buffer), GFP_KERNEL);
+	bufs = kcalloc(nr_pages, sizeof(*bufs), GFP_KERNEL | __GFP_NOWARN);
 	if (unlikely(!bufs))
 		return -ENOMEM;
 

diff --git a/fs/proc/array.c b/fs/proc/array.c
index 8c344f0..9252ee3 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c

@@ -464,7 +464,7 @@
 
 	seq_printf(m, "%d (%s) %c %d %d %d %d %d %u %lu \
 %lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \
-%lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu %lu %ld\n",
+%lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu %lu %ld %lu %lu %lu\n",
 		pid_nr_ns(pid, ns),
 		tcomm,
 		state,
@@ -511,7 +511,10 @@
 		task->policy,
 		(unsigned long long)delayacct_blkio_ticks(task),
 		cputime_to_clock_t(gtime),
-		cputime_to_clock_t(cgtime));
+		cputime_to_clock_t(cgtime),
+		(mm && permitted) ? mm->start_data : 0,
+		(mm && permitted) ? mm->end_data : 0,
+		(mm && permitted) ? mm->start_brk : 0);
 	if (mm)
 		mmput(mm);
 	return 0;

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 8173dfd..5485a53 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c

@@ -654,6 +654,8 @@
 	bool has_perms;
 
 	task = get_proc_task(inode);
+	if (!task)
+		return -ESRCH;
 	has_perms = has_pid_permissions(pid, task, 1);
 	put_task_struct(task);
 

diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c
index f744be9..af0b738 100644
--- a/fs/squashfs/cache.c
+++ b/fs/squashfs/cache.c

@@ -70,11 +70,15 @@
 	spin_lock(&cache->lock);
 
 	while (1) {
-		for (i = 0; i < cache->entries; i++)
-			if (cache->entry[i].block == block)
+		for (i = cache->curr_blk, n = 0; n < cache->entries; n++) {
+			if (cache->entry[i].block == block) {
+				cache->curr_blk = i;
 				break;
+			}
+			i = (i + 1) % cache->entries;
+		}
 
-		if (i == cache->entries) {
+		if (n == cache->entries) {
 			/*
 			 * Block not in cache, if all cache entries are used
 			 * go to sleep waiting for one to become available.
@@ -245,6 +249,7 @@
 		goto cleanup;
 	}
 
+	cache->curr_blk = 0;
 	cache->next_blk = 0;
 	cache->unused = entries;
 	cache->entries = entries;
@@ -332,17 +337,20 @@
 		u64 *block, int *offset, int length)
 {
 	struct squashfs_sb_info *msblk = sb->s_fs_info;
-	int bytes, copied = length;
+	int bytes, res = length;
 	struct squashfs_cache_entry *entry;
 
 	TRACE("Entered squashfs_read_metadata [%llx:%x]\n", *block, *offset);
 
 	while (length) {
 		entry = squashfs_cache_get(sb, msblk->block_cache, *block, 0);
-		if (entry->error)
-			return entry->error;
-		else if (*offset >= entry->length)
-			return -EIO;
+		if (entry->error) {
+			res = entry->error;
+			goto error;
+		} else if (*offset >= entry->length) {
+			res = -EIO;
+			goto error;
+		}
 
 		bytes = squashfs_copy_data(buffer, entry, *offset, length);
 		if (buffer)
@@ -358,7 +366,11 @@
 		squashfs_cache_put(entry);
 	}
 
-	return copied;
+	return res;
+
+error:
+	squashfs_cache_put(entry);
+	return res;
 }
 
 

diff --git a/fs/squashfs/inode.c b/fs/squashfs/inode.c
index fd7b3b3..81afbcc 100644
--- a/fs/squashfs/inode.c
+++ b/fs/squashfs/inode.c

@@ -208,8 +208,8 @@
 		inode->i_op = &squashfs_inode_ops;
 		inode->i_fop = &generic_ro_fops;
 		inode->i_mode |= S_IFREG;
-		inode->i_blocks = ((inode->i_size -
-				le64_to_cpu(sqsh_ino->sparse) - 1) >> 9) + 1;
+		inode->i_blocks = (inode->i_size -
+				le64_to_cpu(sqsh_ino->sparse) + 511) >> 9;
 
 		squashfs_i(inode)->fragment_block = frag_blk;
 		squashfs_i(inode)->fragment_size = frag_size;

diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h
index 651f0b3..52934a2 100644
--- a/fs/squashfs/squashfs_fs_sb.h
+++ b/fs/squashfs/squashfs_fs_sb.h

@@ -28,6 +28,7 @@
 struct squashfs_cache {
 	char			*name;
 	int			entries;
+	int			curr_blk;
 	int			next_blk;
 	int			num_waiters;
 	int			unused;

diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index d0858c2..ecaa2f7 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c

@@ -290,7 +290,7 @@
 
 check_directory_table:
 	/* Sanity check directory_table */
-	if (msblk->directory_table >= next_table) {
+	if (msblk->directory_table > next_table) {
 		err = -EINVAL;
 		goto failed_mount;
 	}

diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index b09ba2d..f922cba 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c

@@ -38,9 +38,6 @@
 
 DEFINE_SPINLOCK(dbg_lock);
 
-static char dbg_key_buf0[128];
-static char dbg_key_buf1[128];
-
 static const char *get_key_fmt(int fmt)
 {
 	switch (fmt) {
@@ -103,8 +100,8 @@
 	}
 }
 
-static void sprintf_key(const struct ubifs_info *c, const union ubifs_key *key,
-			char *buffer)
+const char *dbg_snprintf_key(const struct ubifs_info *c,
+			     const union ubifs_key *key, char *buffer, int len)
 {
 	char *p = buffer;
 	int type = key_type(c, key);
@@ -112,45 +109,34 @@
 	if (c->key_fmt == UBIFS_SIMPLE_KEY_FMT) {
 		switch (type) {
 		case UBIFS_INO_KEY:
-			sprintf(p, "(%lu, %s)", (unsigned long)key_inum(c, key),
-			       get_key_type(type));
+			len -= snprintf(p, len, "(%lu, %s)",
+					(unsigned long)key_inum(c, key),
+					get_key_type(type));
 			break;
 		case UBIFS_DENT_KEY:
 		case UBIFS_XENT_KEY:
-			sprintf(p, "(%lu, %s, %#08x)",
-				(unsigned long)key_inum(c, key),
-				get_key_type(type), key_hash(c, key));
+			len -= snprintf(p, len, "(%lu, %s, %#08x)",
+					(unsigned long)key_inum(c, key),
+					get_key_type(type), key_hash(c, key));
 			break;
 		case UBIFS_DATA_KEY:
-			sprintf(p, "(%lu, %s, %u)",
-				(unsigned long)key_inum(c, key),
-				get_key_type(type), key_block(c, key));
+			len -= snprintf(p, len, "(%lu, %s, %u)",
+					(unsigned long)key_inum(c, key),
+					get_key_type(type), key_block(c, key));
 			break;
 		case UBIFS_TRUN_KEY:
-			sprintf(p, "(%lu, %s)",
-				(unsigned long)key_inum(c, key),
-				get_key_type(type));
+			len -= snprintf(p, len, "(%lu, %s)",
+					(unsigned long)key_inum(c, key),
+					get_key_type(type));
 			break;
 		default:
-			sprintf(p, "(bad key type: %#08x, %#08x)",
-				key->u32[0], key->u32[1]);
+			len -= snprintf(p, len, "(bad key type: %#08x, %#08x)",
+					key->u32[0], key->u32[1]);
 		}
 	} else
-		sprintf(p, "bad key format %d", c->key_fmt);
-}
-
-const char *dbg_key_str0(const struct ubifs_info *c, const union ubifs_key *key)
-{
-	/* dbg_lock must be held */
-	sprintf_key(c, key, dbg_key_buf0);
-	return dbg_key_buf0;
-}
-
-const char *dbg_key_str1(const struct ubifs_info *c, const union ubifs_key *key)
-{
-	/* dbg_lock must be held */
-	sprintf_key(c, key, dbg_key_buf1);
-	return dbg_key_buf1;
+		len -= snprintf(p, len, "bad key format %d", c->key_fmt);
+	ubifs_assert(len > 0);
+	return p;
 }
 
 const char *dbg_ntype(int type)
@@ -319,6 +305,7 @@
 	int i, n;
 	union ubifs_key key;
 	const struct ubifs_ch *ch = node;
+	char key_buf[DBG_KEY_BUF_LEN];
 
 	if (dbg_is_tst_rcvry(c))
 		return;
@@ -474,7 +461,8 @@
 		const struct ubifs_ino_node *ino = node;
 
 		key_read(c, &ino->key, &key);
-		printk(KERN_DEBUG "\tkey            %s\n", DBGKEY(&key));
+		printk(KERN_DEBUG "\tkey            %s\n",
+		       dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN));
 		printk(KERN_DEBUG "\tcreat_sqnum    %llu\n",
 		       (unsigned long long)le64_to_cpu(ino->creat_sqnum));
 		printk(KERN_DEBUG "\tsize           %llu\n",
@@ -517,7 +505,8 @@
 		int nlen = le16_to_cpu(dent->nlen);
 
 		key_read(c, &dent->key, &key);
-		printk(KERN_DEBUG "\tkey            %s\n", DBGKEY(&key));
+		printk(KERN_DEBUG "\tkey            %s\n",
+		       dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN));
 		printk(KERN_DEBUG "\tinum           %llu\n",
 		       (unsigned long long)le64_to_cpu(dent->inum));
 		printk(KERN_DEBUG "\ttype           %d\n", (int)dent->type);
@@ -541,7 +530,8 @@
 		int dlen = le32_to_cpu(ch->len) - UBIFS_DATA_NODE_SZ;
 
 		key_read(c, &dn->key, &key);
-		printk(KERN_DEBUG "\tkey            %s\n", DBGKEY(&key));
+		printk(KERN_DEBUG "\tkey            %s\n",
+		       dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN));
 		printk(KERN_DEBUG "\tsize           %u\n",
 		       le32_to_cpu(dn->size));
 		printk(KERN_DEBUG "\tcompr_typ      %d\n",
@@ -582,7 +572,9 @@
 			key_read(c, &br->key, &key);
 			printk(KERN_DEBUG "\t%d: LEB %d:%d len %d key %s\n",
 			       i, le32_to_cpu(br->lnum), le32_to_cpu(br->offs),
-			       le32_to_cpu(br->len), DBGKEY(&key));
+			       le32_to_cpu(br->len),
+			       dbg_snprintf_key(c, &key, key_buf,
+						DBG_KEY_BUF_LEN));
 		}
 		break;
 	}
@@ -934,6 +926,7 @@
 {
 	int n;
 	const struct ubifs_zbranch *zbr;
+	char key_buf[DBG_KEY_BUF_LEN];
 
 	spin_lock(&dbg_lock);
 	if (znode->parent)
@@ -958,12 +951,16 @@
 			printk(KERN_DEBUG "\t%d: znode %p LEB %d:%d len %d key "
 					  "%s\n", n, zbr->znode, zbr->lnum,
 					  zbr->offs, zbr->len,
-					  DBGKEY(&zbr->key));
+					  dbg_snprintf_key(c, &zbr->key,
+							   key_buf,
+							   DBG_KEY_BUF_LEN));
 		else
 			printk(KERN_DEBUG "\t%d: LNC %p LEB %d:%d len %d key "
 					  "%s\n", n, zbr->znode, zbr->lnum,
 					  zbr->offs, zbr->len,
-					  DBGKEY(&zbr->key));
+					  dbg_snprintf_key(c, &zbr->key,
+							   key_buf,
+							   DBG_KEY_BUF_LEN));
 	}
 	spin_unlock(&dbg_lock);
 }
@@ -1260,6 +1257,7 @@
 	int err, nlen1, nlen2, cmp;
 	struct ubifs_dent_node *dent1, *dent2;
 	union ubifs_key key;
+	char key_buf[DBG_KEY_BUF_LEN];
 
 	ubifs_assert(!keys_cmp(c, &zbr1->key, &zbr2->key));
 	dent1 = kmalloc(UBIFS_MAX_DENT_NODE_SZ, GFP_NOFS);
@@ -1290,9 +1288,11 @@
 	key_read(c, &dent1->key, &key);
 	if (keys_cmp(c, &zbr1->key, &key)) {
 		dbg_err("1st entry at %d:%d has key %s", zbr1->lnum,
-			zbr1->offs, DBGKEY(&key));
+			zbr1->offs, dbg_snprintf_key(c, &key, key_buf,
+						     DBG_KEY_BUF_LEN));
 		dbg_err("but it should have key %s according to tnc",
-			DBGKEY(&zbr1->key));
+			dbg_snprintf_key(c, &zbr1->key, key_buf,
+					 DBG_KEY_BUF_LEN));
 		dbg_dump_node(c, dent1);
 		goto out_free;
 	}
@@ -1300,9 +1300,11 @@
 	key_read(c, &dent2->key, &key);
 	if (keys_cmp(c, &zbr2->key, &key)) {
 		dbg_err("2nd entry at %d:%d has key %s", zbr1->lnum,
-			zbr1->offs, DBGKEY(&key));
+			zbr1->offs, dbg_snprintf_key(c, &key, key_buf,
+						     DBG_KEY_BUF_LEN));
 		dbg_err("but it should have key %s according to tnc",
-			DBGKEY(&zbr2->key));
+			dbg_snprintf_key(c, &zbr2->key, key_buf,
+					 DBG_KEY_BUF_LEN));
 		dbg_dump_node(c, dent2);
 		goto out_free;
 	}
@@ -1319,7 +1321,7 @@
 		dbg_err("2 xent/dent nodes with the same name");
 	else
 		dbg_err("bad order of colliding key %s",
-			DBGKEY(&key));
+			dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN));
 
 	ubifs_msg("first node at %d:%d\n", zbr1->lnum, zbr1->offs);
 	dbg_dump_node(c, dent1);

diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h
index 8d9c468..307ab1d 100644
--- a/fs/ubifs/debug.h
+++ b/fs/ubifs/debug.h

@@ -169,40 +169,39 @@
 	spin_unlock(&dbg_lock);                                                \
 } while (0)
 
-const char *dbg_key_str0(const struct ubifs_info *c,
-			 const union ubifs_key *key);
-const char *dbg_key_str1(const struct ubifs_info *c,
-			 const union ubifs_key *key);
+#define ubifs_dbg_msg(type, fmt, ...) \
+	pr_debug("UBIFS DBG " type ": " fmt "\n", ##__VA_ARGS__)
 
-/*
- * DBGKEY macros require @dbg_lock to be held, which it is in the dbg message
- * macros.
- */
-#define DBGKEY(key) dbg_key_str0(c, (key))
-#define DBGKEY1(key) dbg_key_str1(c, (key))
-
-extern spinlock_t dbg_lock;
-
-#define ubifs_dbg_msg(type, fmt, ...) do {                        \
-	spin_lock(&dbg_lock);                                     \
-	pr_debug("UBIFS DBG " type ": " fmt "\n", ##__VA_ARGS__); \
-	spin_unlock(&dbg_lock);                                   \
+#define DBG_KEY_BUF_LEN 48 /* "(bad key type: %#08x, %#08x)" can need 39 bytes */
+#define ubifs_dbg_msg_key(type, key, fmt, ...) do {                            \
+	char __tmp_key_buf[DBG_KEY_BUF_LEN];                                   \
+	pr_debug("UBIFS DBG " type ": " fmt "%s\n", ##__VA_ARGS__,             \
+		 dbg_snprintf_key(c, key, __tmp_key_buf, DBG_KEY_BUF_LEN));    \
 } while (0)
 
 /* Just a debugging messages not related to any specific UBIFS subsystem */
-#define dbg_msg(fmt, ...)   ubifs_dbg_msg("msg", fmt, ##__VA_ARGS__)
+#define dbg_msg(fmt, ...)                                                      \
+	printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", current->pid,   \
+	       __func__, ##__VA_ARGS__)
+
 /* General messages */
 #define dbg_gen(fmt, ...)   ubifs_dbg_msg("gen", fmt, ##__VA_ARGS__)
 /* Additional journal messages */
 #define dbg_jnl(fmt, ...)   ubifs_dbg_msg("jnl", fmt, ##__VA_ARGS__)
+#define dbg_jnlk(key, fmt, ...) \
+	ubifs_dbg_msg_key("jnl", key, fmt, ##__VA_ARGS__)
 /* Additional TNC messages */
 #define dbg_tnc(fmt, ...)   ubifs_dbg_msg("tnc", fmt, ##__VA_ARGS__)
+#define dbg_tnck(key, fmt, ...) \
+	ubifs_dbg_msg_key("tnc", key, fmt, ##__VA_ARGS__)
 /* Additional lprops messages */
 #define dbg_lp(fmt, ...)    ubifs_dbg_msg("lp", fmt, ##__VA_ARGS__)
 /* Additional LEB find messages */
 #define dbg_find(fmt, ...)  ubifs_dbg_msg("find", fmt, ##__VA_ARGS__)
 /* Additional mount messages */
 #define dbg_mnt(fmt, ...)   ubifs_dbg_msg("mnt", fmt, ##__VA_ARGS__)
+#define dbg_mntk(key, fmt, ...) \
+	ubifs_dbg_msg_key("mnt", key, fmt, ##__VA_ARGS__)
 /* Additional I/O messages */
 #define dbg_io(fmt, ...)    ubifs_dbg_msg("io", fmt, ##__VA_ARGS__)
 /* Additional commit messages */
@@ -218,6 +217,7 @@
 /* Additional recovery messages */
 #define dbg_rcvry(fmt, ...) ubifs_dbg_msg("rcvry", fmt, ##__VA_ARGS__)
 
+extern spinlock_t dbg_lock;
 extern struct ubifs_global_debug_info ubifs_dbg;
 
 static inline int dbg_is_chk_gen(const struct ubifs_info *c)
@@ -258,6 +258,8 @@
 const char *dbg_jhead(int jhead);
 const char *dbg_get_key_dump(const struct ubifs_info *c,
 			     const union ubifs_key *key);
+const char *dbg_snprintf_key(const struct ubifs_info *c,
+			     const union ubifs_key *key, char *buffer, int len);
 void dbg_dump_inode(struct ubifs_info *c, const struct inode *inode);
 void dbg_dump_node(const struct ubifs_info *c, const void *node);
 void dbg_dump_lpt_node(const struct ubifs_info *c, void *node, int lnum,
@@ -368,6 +370,10 @@
 static inline const char *
 dbg_get_key_dump(const struct ubifs_info *c,
 		 const union ubifs_key *key)                      { return ""; }
+static inline const char *
+dbg_snprintf_key(const struct ubifs_info *c,
+		 const union ubifs_key *key, char *buffer,
+		 int len)                                         { return ""; }
 static inline void dbg_dump_inode(struct ubifs_info *c,
 				  const struct inode *inode)      { return; }
 static inline void dbg_dump_node(const struct ubifs_info *c,

diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index cef0460..2f438ab 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c

@@ -697,9 +697,8 @@
 	int dlen = COMPRESSED_DATA_NODE_BUF_SZ, allocated = 1;
 	struct ubifs_inode *ui = ubifs_inode(inode);
 
-	dbg_jnl("ino %lu, blk %u, len %d, key %s",
-		(unsigned long)key_inum(c, key), key_block(c, key), len,
-		DBGKEY(key));
+	dbg_jnlk(key, "ino %lu, blk %u, len %d, key ",
+		(unsigned long)key_inum(c, key), key_block(c, key), len);
 	ubifs_assert(len <= UBIFS_BLOCK_SIZE);
 
 	data = kmalloc(dlen, GFP_NOFS | __GFP_NOWARN);
@@ -1177,7 +1176,7 @@
 		dn = (void *)trun + UBIFS_TRUN_NODE_SZ;
 		blk = new_size >> UBIFS_BLOCK_SHIFT;
 		data_key_init(c, &key, inum, blk);
-		dbg_jnl("last block key %s", DBGKEY(&key));
+		dbg_jnlk(&key, "last block key ");
 		err = ubifs_tnc_lookup(c, &key, dn);
 		if (err == -ENOENT)
 			dlen = 0; /* Not found (so it is a hole) */

diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c
index ccabaf1..b007637 100644
--- a/fs/ubifs/replay.c
+++ b/fs/ubifs/replay.c

@@ -221,8 +221,8 @@
 {
 	int err;
 
-	dbg_mnt("LEB %d:%d len %d deletion %d sqnum %llu %s", r->lnum,
-		r->offs, r->len, r->deletion, r->sqnum, DBGKEY(&r->key));
+	dbg_mntk(&r->key, "LEB %d:%d len %d deletion %d sqnum %llu key ",
+		 r->lnum, r->offs, r->len, r->deletion, r->sqnum);
 
 	/* Set c->replay_sqnum to help deal with dangling branches. */
 	c->replay_sqnum = r->sqnum;
@@ -361,7 +361,7 @@
 {
 	struct replay_entry *r;
 
-	dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
+	dbg_mntk(key, "add LEB %d:%d, key ", lnum, offs);
 
 	if (key_inum(c, key) >= c->highest_inum)
 		c->highest_inum = key_inum(c, key);
@@ -409,7 +409,7 @@
 	struct replay_entry *r;
 	char *nbuf;
 
-	dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
+	dbg_mntk(key, "add LEB %d:%d, key ", lnum, offs);
 	if (key_inum(c, key) >= c->highest_inum)
 		c->highest_inum = key_inum(c, key);
 

diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index e14ee53..16ad84d 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c

@@ -505,7 +505,7 @@
 {
 	int ret;
 
-	dbg_tnc("LEB %d:%d, key %s", zbr->lnum, zbr->offs, DBGKEY(key));
+	dbg_tnck(key, "LEB %d:%d, key ", zbr->lnum, zbr->offs);
 
 	ret = try_read_node(c, node, key_type(c, key), zbr->len, zbr->lnum,
 			    zbr->offs);
@@ -519,8 +519,8 @@
 			ret = 0;
 	}
 	if (ret == 0 && c->replaying)
-		dbg_mnt("dangling branch LEB %d:%d len %d, key %s",
-			zbr->lnum, zbr->offs, zbr->len, DBGKEY(key));
+		dbg_mntk(key, "dangling branch LEB %d:%d len %d, key ",
+			zbr->lnum, zbr->offs, zbr->len);
 	return ret;
 }
 
@@ -995,9 +995,9 @@
 	if (adding || !o_znode)
 		return 0;
 
-	dbg_mnt("dangling match LEB %d:%d len %d %s",
+	dbg_mntk(key, "dangling match LEB %d:%d len %d key ",
 		o_znode->zbranch[o_n].lnum, o_znode->zbranch[o_n].offs,
-		o_znode->zbranch[o_n].len, DBGKEY(key));
+		o_znode->zbranch[o_n].len);
 	*zn = o_znode;
 	*n = o_n;
 	return 1;
@@ -1179,7 +1179,7 @@
 	struct ubifs_znode *znode;
 	unsigned long time = get_seconds();
 
-	dbg_tnc("search key %s", DBGKEY(key));
+	dbg_tnck(key, "search key ");
 	ubifs_assert(key_type(c, key) < UBIFS_INVALID_KEY);
 
 	znode = c->zroot.znode;
@@ -1315,7 +1315,7 @@
 	struct ubifs_znode *znode;
 	unsigned long time = get_seconds();
 
-	dbg_tnc("search and dirty key %s", DBGKEY(key));
+	dbg_tnck(key, "search and dirty key ");
 
 	znode = c->zroot.znode;
 	if (unlikely(!znode)) {
@@ -1722,8 +1722,8 @@
 	if (!keys_eq(c, &zbr->key, &key1)) {
 		ubifs_err("bad key in node at LEB %d:%d",
 			  zbr->lnum, zbr->offs);
-		dbg_tnc("looked for key %s found node's key %s",
-			DBGKEY(&zbr->key), DBGKEY1(&key1));
+		dbg_tnck(&zbr->key, "looked for key ");
+		dbg_tnck(&key1, "found node's key ");
 		goto out_err;
 	}
 
@@ -1776,7 +1776,7 @@
 		ubifs_err("failed to read from LEB %d:%d, error %d",
 			  lnum, offs, err);
 		dbg_dump_stack();
-		dbg_tnc("key %s", DBGKEY(&bu->key));
+		dbg_tnck(&bu->key, "key ");
 		return err;
 	}
 
@@ -1811,7 +1811,7 @@
 	int found, n, err;
 	struct ubifs_znode *znode;
 
-	dbg_tnc("name '%.*s' key %s", nm->len, nm->name, DBGKEY(key));
+	dbg_tnck(key, "name '%.*s' key ", nm->len, nm->name);
 	mutex_lock(&c->tnc_mutex);
 	found = ubifs_lookup_level0(c, key, &znode, &n);
 	if (!found) {
@@ -1985,8 +1985,7 @@
 	zp = znode->parent;
 	if (znode->child_cnt < c->fanout) {
 		ubifs_assert(n != c->fanout);
-		dbg_tnc("inserted at %d level %d, key %s", n, znode->level,
-			DBGKEY(key));
+		dbg_tnck(key, "inserted at %d level %d, key ", n, znode->level);
 
 		insert_zbranch(znode, zbr, n);
 
@@ -2001,7 +2000,7 @@
 	 * Unfortunately, @znode does not have more empty slots and we have to
 	 * split it.
 	 */
-	dbg_tnc("splitting level %d, key %s", znode->level, DBGKEY(key));
+	dbg_tnck(key, "splitting level %d, key ", znode->level);
 
 	if (znode->alt)
 		/*
@@ -2095,7 +2094,7 @@
 	}
 
 	/* Insert new key and branch */
-	dbg_tnc("inserting at %d level %d, key %s", n, zn->level, DBGKEY(key));
+	dbg_tnck(key, "inserting at %d level %d, key ", n, zn->level);
 
 	insert_zbranch(zi, zbr, n);
 
@@ -2171,7 +2170,7 @@
 	struct ubifs_znode *znode;
 
 	mutex_lock(&c->tnc_mutex);
-	dbg_tnc("%d:%d, len %d, key %s", lnum, offs, len, DBGKEY(key));
+	dbg_tnck(key, "%d:%d, len %d, key ", lnum, offs, len);
 	found = lookup_level0_dirty(c, key, &znode, &n);
 	if (!found) {
 		struct ubifs_zbranch zbr;
@@ -2220,8 +2219,8 @@
 	struct ubifs_znode *znode;
 
 	mutex_lock(&c->tnc_mutex);
-	dbg_tnc("old LEB %d:%d, new LEB %d:%d, len %d, key %s", old_lnum,
-		old_offs, lnum, offs, len, DBGKEY(key));
+	dbg_tnck(key, "old LEB %d:%d, new LEB %d:%d, len %d, key ", old_lnum,
+		 old_offs, lnum, offs, len);
 	found = lookup_level0_dirty(c, key, &znode, &n);
 	if (found < 0) {
 		err = found;
@@ -2303,8 +2302,8 @@
 	struct ubifs_znode *znode;
 
 	mutex_lock(&c->tnc_mutex);
-	dbg_tnc("LEB %d:%d, name '%.*s', key %s", lnum, offs, nm->len, nm->name,
-		DBGKEY(key));
+	dbg_tnck(key, "LEB %d:%d, name '%.*s', key ",
+		 lnum, offs, nm->len, nm->name);
 	found = lookup_level0_dirty(c, key, &znode, &n);
 	if (found < 0) {
 		err = found;
@@ -2397,7 +2396,7 @@
 	/* Delete without merge for now */
 	ubifs_assert(znode->level == 0);
 	ubifs_assert(n >= 0 && n < c->fanout);
-	dbg_tnc("deleting %s", DBGKEY(&znode->zbranch[n].key));
+	dbg_tnck(&znode->zbranch[n].key, "deleting key ");
 
 	zbr = &znode->zbranch[n];
 	lnc_free(zbr);
@@ -2507,7 +2506,7 @@
 	struct ubifs_znode *znode;
 
 	mutex_lock(&c->tnc_mutex);
-	dbg_tnc("key %s", DBGKEY(key));
+	dbg_tnck(key, "key ");
 	found = lookup_level0_dirty(c, key, &znode, &n);
 	if (found < 0) {
 		err = found;
@@ -2538,7 +2537,7 @@
 	struct ubifs_znode *znode;
 
 	mutex_lock(&c->tnc_mutex);
-	dbg_tnc("%.*s, key %s", nm->len, nm->name, DBGKEY(key));
+	dbg_tnck(key, "%.*s, key ", nm->len, nm->name);
 	err = lookup_level0_dirty(c, key, &znode, &n);
 	if (err < 0)
 		goto out_unlock;
@@ -2653,7 +2652,7 @@
 				dbg_dump_znode(c, znode);
 				goto out_unlock;
 			}
-			dbg_tnc("removing %s", DBGKEY(key));
+			dbg_tnck(key, "removing key ");
 		}
 		if (k) {
 			for (i = n + 1 + k; i < znode->child_cnt; i++)
@@ -2773,7 +2772,7 @@
 	struct ubifs_zbranch *zbr;
 	union ubifs_key *dkey;
 
-	dbg_tnc("%s %s", nm->name ? (char *)nm->name : "(lowest)", DBGKEY(key));
+	dbg_tnck(key, "%s ", nm->name ? (char *)nm->name : "(lowest)");
 	ubifs_assert(is_hash_key(c, key));
 
 	mutex_lock(&c->tnc_mutex);
@@ -3332,9 +3331,9 @@
 
 out_dump:
 	block = key_block(c, key);
-	ubifs_err("inode %lu has size %lld, but there are data at offset %lld "
-		  "(data key %s)", (unsigned long)inode->i_ino, size,
-		  ((loff_t)block) << UBIFS_BLOCK_SHIFT, DBGKEY(key));
+	ubifs_err("inode %lu has size %lld, but there are data at offset %lld",
+		  (unsigned long)inode->i_ino, size,
+		  ((loff_t)block) << UBIFS_BLOCK_SHIFT);
 	mutex_unlock(&c->tnc_mutex);
 	dbg_dump_inode(c, inode);
 	dbg_dump_stack();

diff --git a/fs/ubifs/tnc_misc.c b/fs/ubifs/tnc_misc.c
index b48db99..dc28fe6 100644
--- a/fs/ubifs/tnc_misc.c
+++ b/fs/ubifs/tnc_misc.c

@@ -328,8 +328,8 @@
 		case UBIFS_XENT_KEY:
 			break;
 		default:
-			dbg_msg("bad key type at slot %d: %s", i,
-				DBGKEY(&zbr->key));
+			dbg_msg("bad key type at slot %d: %d",
+				i, key_type(c, &zbr->key));
 			err = 3;
 			goto out_dump;
 		}
@@ -475,7 +475,7 @@
 				      zbr->offs);
 
 	if (err) {
-		dbg_tnc("key %s", DBGKEY(key));
+		dbg_tnck(key, "key ");
 		return err;
 	}
 
@@ -484,8 +484,8 @@
 	if (!keys_eq(c, key, &key1)) {
 		ubifs_err("bad key in node at LEB %d:%d",
 			  zbr->lnum, zbr->offs);
-		dbg_tnc("looked for key %s found node's key %s",
-			DBGKEY(key), DBGKEY1(&key1));
+		dbg_tnck(key, "looked for key ");
+		dbg_tnck(&key1, "but found node's key ");
 		dbg_dump_node(c, node);
 		return -EINVAL;
 	}

diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index e58fa77..f96a5b5 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h

@@ -139,6 +139,20 @@
 		__tlb_remove_tlb_entry(tlb, ptep, address);	\
 	} while (0)
 
+/**
+ * tlb_remove_pmd_tlb_entry - remember a pmd mapping for later tlb invalidation
+ * This is a nop so far, because only x86 needs it.
+ */
+#ifndef __tlb_remove_pmd_tlb_entry
+#define __tlb_remove_pmd_tlb_entry(tlb, pmdp, address) do {} while (0)
+#endif
+
+#define tlb_remove_pmd_tlb_entry(tlb, pmdp, address)		\
+	do {							\
+		tlb->need_flush = 1;				\
+		__tlb_remove_pmd_tlb_entry(tlb, pmdp, address);	\
+	} while (0)
+
 #define pte_free_tlb(tlb, ptep, address)			\
 	do {							\
 		tlb->need_flush = 1;				\

diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
index 63e4fce..4cd4be2 100644
--- a/include/drm/drm_crtc.h
+++ b/include/drm/drm_crtc.h

@@ -453,7 +453,7 @@
 #define DRM_CONNECTOR_MAX_UMODES 16
 #define DRM_CONNECTOR_MAX_PROPERTY 16
 #define DRM_CONNECTOR_LEN 32
-#define DRM_CONNECTOR_MAX_ENCODER 2
+#define DRM_CONNECTOR_MAX_ENCODER 3
 
 /**
  * drm_encoder - central DRM encoder structure

diff --git a/include/linux/amba/mmci.h b/include/linux/amba/mmci.h
index 2111481..0101e9c 100644
--- a/include/linux/amba/mmci.h
+++ b/include/linux/amba/mmci.h

@@ -30,6 +30,7 @@
  * @cd_invert: true if the gpio_cd pin value is active low
  * @capabilities: the capabilities of the block as implemented in
  * this platform, signify anything MMC_CAP_* from mmc/host.h
+ * @capabilities2: more capabilities, MMC_CAP2_* from mmc/host.h
  * @dma_filter: function used to select an appropriate RX and TX
  * DMA channel to be used for DMA, if and only if you're deploying the
  * generic DMA engine
@@ -52,6 +53,7 @@
 	int	gpio_cd;
 	bool	cd_invert;
 	unsigned long capabilities;
+	unsigned long capabilities2;
 	bool (*dma_filter)(struct dma_chan *chan, void *filter_param);
 	void *dma_rx_param;
 	void *dma_tx_param;

diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h
index 5c4abce..b936763 100644
--- a/include/linux/crash_dump.h
+++ b/include/linux/crash_dump.h

@@ -5,6 +5,7 @@
 #include <linux/kexec.h>
 #include <linux/device.h>
 #include <linux/proc_fs.h>
+#include <linux/elf.h>
 
 #define ELFCORE_ADDR_MAX	(-1ULL)
 #define ELFCORE_ADDR_ERR	(-2ULL)

diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 31f7322..d64a55b 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h

@@ -242,6 +242,7 @@
 extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *);
 extern struct dentry * d_splice_alias(struct inode *, struct dentry *);
 extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *);
+extern struct dentry *d_find_any_alias(struct inode *inode);
 extern struct dentry * d_obtain_alias(struct inode *);
 extern void shrink_dcache_sb(struct super_block *);
 extern void shrink_dcache_parent(struct dentry *);

diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h
index f362733..657ab55 100644
--- a/include/linux/eventpoll.h
+++ b/include/linux/eventpoll.h

@@ -61,6 +61,7 @@
 static inline void eventpoll_init_file(struct file *file)
 {
 	INIT_LIST_HEAD(&file->f_ep_links);
+	INIT_LIST_HEAD(&file->f_tfile_llink);
 }
 
 

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 7aacf31..4bc8169 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h

@@ -525,6 +525,7 @@
 struct page;
 struct address_space;
 struct writeback_control;
+enum migrate_mode;
 
 struct iov_iter {
 	const struct iovec *iov;
@@ -609,9 +610,12 @@
 			loff_t offset, unsigned long nr_segs);
 	int (*get_xip_mem)(struct address_space *, pgoff_t, int,
 						void **, unsigned long *);
-	/* migrate the contents of a page to the specified target */
+	/*
+	 * migrate the contents of a page to the specified target. If
+	 * migrate_mode is MIGRATE_ASYNC, it must not block.
+	 */
 	int (*migratepage) (struct address_space *,
-			struct page *, struct page *);
+			struct page *, struct page *, enum migrate_mode);
 	int (*launder_page) (struct page *);
 	int (*is_partially_uptodate) (struct page *, read_descriptor_t *,
 					unsigned long);
@@ -656,6 +660,7 @@
 	 * must be enforced here for CRIS, to let the least significant bit
 	 * of struct page's "mapping" pointer be used for PAGE_MAPPING_ANON.
 	 */
+struct request_queue;
 
 struct block_device {
 	dev_t			bd_dev;  /* not a kdev_t - it's a search key */
@@ -678,6 +683,7 @@
 	unsigned		bd_part_count;
 	int			bd_invalidated;
 	struct gendisk *	bd_disk;
+	struct request_queue *  bd_queue;
 	struct list_head	bd_list;
 	/*
 	 * Private data.  You must have bd_claim'ed the block_device
@@ -1001,6 +1007,7 @@
 #ifdef CONFIG_EPOLL
 	/* Used by fs/eventpoll.c to link all the hooks to this file */
 	struct list_head	f_ep_links;
+	struct list_head	f_tfile_llink;
 #endif /* #ifdef CONFIG_EPOLL */
 	struct address_space	*f_mapping;
 #ifdef CONFIG_DEBUG_WRITECOUNT
@@ -2536,7 +2543,8 @@
 
 #ifdef CONFIG_MIGRATION
 extern int buffer_migrate_page(struct address_space *,
-				struct page *, struct page *);
+				struct page *, struct page *,
+				enum migrate_mode);
 #else
 #define buffer_migrate_page NULL
 #endif

diff --git a/include/linux/fuse.h b/include/linux/fuse.h
index 464cff5..8ba2c94 100644
--- a/include/linux/fuse.h
+++ b/include/linux/fuse.h

@@ -50,6 +50,10 @@
  *
  * 7.17
  *  - add FUSE_FLOCK_LOCKS and FUSE_RELEASE_FLOCK_UNLOCK
+ *
+ * 7.18
+ *  - add FUSE_IOCTL_DIR flag
+ *  - add FUSE_NOTIFY_DELETE
  */
 
 #ifndef _LINUX_FUSE_H
@@ -81,7 +85,7 @@
 #define FUSE_KERNEL_VERSION 7
 
 /** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 17
+#define FUSE_KERNEL_MINOR_VERSION 18
 
 /** The node ID of the root inode */
 #define FUSE_ROOT_ID 1
@@ -214,6 +218,7 @@
  * FUSE_IOCTL_UNRESTRICTED: not restricted to well-formed ioctls, retry allowed
  * FUSE_IOCTL_RETRY: retry with new iovecs
  * FUSE_IOCTL_32BIT: 32bit ioctl
+ * FUSE_IOCTL_DIR: is a directory
  *
  * FUSE_IOCTL_MAX_IOV: maximum of in_iovecs + out_iovecs
  */
@@ -221,6 +226,7 @@
 #define FUSE_IOCTL_UNRESTRICTED	(1 << 1)
 #define FUSE_IOCTL_RETRY	(1 << 2)
 #define FUSE_IOCTL_32BIT	(1 << 3)
+#define FUSE_IOCTL_DIR		(1 << 4)
 
 #define FUSE_IOCTL_MAX_IOV	256
 
@@ -283,6 +289,7 @@
 	FUSE_NOTIFY_INVAL_ENTRY = 3,
 	FUSE_NOTIFY_STORE = 4,
 	FUSE_NOTIFY_RETRIEVE = 5,
+	FUSE_NOTIFY_DELETE = 6,
 	FUSE_NOTIFY_CODE_MAX,
 };
 
@@ -606,6 +613,13 @@
 	__u32	padding;
 };
 
+struct fuse_notify_delete_out {
+	__u64	parent;
+	__u64	child;
+	__u32	namelen;
+	__u32	padding;
+};
+
 struct fuse_notify_store_out {
 	__u64	nodeid;
 	__u64	offset;

diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h
index 4f44629..b148087 100644
--- a/include/linux/gfs2_ondisk.h
+++ b/include/linux/gfs2_ondisk.h

@@ -22,6 +22,8 @@
 #define GFS2_LIVE_LOCK		1
 #define GFS2_TRANS_LOCK		2
 #define GFS2_RENAME_LOCK	3
+#define GFS2_CONTROL_LOCK	4
+#define GFS2_MOUNTED_LOCK	5
 
 /* Format numbers for various metadata types */
 

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index a9ace9c..1b92129 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h

@@ -18,7 +18,7 @@
 					  unsigned int flags);
 extern int zap_huge_pmd(struct mmu_gather *tlb,
 			struct vm_area_struct *vma,
-			pmd_t *pmd);
+			pmd_t *pmd, unsigned long addr);
 extern int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 			unsigned long addr, unsigned long end,
 			unsigned char *vec);

diff --git a/include/linux/i2c/twl.h b/include/linux/i2c/twl.h
index 114c0f6..78d3465 100644
--- a/include/linux/i2c/twl.h
+++ b/include/linux/i2c/twl.h

@@ -652,10 +652,12 @@
 	unsigned num;
 	struct twl4030_resconfig *resource_config;
 #define TWL4030_RESCONFIG_UNDEF	((u8)-1)
+	bool use_poweroff;	/* Board is wired for TWL poweroff */
 };
 
 extern void twl4030_power_init(struct twl4030_power_data *triton2_scripts);
 extern int twl4030_remove_script(u8 flags);
+extern void twl4030_power_off(void);
 
 struct twl4030_codec_data {
 	unsigned int digimic_delay; /* in ms */

diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index 34e8d52..f1362b5 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h

@@ -22,7 +22,7 @@
 
 /* Request structure */
 
-struct inet_diag_req_compat {
+struct inet_diag_req {
 	__u8	idiag_family;		/* Family of addresses. */
 	__u8	idiag_src_len;
 	__u8	idiag_dst_len;
@@ -34,7 +34,7 @@
 	__u32	idiag_dbs;		/* Tables to dump (NI) */
 };
 
-struct inet_diag_req {
+struct inet_diag_req_v2 {
 	__u8	sdiag_family;
 	__u8	sdiag_protocol;
 	__u8	idiag_ext;
@@ -143,12 +143,12 @@
 struct inet_diag_handler {
 	void			(*dump)(struct sk_buff *skb,
 					struct netlink_callback *cb,
-					struct inet_diag_req *r,
+					struct inet_diag_req_v2 *r,
 					struct nlattr *bc);
 
 	int			(*dump_one)(struct sk_buff *in_skb,
 					const struct nlmsghdr *nlh,
-					struct inet_diag_req *req);
+					struct inet_diag_req_v2 *req);
 
 	void			(*idiag_get_info)(struct sock *sk,
 						  struct inet_diag_msg *r,
@@ -158,15 +158,15 @@
 
 struct inet_connection_sock;
 int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
-			      struct sk_buff *skb, struct inet_diag_req *req,
+			      struct sk_buff *skb, struct inet_diag_req_v2 *req,
 			      u32 pid, u32 seq, u16 nlmsg_flags,
 			      const struct nlmsghdr *unlh);
 void inet_diag_dump_icsk(struct inet_hashinfo *h, struct sk_buff *skb,
-		struct netlink_callback *cb, struct inet_diag_req *r,
+		struct netlink_callback *cb, struct inet_diag_req_v2 *r,
 		struct nlattr *bc);
 int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
 		struct sk_buff *in_skb, const struct nlmsghdr *nlh,
-		struct inet_diag_req *req);
+		struct inet_diag_req_v2 *req);
 
 int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk);
 

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index d0a7a0c..e834342 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h

@@ -185,16 +185,17 @@
 
 extern struct atomic_notifier_head panic_notifier_list;
 extern long (*panic_blink)(int state);
-NORET_TYPE void panic(const char * fmt, ...)
-	__attribute__ ((NORET_AND format (printf, 1, 2))) __cold;
+__printf(1, 2)
+void panic(const char *fmt, ...)
+	__noreturn __cold;
 extern void oops_enter(void);
 extern void oops_exit(void);
 void print_oops_end_marker(void);
 extern int oops_may_print(void);
-NORET_TYPE void do_exit(long error_code)
-	ATTRIB_NORET;
-NORET_TYPE void complete_and_exit(struct completion *, long)
-	ATTRIB_NORET;
+void do_exit(long error_code)
+	__noreturn;
+void complete_and_exit(struct completion *, long)
+	__noreturn;
 
 /* Internal, do not use. */
 int __must_check _kstrtoul(const char *s, unsigned int base, unsigned long *res);

diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h
index ee0c952..fee6631 100644
--- a/include/linux/kmsg_dump.h
+++ b/include/linux/kmsg_dump.h

@@ -18,7 +18,6 @@
 enum kmsg_dump_reason {
 	KMSG_DUMP_OOPS,
 	KMSG_DUMP_PANIC,
-	KMSG_DUMP_KEXEC,
 	KMSG_DUMP_RESTART,
 	KMSG_DUMP_HALT,
 	KMSG_DUMP_POWEROFF,

diff --git a/include/linux/linkage.h b/include/linux/linkage.h
index 3f46aed..807f1e5 100644
--- a/include/linux/linkage.h
+++ b/include/linux/linkage.h

@@ -88,8 +88,4 @@
 
 #endif
 
-#define NORET_TYPE    /**/
-#define ATTRIB_NORET  __attribute__((noreturn))
-#define NORET_AND     noreturn,
-
 #endif

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index f944591..4d34356 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h

@@ -32,13 +32,11 @@
 	MEMCG_NR_FILE_MAPPED, /* # of pages charged as file rss */
 };
 
-extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
-					struct list_head *dst,
-					unsigned long *scanned, int order,
-					isolate_mode_t mode,
-					struct zone *z,
-					struct mem_cgroup *mem_cont,
-					int active, int file);
+struct mem_cgroup_reclaim_cookie {
+	struct zone *zone;
+	int priority;
+	unsigned int generation;
+};
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
 /*
@@ -56,20 +54,21 @@
 				gfp_t gfp_mask);
 /* for swap handling */
 extern int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
-		struct page *page, gfp_t mask, struct mem_cgroup **ptr);
+		struct page *page, gfp_t mask, struct mem_cgroup **memcgp);
 extern void mem_cgroup_commit_charge_swapin(struct page *page,
-					struct mem_cgroup *ptr);
-extern void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *ptr);
+					struct mem_cgroup *memcg);
+extern void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg);
 
 extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 					gfp_t gfp_mask);
-extern void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru);
-extern void mem_cgroup_del_lru_list(struct page *page, enum lru_list lru);
-extern void mem_cgroup_rotate_reclaimable_page(struct page *page);
-extern void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru);
-extern void mem_cgroup_del_lru(struct page *page);
-extern void mem_cgroup_move_lists(struct page *page,
-				  enum lru_list from, enum lru_list to);
+
+struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *);
+struct lruvec *mem_cgroup_lru_add_list(struct zone *, struct page *,
+				       enum lru_list);
+void mem_cgroup_lru_del_list(struct page *, enum lru_list);
+void mem_cgroup_lru_del(struct page *);
+struct lruvec *mem_cgroup_lru_move_lists(struct zone *, struct page *,
+					 enum lru_list, enum lru_list);
 
 /* For coalescing uncharge for reducing memcg' overhead*/
 extern void mem_cgroup_uncharge_start(void);
@@ -102,10 +101,15 @@
 
 extern int
 mem_cgroup_prepare_migration(struct page *page,
-	struct page *newpage, struct mem_cgroup **ptr, gfp_t gfp_mask);
+	struct page *newpage, struct mem_cgroup **memcgp, gfp_t gfp_mask);
 extern void mem_cgroup_end_migration(struct mem_cgroup *memcg,
 	struct page *oldpage, struct page *newpage, bool migration_ok);
 
+struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *,
+				   struct mem_cgroup *,
+				   struct mem_cgroup_reclaim_cookie *);
+void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *);
+
 /*
  * For memory reclaim.
  */
@@ -122,7 +126,10 @@
 mem_cgroup_get_reclaim_stat_from_page(struct page *page);
 extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
 					struct task_struct *p);
+extern void mem_cgroup_replace_page_cache(struct page *oldpage,
+					struct page *newpage);
 
+extern void mem_cgroup_reset_owner(struct page *page);
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
 extern int do_swap_account;
 #endif
@@ -157,7 +164,7 @@
 
 void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx);
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-void mem_cgroup_split_huge_fixup(struct page *head, struct page *tail);
+void mem_cgroup_split_huge_fixup(struct page *head);
 #endif
 
 #ifdef CONFIG_DEBUG_VM
@@ -180,17 +187,17 @@
 }
 
 static inline int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
-		struct page *page, gfp_t gfp_mask, struct mem_cgroup **ptr)
+		struct page *page, gfp_t gfp_mask, struct mem_cgroup **memcgp)
 {
 	return 0;
 }
 
 static inline void mem_cgroup_commit_charge_swapin(struct page *page,
-					  struct mem_cgroup *ptr)
+					  struct mem_cgroup *memcg)
 {
 }
 
-static inline void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *ptr)
+static inline void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg)
 {
 }
 
@@ -210,33 +217,33 @@
 {
 }
 
-static inline void mem_cgroup_add_lru_list(struct page *page, int lru)
+static inline struct lruvec *mem_cgroup_zone_lruvec(struct zone *zone,
+						    struct mem_cgroup *memcg)
+{
+	return &zone->lruvec;
+}
+
+static inline struct lruvec *mem_cgroup_lru_add_list(struct zone *zone,
+						     struct page *page,
+						     enum lru_list lru)
+{
+	return &zone->lruvec;
+}
+
+static inline void mem_cgroup_lru_del_list(struct page *page, enum lru_list lru)
 {
 }
 
-static inline void mem_cgroup_del_lru_list(struct page *page, int lru)
+static inline void mem_cgroup_lru_del(struct page *page)
 {
-	return ;
 }
 
-static inline void mem_cgroup_rotate_reclaimable_page(struct page *page)
+static inline struct lruvec *mem_cgroup_lru_move_lists(struct zone *zone,
+						       struct page *page,
+						       enum lru_list from,
+						       enum lru_list to)
 {
-	return ;
-}
-
-static inline void mem_cgroup_rotate_lru_list(struct page *page, int lru)
-{
-	return ;
-}
-
-static inline void mem_cgroup_del_lru(struct page *page)
-{
-	return ;
-}
-
-static inline void
-mem_cgroup_move_lists(struct page *page, enum lru_list from, enum lru_list to)
-{
+	return &zone->lruvec;
 }
 
 static inline struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
@@ -269,7 +276,7 @@
 
 static inline int
 mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
-	struct mem_cgroup **ptr, gfp_t gfp_mask)
+	struct mem_cgroup **memcgp, gfp_t gfp_mask)
 {
 	return 0;
 }
@@ -279,6 +286,19 @@
 {
 }
 
+static inline struct mem_cgroup *
+mem_cgroup_iter(struct mem_cgroup *root,
+		struct mem_cgroup *prev,
+		struct mem_cgroup_reclaim_cookie *reclaim)
+{
+	return NULL;
+}
+
+static inline void mem_cgroup_iter_break(struct mem_cgroup *root,
+					 struct mem_cgroup *prev)
+{
+}
+
 static inline int mem_cgroup_get_reclaim_priority(struct mem_cgroup *memcg)
 {
 	return 0;
@@ -360,8 +380,7 @@
 	return 0;
 }
 
-static inline void mem_cgroup_split_huge_fixup(struct page *head,
-						struct page *tail)
+static inline void mem_cgroup_split_huge_fixup(struct page *head)
 {
 }
 
@@ -369,6 +388,14 @@
 void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx)
 {
 }
+static inline void mem_cgroup_replace_page_cache(struct page *oldpage,
+				struct page *newpage)
+{
+}
+
+static inline void mem_cgroup_reset_owner(struct page *page)
+{
+}
 #endif /* CONFIG_CGROUP_MEM_CONT */
 
 #if !defined(CONFIG_CGROUP_MEM_RES_CTLR) || !defined(CONFIG_DEBUG_VM)

diff --git a/include/linux/mfd/88pm860x.h b/include/linux/mfd/88pm860x.h
index 63b4fb8..92be347 100644
--- a/include/linux/mfd/88pm860x.h
+++ b/include/linux/mfd/88pm860x.h

@@ -297,10 +297,11 @@
 
 struct pm860x_chip {
 	struct device		*dev;
-	struct mutex		io_lock;
 	struct mutex		irq_lock;
 	struct i2c_client	*client;
 	struct i2c_client	*companion;	/* companion chip client */
+	struct regmap           *regmap;
+	struct regmap           *regmap_companion;
 
 	int			buck3_double;	/* DVC ramp slope double */
 	unsigned short		companion_addr;

diff --git a/include/linux/mfd/ab5500/ab5500.h b/include/linux/mfd/abx500/ab5500.h
similarity index 100%
rename from include/linux/mfd/ab5500/ab5500.h
rename to include/linux/mfd/abx500/ab5500.h


diff --git a/include/linux/mfd/ab8500/gpadc.h b/include/linux/mfd/abx500/ab8500-gpadc.h
similarity index 100%
rename from include/linux/mfd/ab8500/gpadc.h
rename to include/linux/mfd/abx500/ab8500-gpadc.h


diff --git a/include/linux/mfd/ab8500/gpio.h b/include/linux/mfd/abx500/ab8500-gpio.h
similarity index 100%
rename from include/linux/mfd/ab8500/gpio.h
rename to include/linux/mfd/abx500/ab8500-gpio.h


diff --git a/include/linux/mfd/ab8500/sysctrl.h b/include/linux/mfd/abx500/ab8500-sysctrl.h
similarity index 100%
rename from include/linux/mfd/ab8500/sysctrl.h
rename to include/linux/mfd/abx500/ab8500-sysctrl.h


diff --git a/include/linux/mfd/ab8500.h b/include/linux/mfd/abx500/ab8500.h
similarity index 100%
rename from include/linux/mfd/ab8500.h
rename to include/linux/mfd/abx500/ab8500.h


diff --git a/include/linux/mfd/max8925.h b/include/linux/mfd/max8925.h
index b8e6d94..15b2392 100644
--- a/include/linux/mfd/max8925.h
+++ b/include/linux/mfd/max8925.h

@@ -203,6 +203,8 @@
 	int			irq_base;
 	int			core_irq;
 	int			tsc_irq;
+
+	unsigned int            wakeup_flag;
 };
 
 struct max8925_backlight_pdata {

diff --git a/include/linux/mfd/max8997.h b/include/linux/mfd/max8997.h
index 0bbd13d..fff5905 100644
--- a/include/linux/mfd/max8997.h
+++ b/include/linux/mfd/max8997.h

@@ -77,6 +77,82 @@
 	struct regulator_init_data *initdata;
 };
 
+enum max8997_muic_usb_type {
+	MAX8997_USB_HOST,
+	MAX8997_USB_DEVICE,
+};
+
+enum max8997_muic_charger_type {
+	MAX8997_CHARGER_TYPE_NONE = 0,
+	MAX8997_CHARGER_TYPE_USB,
+	MAX8997_CHARGER_TYPE_DOWNSTREAM_PORT,
+	MAX8997_CHARGER_TYPE_DEDICATED_CHG,
+	MAX8997_CHARGER_TYPE_500MA,
+	MAX8997_CHARGER_TYPE_1A,
+	MAX8997_CHARGER_TYPE_DEAD_BATTERY = 7,
+};
+
+struct max8997_muic_reg_data {
+	u8 addr;
+	u8 data;
+};
+
+/**
+ * struct max8997_muic_platform_data
+ * @usb_callback: callback function for USB
+ *		  inform callee of USB type (HOST or DEVICE)
+ *		  and attached state(true or false)
+ * @charger_callback: callback function for charger
+ *		  inform callee of charger_type
+ *		  and attached state(true or false)
+ * @deskdock_callback: callback function for desk dock
+ *		  inform callee of attached state(true or false)
+ * @cardock_callback: callback function for car dock
+ *		  inform callee of attached state(true or false)
+ * @mhl_callback: callback function for MHL (Mobile High-definition Link)
+ *		  inform callee of attached state(true or false)
+ * @uart_callback: callback function for JIG UART
+ *		   inform callee of attached state(true or false)
+ * @init_data: array of max8997_muic_reg_data
+ *	       used for initializing registers of MAX8997 MUIC device
+ * @num_init_data: array size of init_data
+ */
+struct max8997_muic_platform_data {
+	void (*usb_callback)(enum max8997_muic_usb_type usb_type,
+		bool attached);
+	void (*charger_callback)(bool attached,
+		enum max8997_muic_charger_type charger_type);
+	void (*deskdock_callback) (bool attached);
+	void (*cardock_callback) (bool attached);
+	void (*mhl_callback) (bool attached);
+	void (*uart_callback) (bool attached);
+
+	struct max8997_muic_reg_data *init_data;
+	int num_init_data;
+};
+
+enum max8997_led_mode {
+	MAX8997_NONE,
+	MAX8997_FLASH_MODE,
+	MAX8997_MOVIE_MODE,
+	MAX8997_FLASH_PIN_CONTROL_MODE,
+	MAX8997_MOVIE_PIN_CONTROL_MODE,
+};
+
+/**
+ *  struct max8997_led_platform_data
+ *  The number of LED devices for MAX8997 is two
+ *  @mode: LED mode for each LED device
+ *  @brightness: initial brightness for each LED device
+ *	range:
+ *	[0 - 31]: MAX8997_FLASH_MODE and MAX8997_FLASH_PIN_CONTROL_MODE
+ *	[0 - 15]: MAX8997_MOVIE_MODE and MAX8997_MOVIE_PIN_CONTROL_MODE
+ */
+struct max8997_led_platform_data {
+	enum max8997_led_mode mode[2];
+	u8 brightness[2];
+};
+
 struct max8997_platform_data {
 	/* IRQ */
 	int irq_base;
@@ -113,10 +189,13 @@
 	/* charge Full Timeout */
 	int timeout; /* 0 (no timeout), 5, 6, 7 hours */
 
-	/* MUIC: Not implemented */
+	/* ---- MUIC ---- */
+	struct max8997_muic_platform_data *muic_pdata;
+
 	/* HAPTIC: Not implemented */
 	/* RTC: Not implemented */
-	/* Flash: Not implemented */
+	/* ---- LED ---- */
+	struct max8997_led_platform_data *led_pdata;
 };
 
 #endif /* __LINUX_MFD_MAX8998_H */

diff --git a/include/linux/mfd/mc13xxx.h b/include/linux/mfd/mc13xxx.h
index a98e2a3..b86ee45 100644
--- a/include/linux/mfd/mc13xxx.h
+++ b/include/linux/mfd/mc13xxx.h

@@ -174,6 +174,9 @@
 #define MC13XXX_ADC_MODE_MULT_CHAN	3
 
 #define MC13XXX_ADC0		43
+#define MC13XXX_ADC0_LICELLCON		(1 << 0)
+#define MC13XXX_ADC0_CHRGICON		(1 << 1)
+#define MC13XXX_ADC0_BATICON		(1 << 2)
 #define MC13XXX_ADC0_ADREFEN		(1 << 10)
 #define MC13XXX_ADC0_TSMOD0		(1 << 12)
 #define MC13XXX_ADC0_TSMOD1		(1 << 13)
@@ -185,4 +188,9 @@
 					MC13XXX_ADC0_TSMOD1 | \
 					MC13XXX_ADC0_TSMOD2)
 
+#define MC13XXX_ADC0_CONFIG_MASK	(MC13XXX_ADC0_TSMOD_MASK | \
+					MC13XXX_ADC0_LICELLCON | \
+					MC13XXX_ADC0_CHRGICON | \
+					MC13XXX_ADC0_BATICON)
+
 #endif /* ifndef __LINUX_MFD_MC13XXX_H */

diff --git a/include/linux/mfd/mcp.h b/include/linux/mfd/mcp.h
index ee496708..1515e64 100644
--- a/include/linux/mfd/mcp.h
+++ b/include/linux/mfd/mcp.h

@@ -10,6 +10,7 @@
 #ifndef MCP_H
 #define MCP_H
 
+#include <linux/mod_devicetable.h>
 #include <mach/dma.h>
 
 struct mcp_ops;
@@ -26,7 +27,7 @@
 	dma_device_t	dma_telco_rd;
 	dma_device_t	dma_telco_wr;
 	struct device	attached_device;
-	int		gpio_base;
+	const char	*codec;
 };
 
 struct mcp_ops {
@@ -44,10 +45,11 @@
 unsigned int mcp_reg_read(struct mcp *, unsigned int);
 void mcp_enable(struct mcp *);
 void mcp_disable(struct mcp *);
+const struct mcp_device_id *mcp_get_device_id(const struct mcp *mcp);
 #define mcp_get_sclk_rate(mcp)	((mcp)->sclk_rate)
 
 struct mcp *mcp_host_alloc(struct device *, size_t);
-int mcp_host_register(struct mcp *);
+int mcp_host_register(struct mcp *, void *);
 void mcp_host_unregister(struct mcp *);
 
 struct mcp_driver {
@@ -56,6 +58,7 @@
 	void (*remove)(struct mcp *);
 	int (*suspend)(struct mcp *, pm_message_t);
 	int (*resume)(struct mcp *);
+	const struct mcp_device_id *id_table;
 };
 
 int mcp_driver_register(struct mcp_driver *);

diff --git a/include/linux/mfd/s5m87xx/s5m-core.h b/include/linux/mfd/s5m87xx/s5m-core.h
new file mode 100644
index 0000000..a7480b5
--- /dev/null
+++ b/include/linux/mfd/s5m87xx/s5m-core.h

@@ -0,0 +1,373 @@
+/*
+ * s5m-core.h
+ *
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd
+ *              http://www.samsung.com
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the  License, or (at your
+ *  option) any later version.
+ *
+ */
+
+#ifndef __LINUX_MFD_S5M_CORE_H
+#define __LINUX_MFD_S5M_CORE_H
+
+#define NUM_IRQ_REGS	4
+
+enum s5m_device_type {
+	S5M8751X,
+	S5M8763X,
+	S5M8767X,
+};
+
+/* S5M8767 registers */
+enum s5m8767_reg {
+	S5M8767_REG_ID,
+	S5M8767_REG_INT1,
+	S5M8767_REG_INT2,
+	S5M8767_REG_INT3,
+	S5M8767_REG_INT1M,
+	S5M8767_REG_INT2M,
+	S5M8767_REG_INT3M,
+	S5M8767_REG_STATUS1,
+	S5M8767_REG_STATUS2,
+	S5M8767_REG_STATUS3,
+	S5M8767_REG_CTRL1,
+	S5M8767_REG_CTRL2,
+	S5M8767_REG_LOWBAT1,
+	S5M8767_REG_LOWBAT2,
+	S5M8767_REG_BUCHG,
+	S5M8767_REG_DVSRAMP,
+	S5M8767_REG_DVSTIMER2 = 0x10,
+	S5M8767_REG_DVSTIMER3,
+	S5M8767_REG_DVSTIMER4,
+	S5M8767_REG_LDO1,
+	S5M8767_REG_LDO2,
+	S5M8767_REG_LDO3,
+	S5M8767_REG_LDO4,
+	S5M8767_REG_LDO5,
+	S5M8767_REG_LDO6,
+	S5M8767_REG_LDO7,
+	S5M8767_REG_LDO8,
+	S5M8767_REG_LDO9,
+	S5M8767_REG_LDO10,
+	S5M8767_REG_LDO11,
+	S5M8767_REG_LDO12,
+	S5M8767_REG_LDO13,
+	S5M8767_REG_LDO14 = 0x20,
+	S5M8767_REG_LDO15,
+	S5M8767_REG_LDO16,
+	S5M8767_REG_LDO17,
+	S5M8767_REG_LDO18,
+	S5M8767_REG_LDO19,
+	S5M8767_REG_LDO20,
+	S5M8767_REG_LDO21,
+	S5M8767_REG_LDO22,
+	S5M8767_REG_LDO23,
+	S5M8767_REG_LDO24,
+	S5M8767_REG_LDO25,
+	S5M8767_REG_LDO26,
+	S5M8767_REG_LDO27,
+	S5M8767_REG_LDO28,
+	S5M8767_REG_UVLO = 0x31,
+	S5M8767_REG_BUCK1CTRL1,
+	S5M8767_REG_BUCK1CTRL2,
+	S5M8767_REG_BUCK2CTRL,
+	S5M8767_REG_BUCK2DVS1,
+	S5M8767_REG_BUCK2DVS2,
+	S5M8767_REG_BUCK2DVS3,
+	S5M8767_REG_BUCK2DVS4,
+	S5M8767_REG_BUCK2DVS5,
+	S5M8767_REG_BUCK2DVS6,
+	S5M8767_REG_BUCK2DVS7,
+	S5M8767_REG_BUCK2DVS8,
+	S5M8767_REG_BUCK3CTRL,
+	S5M8767_REG_BUCK3DVS1,
+	S5M8767_REG_BUCK3DVS2,
+	S5M8767_REG_BUCK3DVS3,
+	S5M8767_REG_BUCK3DVS4,
+	S5M8767_REG_BUCK3DVS5,
+	S5M8767_REG_BUCK3DVS6,
+	S5M8767_REG_BUCK3DVS7,
+	S5M8767_REG_BUCK3DVS8,
+	S5M8767_REG_BUCK4CTRL,
+	S5M8767_REG_BUCK4DVS1,
+	S5M8767_REG_BUCK4DVS2,
+	S5M8767_REG_BUCK4DVS3,
+	S5M8767_REG_BUCK4DVS4,
+	S5M8767_REG_BUCK4DVS5,
+	S5M8767_REG_BUCK4DVS6,
+	S5M8767_REG_BUCK4DVS7,
+	S5M8767_REG_BUCK4DVS8,
+	S5M8767_REG_BUCK5CTRL1,
+	S5M8767_REG_BUCK5CTRL2,
+	S5M8767_REG_BUCK5CTRL3,
+	S5M8767_REG_BUCK5CTRL4,
+	S5M8767_REG_BUCK5CTRL5,
+	S5M8767_REG_BUCK6CTRL1,
+	S5M8767_REG_BUCK6CTRL2,
+	S5M8767_REG_BUCK7CTRL1,
+	S5M8767_REG_BUCK7CTRL2,
+	S5M8767_REG_BUCK8CTRL1,
+	S5M8767_REG_BUCK8CTRL2,
+	S5M8767_REG_BUCK9CTRL1,
+	S5M8767_REG_BUCK9CTRL2,
+	S5M8767_REG_LDO1CTRL,
+	S5M8767_REG_LDO2_1CTRL,
+	S5M8767_REG_LDO2_2CTRL,
+	S5M8767_REG_LDO2_3CTRL,
+	S5M8767_REG_LDO2_4CTRL,
+	S5M8767_REG_LDO3CTRL,
+	S5M8767_REG_LDO4CTRL,
+	S5M8767_REG_LDO5CTRL,
+	S5M8767_REG_LDO6CTRL,
+	S5M8767_REG_LDO7CTRL,
+	S5M8767_REG_LDO8CTRL,
+	S5M8767_REG_LDO9CTRL,
+	S5M8767_REG_LDO10CTRL,
+	S5M8767_REG_LDO11CTRL,
+	S5M8767_REG_LDO12CTRL,
+	S5M8767_REG_LDO13CTRL,
+	S5M8767_REG_LDO14CTRL,
+	S5M8767_REG_LDO15CTRL,
+	S5M8767_REG_LDO16CTRL,
+	S5M8767_REG_LDO17CTRL,
+	S5M8767_REG_LDO18CTRL,
+	S5M8767_REG_LDO19CTRL,
+	S5M8767_REG_LDO20CTRL,
+	S5M8767_REG_LDO21CTRL,
+	S5M8767_REG_LDO22CTRL,
+	S5M8767_REG_LDO23CTRL,
+	S5M8767_REG_LDO24CTRL,
+	S5M8767_REG_LDO25CTRL,
+	S5M8767_REG_LDO26CTRL,
+	S5M8767_REG_LDO27CTRL,
+	S5M8767_REG_LDO28CTRL,
+};
+
+/* S5M8763 registers */
+enum s5m8763_reg {
+	S5M8763_REG_IRQ1,
+	S5M8763_REG_IRQ2,
+	S5M8763_REG_IRQ3,
+	S5M8763_REG_IRQ4,
+	S5M8763_REG_IRQM1,
+	S5M8763_REG_IRQM2,
+	S5M8763_REG_IRQM3,
+	S5M8763_REG_IRQM4,
+	S5M8763_REG_STATUS1,
+	S5M8763_REG_STATUS2,
+	S5M8763_REG_STATUSM1,
+	S5M8763_REG_STATUSM2,
+	S5M8763_REG_CHGR1,
+	S5M8763_REG_CHGR2,
+	S5M8763_REG_LDO_ACTIVE_DISCHARGE1,
+	S5M8763_REG_LDO_ACTIVE_DISCHARGE2,
+	S5M8763_REG_BUCK_ACTIVE_DISCHARGE3,
+	S5M8763_REG_ONOFF1,
+	S5M8763_REG_ONOFF2,
+	S5M8763_REG_ONOFF3,
+	S5M8763_REG_ONOFF4,
+	S5M8763_REG_BUCK1_VOLTAGE1,
+	S5M8763_REG_BUCK1_VOLTAGE2,
+	S5M8763_REG_BUCK1_VOLTAGE3,
+	S5M8763_REG_BUCK1_VOLTAGE4,
+	S5M8763_REG_BUCK2_VOLTAGE1,
+	S5M8763_REG_BUCK2_VOLTAGE2,
+	S5M8763_REG_BUCK3,
+	S5M8763_REG_BUCK4,
+	S5M8763_REG_LDO1_LDO2,
+	S5M8763_REG_LDO3,
+	S5M8763_REG_LDO4,
+	S5M8763_REG_LDO5,
+	S5M8763_REG_LDO6,
+	S5M8763_REG_LDO7,
+	S5M8763_REG_LDO7_LDO8,
+	S5M8763_REG_LDO9_LDO10,
+	S5M8763_REG_LDO11,
+	S5M8763_REG_LDO12,
+	S5M8763_REG_LDO13,
+	S5M8763_REG_LDO14,
+	S5M8763_REG_LDO15,
+	S5M8763_REG_LDO16,
+	S5M8763_REG_BKCHR,
+	S5M8763_REG_LBCNFG1,
+	S5M8763_REG_LBCNFG2,
+};
+
+enum s5m8767_irq {
+	S5M8767_IRQ_PWRR,
+	S5M8767_IRQ_PWRF,
+	S5M8767_IRQ_PWR1S,
+	S5M8767_IRQ_JIGR,
+	S5M8767_IRQ_JIGF,
+	S5M8767_IRQ_LOWBAT2,
+	S5M8767_IRQ_LOWBAT1,
+
+	S5M8767_IRQ_MRB,
+	S5M8767_IRQ_DVSOK2,
+	S5M8767_IRQ_DVSOK3,
+	S5M8767_IRQ_DVSOK4,
+
+	S5M8767_IRQ_RTC60S,
+	S5M8767_IRQ_RTCA1,
+	S5M8767_IRQ_RTCA2,
+	S5M8767_IRQ_SMPL,
+	S5M8767_IRQ_RTC1S,
+	S5M8767_IRQ_WTSR,
+
+	S5M8767_IRQ_NR,
+};
+
+#define S5M8767_IRQ_PWRR_MASK		(1 << 0)
+#define S5M8767_IRQ_PWRF_MASK		(1 << 1)
+#define S5M8767_IRQ_PWR1S_MASK		(1 << 3)
+#define S5M8767_IRQ_JIGR_MASK		(1 << 4)
+#define S5M8767_IRQ_JIGF_MASK		(1 << 5)
+#define S5M8767_IRQ_LOWBAT2_MASK	(1 << 6)
+#define S5M8767_IRQ_LOWBAT1_MASK	(1 << 7)
+
+#define S5M8767_IRQ_MRB_MASK		(1 << 2)
+#define S5M8767_IRQ_DVSOK2_MASK		(1 << 3)
+#define S5M8767_IRQ_DVSOK3_MASK		(1 << 4)
+#define S5M8767_IRQ_DVSOK4_MASK		(1 << 5)
+
+#define S5M8767_IRQ_RTC60S_MASK		(1 << 0)
+#define S5M8767_IRQ_RTCA1_MASK		(1 << 1)
+#define S5M8767_IRQ_RTCA2_MASK		(1 << 2)
+#define S5M8767_IRQ_SMPL_MASK		(1 << 3)
+#define S5M8767_IRQ_RTC1S_MASK		(1 << 4)
+#define S5M8767_IRQ_WTSR_MASK		(1 << 5)
+
+enum s5m8763_irq {
+	S5M8763_IRQ_DCINF,
+	S5M8763_IRQ_DCINR,
+	S5M8763_IRQ_JIGF,
+	S5M8763_IRQ_JIGR,
+	S5M8763_IRQ_PWRONF,
+	S5M8763_IRQ_PWRONR,
+
+	S5M8763_IRQ_WTSREVNT,
+	S5M8763_IRQ_SMPLEVNT,
+	S5M8763_IRQ_ALARM1,
+	S5M8763_IRQ_ALARM0,
+
+	S5M8763_IRQ_ONKEY1S,
+	S5M8763_IRQ_TOPOFFR,
+	S5M8763_IRQ_DCINOVPR,
+	S5M8763_IRQ_CHGRSTF,
+	S5M8763_IRQ_DONER,
+	S5M8763_IRQ_CHGFAULT,
+
+	S5M8763_IRQ_LOBAT1,
+	S5M8763_IRQ_LOBAT2,
+
+	S5M8763_IRQ_NR,
+};
+
+#define S5M8763_IRQ_DCINF_MASK		(1 << 2)
+#define S5M8763_IRQ_DCINR_MASK		(1 << 3)
+#define S5M8763_IRQ_JIGF_MASK		(1 << 4)
+#define S5M8763_IRQ_JIGR_MASK		(1 << 5)
+#define S5M8763_IRQ_PWRONF_MASK		(1 << 6)
+#define S5M8763_IRQ_PWRONR_MASK		(1 << 7)
+
+#define S5M8763_IRQ_WTSREVNT_MASK	(1 << 0)
+#define S5M8763_IRQ_SMPLEVNT_MASK	(1 << 1)
+#define S5M8763_IRQ_ALARM1_MASK		(1 << 2)
+#define S5M8763_IRQ_ALARM0_MASK		(1 << 3)
+
+#define S5M8763_IRQ_ONKEY1S_MASK	(1 << 0)
+#define S5M8763_IRQ_TOPOFFR_MASK	(1 << 2)
+#define S5M8763_IRQ_DCINOVPR_MASK	(1 << 3)
+#define S5M8763_IRQ_CHGRSTF_MASK	(1 << 4)
+#define S5M8763_IRQ_DONER_MASK		(1 << 5)
+#define S5M8763_IRQ_CHGFAULT_MASK	(1 << 7)
+
+#define S5M8763_IRQ_LOBAT1_MASK		(1 << 0)
+#define S5M8763_IRQ_LOBAT2_MASK		(1 << 1)
+
+#define S5M8763_ENRAMP                  (1 << 4)
+
+/**
+ * struct s5m87xx_dev - s5m87xx master device for sub-drivers
+ * @dev: master device of the chip (can be used to access platform data)
+ * @i2c: i2c client private data for regulator
+ * @rtc: i2c client private data for rtc
+ * @iolock: mutex for serializing io access
+ * @irqlock: mutex for buslock
+ * @irq_base: base IRQ number for s5m87xx, required for IRQs
+ * @irq: generic IRQ number for s5m87xx
+ * @ono: power onoff IRQ number for s5m87xx
+ * @irq_masks_cur: currently active value
+ * @irq_masks_cache: cached hardware value
+ * @type: indicate which s5m87xx "variant" is used
+ */
+struct s5m87xx_dev {
+	struct device *dev;
+	struct regmap *regmap;
+	struct i2c_client *i2c;
+	struct i2c_client *rtc;
+	struct mutex iolock;
+	struct mutex irqlock;
+
+	int device_type;
+	int irq_base;
+	int irq;
+	int ono;
+	u8 irq_masks_cur[NUM_IRQ_REGS];
+	u8 irq_masks_cache[NUM_IRQ_REGS];
+	int type;
+	bool wakeup;
+};
+
+int s5m_irq_init(struct s5m87xx_dev *s5m87xx);
+void s5m_irq_exit(struct s5m87xx_dev *s5m87xx);
+int s5m_irq_resume(struct s5m87xx_dev *s5m87xx);
+
+extern int s5m_reg_read(struct s5m87xx_dev *s5m87xx, u8 reg, void *dest);
+extern int s5m_bulk_read(struct s5m87xx_dev *s5m87xx, u8 reg, int count, u8 *buf);
+extern int s5m_reg_write(struct s5m87xx_dev *s5m87xx, u8 reg, u8 value);
+extern int s5m_bulk_write(struct s5m87xx_dev *s5m87xx, u8 reg, int count, u8 *buf);
+extern int s5m_reg_update(struct s5m87xx_dev *s5m87xx, u8 reg, u8 val, u8 mask);
+
+struct s5m_platform_data {
+	struct s5m_regulator_data	*regulators;
+	int				device_type;
+	int				num_regulators;
+
+	int				irq_base;
+	int 				(*cfg_pmic_irq)(void);
+
+	int				ono;
+	bool				wakeup;
+	bool				buck_voltage_lock;
+
+	int				buck_gpios[3];
+	int				buck2_voltage[8];
+	bool				buck2_gpiodvs;
+	int				buck3_voltage[8];
+	bool				buck3_gpiodvs;
+	int				buck4_voltage[8];
+	bool				buck4_gpiodvs;
+
+	int				buck_set1;
+	int				buck_set2;
+	int				buck_set3;
+	int				buck2_enable;
+	int				buck3_enable;
+	int				buck4_enable;
+	int				buck_default_idx;
+	int				buck2_default_idx;
+	int				buck3_default_idx;
+	int				buck4_default_idx;
+
+	int                             buck_ramp_delay;
+	bool                            buck2_ramp_enable;
+	bool                            buck3_ramp_enable;
+	bool                            buck4_ramp_enable;
+};
+
+#endif /*  __LINUX_MFD_S5M_CORE_H */

diff --git a/include/linux/mfd/s5m87xx/s5m-pmic.h b/include/linux/mfd/s5m87xx/s5m-pmic.h
new file mode 100644
index 0000000..a72a5d2
--- /dev/null
+++ b/include/linux/mfd/s5m87xx/s5m-pmic.h

@@ -0,0 +1,100 @@
+/* s5m-pmic.h
+ *
+ * Copyright (c) 2010-2011 Samsung Electronics Co., Ltd.
+ *		http://www.samsung.com
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+*/
+
+#ifndef __LINUX_MFD_S5M_PMIC_H
+#define __LINUX_MFD_S5M_PMIC_H
+
+#include <linux/regulator/machine.h>
+
+/* S5M8767 regulator ids */
+enum s5m8767_regulators {
+	S5M8767_LDO1,
+	S5M8767_LDO2,
+	S5M8767_LDO3,
+	S5M8767_LDO4,
+	S5M8767_LDO5,
+	S5M8767_LDO6,
+	S5M8767_LDO7,
+	S5M8767_LDO8,
+	S5M8767_LDO9,
+	S5M8767_LDO10,
+	S5M8767_LDO11,
+	S5M8767_LDO12,
+	S5M8767_LDO13,
+	S5M8767_LDO14,
+	S5M8767_LDO15,
+	S5M8767_LDO16,
+	S5M8767_LDO17,
+	S5M8767_LDO18,
+	S5M8767_LDO19,
+	S5M8767_LDO20,
+	S5M8767_LDO21,
+	S5M8767_LDO22,
+	S5M8767_LDO23,
+	S5M8767_LDO24,
+	S5M8767_LDO25,
+	S5M8767_LDO26,
+	S5M8767_LDO27,
+	S5M8767_LDO28,
+	S5M8767_BUCK1,
+	S5M8767_BUCK2,
+	S5M8767_BUCK3,
+	S5M8767_BUCK4,
+	S5M8767_BUCK5,
+	S5M8767_BUCK6,
+	S5M8767_BUCK7,
+	S5M8767_BUCK8,
+	S5M8767_BUCK9,
+	S5M8767_AP_EN32KHZ,
+	S5M8767_CP_EN32KHZ,
+
+	S5M8767_REG_MAX,
+};
+
+/* S5M8763 regulator ids */
+enum s5m8763_regulators {
+	S5M8763_LDO1,
+	S5M8763_LDO2,
+	S5M8763_LDO3,
+	S5M8763_LDO4,
+	S5M8763_LDO5,
+	S5M8763_LDO6,
+	S5M8763_LDO7,
+	S5M8763_LDO8,
+	S5M8763_LDO9,
+	S5M8763_LDO10,
+	S5M8763_LDO11,
+	S5M8763_LDO12,
+	S5M8763_LDO13,
+	S5M8763_LDO14,
+	S5M8763_LDO15,
+	S5M8763_LDO16,
+	S5M8763_BUCK1,
+	S5M8763_BUCK2,
+	S5M8763_BUCK3,
+	S5M8763_BUCK4,
+	S5M8763_AP_EN32KHZ,
+	S5M8763_CP_EN32KHZ,
+	S5M8763_ENCHGVI,
+	S5M8763_ESAFEUSB1,
+	S5M8763_ESAFEUSB2,
+};
+
+/**
+ * struct s5m_regulator_data - regulator data
+ * @id: regulator id
+ * @initdata: regulator init data (constraints, supplies, ...)
+ */
+struct s5m_regulator_data {
+	int				id;
+	struct regulator_init_data	*initdata;
+};
+
+#endif /*  __LINUX_MFD_S5M_PMIC_H */

diff --git a/include/linux/mfd/s5m87xx/s5m-rtc.h b/include/linux/mfd/s5m87xx/s5m-rtc.h
new file mode 100644
index 0000000..6ce8da2
--- /dev/null
+++ b/include/linux/mfd/s5m87xx/s5m-rtc.h

@@ -0,0 +1,84 @@
+/*
+ * s5m-rtc.h
+ *
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd
+ *              http://www.samsung.com
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the  License, or (at your
+ *  option) any later version.
+ *
+ */
+
+#ifndef __LINUX_MFD_S5M_RTC_H
+#define __LINUX_MFD_S5M_RTC_H
+
+enum s5m87xx_rtc_reg {
+	S5M87XX_RTC_SEC,
+	S5M87XX_RTC_MIN,
+	S5M87XX_RTC_HOUR,
+	S5M87XX_RTC_WEEKDAY,
+	S5M87XX_RTC_DATE,
+	S5M87XX_RTC_MONTH,
+	S5M87XX_RTC_YEAR1,
+	S5M87XX_RTC_YEAR2,
+	S5M87XX_ALARM0_SEC,
+	S5M87XX_ALARM0_MIN,
+	S5M87XX_ALARM0_HOUR,
+	S5M87XX_ALARM0_WEEKDAY,
+	S5M87XX_ALARM0_DATE,
+	S5M87XX_ALARM0_MONTH,
+	S5M87XX_ALARM0_YEAR1,
+	S5M87XX_ALARM0_YEAR2,
+	S5M87XX_ALARM1_SEC,
+	S5M87XX_ALARM1_MIN,
+	S5M87XX_ALARM1_HOUR,
+	S5M87XX_ALARM1_WEEKDAY,
+	S5M87XX_ALARM1_DATE,
+	S5M87XX_ALARM1_MONTH,
+	S5M87XX_ALARM1_YEAR1,
+	S5M87XX_ALARM1_YEAR2,
+	S5M87XX_ALARM0_CONF,
+	S5M87XX_ALARM1_CONF,
+	S5M87XX_RTC_STATUS,
+	S5M87XX_WTSR_SMPL_CNTL,
+	S5M87XX_RTC_UDR_CON,
+};
+
+#define RTC_I2C_ADDR		(0x0C >> 1)
+
+#define HOUR_12			(1 << 7)
+#define HOUR_AMPM		(1 << 6)
+#define HOUR_PM			(1 << 5)
+#define ALARM0_STATUS		(1 << 1)
+#define ALARM1_STATUS		(1 << 2)
+#define UPDATE_AD		(1 << 0)
+
+/* RTC Control Register */
+#define BCD_EN_SHIFT		0
+#define BCD_EN_MASK		(1 << BCD_EN_SHIFT)
+#define MODEL24_SHIFT		1
+#define MODEL24_MASK		(1 << MODEL24_SHIFT)
+/* RTC Update Register1 */
+#define RTC_UDR_SHIFT		0
+#define RTC_UDR_MASK		(1 << RTC_UDR_SHIFT)
+/* RTC Hour register */
+#define HOUR_PM_SHIFT		6
+#define HOUR_PM_MASK		(1 << HOUR_PM_SHIFT)
+/* RTC Alarm Enable */
+#define ALARM_ENABLE_SHIFT	7
+#define ALARM_ENABLE_MASK	(1 << ALARM_ENABLE_SHIFT)
+
+enum {
+	RTC_SEC = 0,
+	RTC_MIN,
+	RTC_HOUR,
+	RTC_WEEKDAY,
+	RTC_DATE,
+	RTC_MONTH,
+	RTC_YEAR1,
+	RTC_YEAR2,
+};
+
+#endif /*  __LINUX_MFD_S5M_RTC_H */

diff --git a/include/linux/mfd/stmpe.h b/include/linux/mfd/stmpe.h
index be1af7c..ca1d7a3 100644
--- a/include/linux/mfd/stmpe.h
+++ b/include/linux/mfd/stmpe.h

@@ -20,6 +20,8 @@
 };
 
 enum stmpe_partnum {
+	STMPE610,
+	STMPE801,
 	STMPE811,
 	STMPE1601,
 	STMPE2401,
@@ -50,17 +52,20 @@
 
 
 struct stmpe_variant_info;
+struct stmpe_client_info;
 
 /**
  * struct stmpe - STMPE MFD structure
  * @lock: lock protecting I/O operations
  * @irq_lock: IRQ bus lock
  * @dev: device, mostly for dev_dbg()
- * @i2c: i2c client
+ * @client: client - i2c or spi
+ * @ci: client specific information
  * @partnum: part number
  * @variant: the detected STMPE model number
  * @regs: list of addresses of registers which are at different addresses on
  *	  different variants.  Indexed by one of STMPE_IDX_*.
+ * @irq: irq number for stmpe
  * @irq_base: starting IRQ number for internal IRQs
  * @num_gpios: number of gpios, differs for variants
  * @ier: cache of IER registers for bus_lock
@@ -71,11 +76,13 @@
 	struct mutex lock;
 	struct mutex irq_lock;
 	struct device *dev;
-	struct i2c_client *i2c;
+	void *client;
+	struct stmpe_client_info *ci;
 	enum stmpe_partnum partnum;
 	struct stmpe_variant_info *variant;
 	const u8 *regs;
 
+	int irq;
 	int irq_base;
 	int num_gpios;
 	u8 ier[2];
@@ -183,6 +190,9 @@
  * @autosleep_timeout: inactivity timeout in milliseconds for autosleep
  * @irq_base: base IRQ number.  %STMPE_NR_IRQS irqs will be used, or
  *	      %STMPE_NR_INTERNAL_IRQS if the GPIO driver is not used.
+ * @irq_over_gpio: true if gpio is used to get irq
+ * @irq_gpio: gpio number over which irq will be requested (significant only if
+ *	      irq_over_gpio is true)
  * @gpio: GPIO-specific platform data
  * @keypad: keypad-specific platform data
  * @ts: touchscreen-specific platform data
@@ -194,6 +204,8 @@
 	unsigned int irq_trigger;
 	bool irq_invert_polarity;
 	bool autosleep;
+	bool irq_over_gpio;
+	int irq_gpio;
 	int autosleep_timeout;
 
 	struct stmpe_gpio_platform_data *gpio;

diff --git a/include/linux/mfd/ucb1x00.h b/include/linux/mfd/ucb1x00.h
index 4321f04..bc19e5f 100644
--- a/include/linux/mfd/ucb1x00.h
+++ b/include/linux/mfd/ucb1x00.h

@@ -104,6 +104,9 @@
 #define UCB_MODE_DYN_VFLAG_ENA	(1 << 12)
 #define UCB_MODE_AUD_OFF_CAN	(1 << 13)
 
+struct ucb1x00_plat_data {
+	int		gpio_base;
+};
 
 struct ucb1x00_irq {
 	void *devid;
@@ -116,7 +119,7 @@
 	unsigned int		irq;
 	struct semaphore	adc_sem;
 	spinlock_t		io_lock;
-	u16			id;
+	const struct mcp_device_id *id;
 	u16			io_dir;
 	u16			io_out;
 	u16			adc_cr;

diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index e39aeec..eaf8674 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h

@@ -6,18 +6,31 @@
 
 typedef struct page *new_page_t(struct page *, unsigned long private, int **);
 
+/*
+ * MIGRATE_ASYNC means never block
+ * MIGRATE_SYNC_LIGHT in the current implementation means to allow blocking
+ *	on most operations but not ->writepage as the potential stall time
+ *	is too significant
+ * MIGRATE_SYNC will block when migrating pages
+ */
+enum migrate_mode {
+	MIGRATE_ASYNC,
+	MIGRATE_SYNC_LIGHT,
+	MIGRATE_SYNC,
+};
+
 #ifdef CONFIG_MIGRATION
 #define PAGE_MIGRATION 1
 
 extern void putback_lru_pages(struct list_head *l);
 extern int migrate_page(struct address_space *,
-			struct page *, struct page *);
+			struct page *, struct page *, enum migrate_mode);
 extern int migrate_pages(struct list_head *l, new_page_t x,
 			unsigned long private, bool offlining,
-			bool sync);
+			enum migrate_mode mode);
 extern int migrate_huge_pages(struct list_head *l, new_page_t x,
 			unsigned long private, bool offlining,
-			bool sync);
+			enum migrate_mode mode);
 
 extern int fail_migrate_page(struct address_space *,
 			struct page *, struct page *);
@@ -36,10 +49,10 @@
 static inline void putback_lru_pages(struct list_head *l) {}
 static inline int migrate_pages(struct list_head *l, new_page_t x,
 		unsigned long private, bool offlining,
-		bool sync) { return -ENOSYS; }
+		enum migrate_mode mode) { return -ENOSYS; }
 static inline int migrate_huge_pages(struct list_head *l, new_page_t x,
 		unsigned long private, bool offlining,
-		bool sync) { return -ENOSYS; }
+		enum migrate_mode mode) { return -ENOSYS; }
 
 static inline int migrate_prep(void) { return -ENOSYS; }
 static inline int migrate_prep_local(void) { return -ENOSYS; }

diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 8f7d247..227fd3e 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h

@@ -22,26 +22,21 @@
 }
 
 static inline void
-__add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list l,
-		       struct list_head *head)
+add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list lru)
 {
-	list_add(&page->lru, head);
-	__mod_zone_page_state(zone, NR_LRU_BASE + l, hpage_nr_pages(page));
-	mem_cgroup_add_lru_list(page, l);
+	struct lruvec *lruvec;
+
+	lruvec = mem_cgroup_lru_add_list(zone, page, lru);
+	list_add(&page->lru, &lruvec->lists[lru]);
+	__mod_zone_page_state(zone, NR_LRU_BASE + lru, hpage_nr_pages(page));
 }
 
 static inline void
-add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list l)
+del_page_from_lru_list(struct zone *zone, struct page *page, enum lru_list lru)
 {
-	__add_page_to_lru_list(zone, page, l, &zone->lru[l].list);
-}
-
-static inline void
-del_page_from_lru_list(struct zone *zone, struct page *page, enum lru_list l)
-{
+	mem_cgroup_lru_del_list(page, lru);
 	list_del(&page->lru);
-	__mod_zone_page_state(zone, NR_LRU_BASE + l, -hpage_nr_pages(page));
-	mem_cgroup_del_lru_list(page, l);
+	__mod_zone_page_state(zone, NR_LRU_BASE + lru, -hpage_nr_pages(page));
 }
 
 /**
@@ -59,24 +54,28 @@
 	return LRU_INACTIVE_ANON;
 }
 
-static inline void
-del_page_from_lru(struct zone *zone, struct page *page)
+/**
+ * page_off_lru - which LRU list was page on? clearing its lru flags.
+ * @page: the page to test
+ *
+ * Returns the LRU list a page was on, as an index into the array of LRU
+ * lists; and clears its Unevictable or Active flags, ready for freeing.
+ */
+static inline enum lru_list page_off_lru(struct page *page)
 {
-	enum lru_list l;
+	enum lru_list lru;
 
-	list_del(&page->lru);
 	if (PageUnevictable(page)) {
 		__ClearPageUnevictable(page);
-		l = LRU_UNEVICTABLE;
+		lru = LRU_UNEVICTABLE;
 	} else {
-		l = page_lru_base_type(page);
+		lru = page_lru_base_type(page);
 		if (PageActive(page)) {
 			__ClearPageActive(page);
-			l += LRU_ACTIVE;
+			lru += LRU_ACTIVE;
 		}
 	}
-	__mod_zone_page_state(zone, NR_LRU_BASE + l, -hpage_nr_pages(page));
-	mem_cgroup_del_lru_list(page, l);
+	return lru;
 }
 
 /**
@@ -97,7 +96,6 @@
 		if (PageActive(page))
 			lru += LRU_ACTIVE;
 	}
-
 	return lru;
 }
 

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 5b42f1b..3cc3062 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h

@@ -151,12 +151,11 @@
 #endif
 }
 /*
- * If another subsystem starts using the double word pairing for atomic
- * operations on struct page then it must change the #if to ensure
- * proper alignment of the page struct.
+ * The struct page can be forced to be double word aligned so that atomic ops
+ * on double words work. The SLUB allocator can make use of such a feature.
  */
-#if defined(CONFIG_SLUB) && defined(CONFIG_CMPXCHG_LOCAL)
-	__attribute__((__aligned__(2*sizeof(unsigned long))))
+#ifdef CONFIG_HAVE_ALIGNED_STRUCT_PAGE
+	__aligned(2 * sizeof(unsigned long))
 #endif
 ;
 

diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index c8ef9bc..9f22ba5 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h

@@ -71,6 +71,8 @@
 	bool			hpi_en;			/* HPI enablebit */
 	bool			hpi;			/* HPI support bit */
 	unsigned int		hpi_cmd;		/* cmd used as HPI */
+	unsigned int		boot_ro_lock;		/* ro lock support */
+	bool			boot_ro_lockable;
 	u8			raw_partition_support;	/* 160 */
 	u8			raw_erased_mem_count;	/* 181 */
 	u8			raw_ext_csd_structure;	/* 194 */
@@ -110,6 +112,7 @@
 struct sd_switch_caps {
 	unsigned int		hs_max_dtr;
 	unsigned int		uhs_max_dtr;
+#define HIGH_SPEED_MAX_DTR	50000000
 #define UHS_SDR104_MAX_DTR	208000000
 #define UHS_SDR50_MAX_DTR	100000000
 #define UHS_DDR50_MAX_DTR	50000000
@@ -117,11 +120,13 @@
 #define UHS_SDR12_MAX_DTR	25000000
 	unsigned int		sd3_bus_mode;
 #define UHS_SDR12_BUS_SPEED	0
+#define HIGH_SPEED_BUS_SPEED	1
 #define UHS_SDR25_BUS_SPEED	1
 #define UHS_SDR50_BUS_SPEED	2
 #define UHS_SDR104_BUS_SPEED	3
 #define UHS_DDR50_BUS_SPEED	4
 
+#define SD_MODE_HIGH_SPEED	(1 << HIGH_SPEED_BUS_SPEED)
 #define SD_MODE_UHS_SDR12	(1 << UHS_SDR12_BUS_SPEED)
 #define SD_MODE_UHS_SDR25	(1 << UHS_SDR25_BUS_SPEED)
 #define SD_MODE_UHS_SDR50	(1 << UHS_SDR50_BUS_SPEED)
@@ -184,6 +189,10 @@
 	unsigned int	part_cfg;	/* partition type */
 	char	name[MAX_MMC_PART_NAME_LEN];
 	bool	force_ro;	/* to make boot parts RO by default */
+	unsigned int	area_type;
+#define MMC_BLK_DATA_AREA_MAIN	(1<<0)
+#define MMC_BLK_DATA_AREA_BOOT	(1<<1)
+#define MMC_BLK_DATA_AREA_GP	(1<<2)
 };
 
 /*
@@ -206,6 +215,8 @@
 #define MMC_STATE_HIGHSPEED_DDR (1<<4)		/* card is in high speed mode */
 #define MMC_STATE_ULTRAHIGHSPEED (1<<5)		/* card is in ultra high speed mode */
 #define MMC_CARD_SDXC		(1<<6)		/* card is SDXC */
+#define MMC_CARD_REMOVED	(1<<7)		/* card has been removed */
+#define MMC_STATE_HIGHSPEED_200	(1<<8)		/* card is in HS200 mode */
 	unsigned int		quirks; 	/* card quirks */
 #define MMC_QUIRK_LENIENT_FN0	(1<<0)		/* allow SDIO FN0 writes outside of the VS CCCR range */
 #define MMC_QUIRK_BLKSZ_FOR_BYTE_MODE (1<<1)	/* use func->cur_blksize */
@@ -261,12 +272,14 @@
  * This function fill contents in mmc_part.
  */
 static inline void mmc_part_add(struct mmc_card *card, unsigned int size,
-			unsigned int part_cfg, char *name, int idx, bool ro)
+			unsigned int part_cfg, char *name, int idx, bool ro,
+			int area_type)
 {
 	card->part[card->nr_parts].size = size;
 	card->part[card->nr_parts].part_cfg = part_cfg;
 	sprintf(card->part[card->nr_parts].name, name, idx);
 	card->part[card->nr_parts].force_ro = ro;
+	card->part[card->nr_parts].area_type = area_type;
 	card->nr_parts++;
 }
 
@@ -362,18 +375,24 @@
 #define mmc_card_present(c)	((c)->state & MMC_STATE_PRESENT)
 #define mmc_card_readonly(c)	((c)->state & MMC_STATE_READONLY)
 #define mmc_card_highspeed(c)	((c)->state & MMC_STATE_HIGHSPEED)
+#define mmc_card_hs200(c)	((c)->state & MMC_STATE_HIGHSPEED_200)
 #define mmc_card_blockaddr(c)	((c)->state & MMC_STATE_BLOCKADDR)
 #define mmc_card_ddr_mode(c)	((c)->state & MMC_STATE_HIGHSPEED_DDR)
-#define mmc_sd_card_uhs(c) ((c)->state & MMC_STATE_ULTRAHIGHSPEED)
+#define mmc_card_uhs(c)		((c)->state & MMC_STATE_ULTRAHIGHSPEED)
+#define mmc_sd_card_uhs(c)	((c)->state & MMC_STATE_ULTRAHIGHSPEED)
 #define mmc_card_ext_capacity(c) ((c)->state & MMC_CARD_SDXC)
+#define mmc_card_removed(c)	((c) && ((c)->state & MMC_CARD_REMOVED))
 
 #define mmc_card_set_present(c)	((c)->state |= MMC_STATE_PRESENT)
 #define mmc_card_set_readonly(c) ((c)->state |= MMC_STATE_READONLY)
 #define mmc_card_set_highspeed(c) ((c)->state |= MMC_STATE_HIGHSPEED)
+#define mmc_card_set_hs200(c)	((c)->state |= MMC_STATE_HIGHSPEED_200)
 #define mmc_card_set_blockaddr(c) ((c)->state |= MMC_STATE_BLOCKADDR)
 #define mmc_card_set_ddr_mode(c) ((c)->state |= MMC_STATE_HIGHSPEED_DDR)
+#define mmc_card_set_uhs(c) ((c)->state |= MMC_STATE_ULTRAHIGHSPEED)
 #define mmc_sd_card_set_uhs(c) ((c)->state |= MMC_STATE_ULTRAHIGHSPEED)
 #define mmc_card_set_ext_capacity(c) ((c)->state |= MMC_CARD_SDXC)
+#define mmc_card_set_removed(c) ((c)->state |= MMC_CARD_REMOVED)
 
 /*
  * Quirk add/remove for MMC products.

diff --git a/include/linux/mmc/cd-gpio.h b/include/linux/mmc/cd-gpio.h
new file mode 100644
index 0000000..a8e4697
--- /dev/null
+++ b/include/linux/mmc/cd-gpio.h

@@ -0,0 +1,19 @@
+/*
+ * Generic GPIO card-detect helper header
+ *
+ * Copyright (C) 2011, Guennadi Liakhovetski <g.liakhovetski@gmx.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef MMC_CD_GPIO_H
+#define MMC_CD_GPIO_H
+
+struct mmc_host;
+int mmc_cd_gpio_request(struct mmc_host *host, unsigned int gpio,
+			unsigned int irq, unsigned long flags);
+void mmc_cd_gpio_free(struct mmc_host *host);
+
+#endif

diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h
index 174a844..87a976c 100644
--- a/include/linux/mmc/core.h
+++ b/include/linux/mmc/core.h

@@ -180,6 +180,8 @@
 
 extern int mmc_flush_cache(struct mmc_card *);
 
+extern int mmc_detect_card_removed(struct mmc_host *host);
+
 /**
  *	mmc_claim_host - exclusively claim a host
  *	@host: mmc host to claim

diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h
index 6dc9b80..e8779c6 100644
--- a/include/linux/mmc/dw_mmc.h
+++ b/include/linux/mmc/dw_mmc.h

@@ -214,6 +214,7 @@
 	unsigned int bus_hz; /* Bus speed */
 
 	unsigned int caps;	/* Capabilities */
+	unsigned int caps2;	/* More capabilities */
 	/*
 	 * Override fifo depth. If 0, autodetect it from the FIFOTH register,
 	 * but note that this may not be reliable after a bootloader has used

diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index a3ac9c4..dd13e05 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h

@@ -56,10 +56,13 @@
 #define MMC_TIMING_UHS_SDR50	3
 #define MMC_TIMING_UHS_SDR104	4
 #define MMC_TIMING_UHS_DDR50	5
+#define MMC_TIMING_MMC_HS200	6
 
 #define MMC_SDR_MODE		0
 #define MMC_1_2V_DDR_MODE	1
 #define MMC_1_8V_DDR_MODE	2
+#define MMC_1_2V_SDR_MODE	3
+#define MMC_1_8V_SDR_MODE	4
 
 	unsigned char	signal_voltage;		/* signalling voltage (1.8V or 3.3V) */
 
@@ -148,7 +151,9 @@
 	void	(*init_card)(struct mmc_host *host, struct mmc_card *card);
 
 	int	(*start_signal_voltage_switch)(struct mmc_host *host, struct mmc_ios *ios);
-	int	(*execute_tuning)(struct mmc_host *host);
+
+	/* The tuning command opcode value is different for SD and eMMC cards */
+	int	(*execute_tuning)(struct mmc_host *host, u32 opcode);
 	void	(*enable_preset_value)(struct mmc_host *host, bool enable);
 	int	(*select_drive_strength)(unsigned int max_dtr, int host_drv, int card_drv);
 	void	(*hw_reset)(struct mmc_host *host);
@@ -167,6 +172,11 @@
 	int (*err_check) (struct mmc_card *, struct mmc_async_req *);
 };
 
+struct mmc_hotplug {
+	unsigned int irq;
+	void *handler_priv;
+};
+
 struct mmc_host {
 	struct device		*parent;
 	struct device		class_dev;
@@ -242,6 +252,11 @@
 #define MMC_CAP2_CACHE_CTRL	(1 << 1)	/* Allow cache control */
 #define MMC_CAP2_POWEROFF_NOTIFY (1 << 2)	/* Notify poweroff supported */
 #define MMC_CAP2_NO_MULTI_READ	(1 << 3)	/* Multiblock reads don't work */
+#define MMC_CAP2_NO_SLEEP_CMD	(1 << 4)	/* Don't allow sleep command */
+#define MMC_CAP2_HS200_1_8V_SDR	(1 << 5)        /* can support */
+#define MMC_CAP2_HS200_1_2V_SDR	(1 << 6)        /* can support */
+#define MMC_CAP2_HS200		(MMC_CAP2_HS200_1_8V_SDR | \
+				 MMC_CAP2_HS200_1_2V_SDR)
 
 	mmc_pm_flag_t		pm_caps;	/* supported pm features */
 	unsigned int        power_notify_type;
@@ -253,10 +268,12 @@
 	int			clk_requests;	/* internal reference counter */
 	unsigned int		clk_delay;	/* number of MCI clk hold cycles */
 	bool			clk_gated;	/* clock gated */
-	struct work_struct	clk_gate_work; /* delayed clock gate */
+	struct delayed_work	clk_gate_work; /* delayed clock gate */
 	unsigned int		clk_old;	/* old clock value cache */
 	spinlock_t		clk_lock;	/* lock for clk fields */
 	struct mutex		clk_gate_mutex;	/* mutex for clock gating */
+	struct device_attribute clkgate_delay_attr;
+	unsigned long           clkgate_delay;
 #endif
 
 	/* host specific block data */
@@ -297,6 +314,8 @@
 	int			claim_cnt;	/* "claim" nesting count */
 
 	struct delayed_work	detect;
+	int			detect_change;	/* card detect flag */
+	struct mmc_hotplug	hotplug;
 
 	const struct mmc_bus_ops *bus_ops;	/* current bus driver */
 	unsigned int		bus_refs;	/* reference counter */
@@ -323,6 +342,8 @@
 	struct fault_attr	fail_mmc_request;
 #endif
 
+	unsigned int		actual_clock;	/* Actual HC clock rate */
+
 	unsigned long		private[0] ____cacheline_aligned;
 };
 

diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h
index 0e71356..fb9f6e1 100644
--- a/include/linux/mmc/mmc.h
+++ b/include/linux/mmc/mmc.h

@@ -51,6 +51,7 @@
 #define MMC_READ_SINGLE_BLOCK    17   /* adtc [31:0] data addr   R1  */
 #define MMC_READ_MULTIPLE_BLOCK  18   /* adtc [31:0] data addr   R1  */
 #define MMC_SEND_TUNING_BLOCK    19   /* adtc                    R1  */
+#define MMC_SEND_TUNING_BLOCK_HS200	21	/* adtc R1  */
 
   /* class 3 */
 #define MMC_WRITE_DAT_UNTIL_STOP 20   /* adtc [31:0] data addr   R1  */
@@ -280,6 +281,7 @@
 #define EXT_CSD_RST_N_FUNCTION		162	/* R/W */
 #define EXT_CSD_SANITIZE_START		165     /* W */
 #define EXT_CSD_WR_REL_PARAM		166	/* RO */
+#define EXT_CSD_BOOT_WP			173	/* R/W */
 #define EXT_CSD_ERASE_GROUP_DEF		175	/* R/W */
 #define EXT_CSD_PART_CONFIG		179	/* R/W */
 #define EXT_CSD_ERASED_MEM_CONT		181	/* RO */
@@ -321,6 +323,11 @@
 
 #define EXT_CSD_WR_REL_PARAM_EN		(1<<2)
 
+#define EXT_CSD_BOOT_WP_B_PWR_WP_DIS	(0x40)
+#define EXT_CSD_BOOT_WP_B_PERM_WP_DIS	(0x10)
+#define EXT_CSD_BOOT_WP_B_PERM_WP_EN	(0x04)
+#define EXT_CSD_BOOT_WP_B_PWR_WP_EN	(0x01)
+
 #define EXT_CSD_PART_CONFIG_ACC_MASK	(0x7)
 #define EXT_CSD_PART_CONFIG_ACC_BOOT0	(0x1)
 #define EXT_CSD_PART_CONFIG_ACC_GP0	(0x4)
@@ -333,13 +340,76 @@
 
 #define EXT_CSD_CARD_TYPE_26	(1<<0)	/* Card can run at 26MHz */
 #define EXT_CSD_CARD_TYPE_52	(1<<1)	/* Card can run at 52MHz */
-#define EXT_CSD_CARD_TYPE_MASK	0xF	/* Mask out reserved bits */
+#define EXT_CSD_CARD_TYPE_MASK	0x3F	/* Mask out reserved bits */
 #define EXT_CSD_CARD_TYPE_DDR_1_8V  (1<<2)   /* Card can run at 52MHz */
 					     /* DDR mode @1.8V or 3V I/O */
 #define EXT_CSD_CARD_TYPE_DDR_1_2V  (1<<3)   /* Card can run at 52MHz */
 					     /* DDR mode @1.2V I/O */
 #define EXT_CSD_CARD_TYPE_DDR_52       (EXT_CSD_CARD_TYPE_DDR_1_8V  \
 					| EXT_CSD_CARD_TYPE_DDR_1_2V)
+#define EXT_CSD_CARD_TYPE_SDR_1_8V	(1<<4)	/* Card can run at 200MHz */
+#define EXT_CSD_CARD_TYPE_SDR_1_2V	(1<<5)	/* Card can run at 200MHz */
+						/* SDR mode @1.2V I/O */
+
+#define EXT_CSD_CARD_TYPE_SDR_200	(EXT_CSD_CARD_TYPE_SDR_1_8V | \
+					 EXT_CSD_CARD_TYPE_SDR_1_2V)
+
+#define EXT_CSD_CARD_TYPE_SDR_ALL	(EXT_CSD_CARD_TYPE_SDR_200 | \
+					 EXT_CSD_CARD_TYPE_52 | \
+					 EXT_CSD_CARD_TYPE_26)
+
+#define	EXT_CSD_CARD_TYPE_SDR_1_2V_ALL	(EXT_CSD_CARD_TYPE_SDR_1_2V | \
+					 EXT_CSD_CARD_TYPE_52 | \
+					 EXT_CSD_CARD_TYPE_26)
+
+#define	EXT_CSD_CARD_TYPE_SDR_1_8V_ALL	(EXT_CSD_CARD_TYPE_SDR_1_8V | \
+					 EXT_CSD_CARD_TYPE_52 | \
+					 EXT_CSD_CARD_TYPE_26)
+
+#define EXT_CSD_CARD_TYPE_SDR_1_2V_DDR_1_8V	(EXT_CSD_CARD_TYPE_SDR_1_2V | \
+						 EXT_CSD_CARD_TYPE_DDR_1_8V | \
+						 EXT_CSD_CARD_TYPE_52 | \
+						 EXT_CSD_CARD_TYPE_26)
+
+#define EXT_CSD_CARD_TYPE_SDR_1_8V_DDR_1_8V	(EXT_CSD_CARD_TYPE_SDR_1_8V | \
+						 EXT_CSD_CARD_TYPE_DDR_1_8V | \
+						 EXT_CSD_CARD_TYPE_52 | \
+						 EXT_CSD_CARD_TYPE_26)
+
+#define EXT_CSD_CARD_TYPE_SDR_1_2V_DDR_1_2V	(EXT_CSD_CARD_TYPE_SDR_1_2V | \
+						 EXT_CSD_CARD_TYPE_DDR_1_2V | \
+						 EXT_CSD_CARD_TYPE_52 | \
+						 EXT_CSD_CARD_TYPE_26)
+
+#define EXT_CSD_CARD_TYPE_SDR_1_8V_DDR_1_2V	(EXT_CSD_CARD_TYPE_SDR_1_8V | \
+						 EXT_CSD_CARD_TYPE_DDR_1_2V | \
+						 EXT_CSD_CARD_TYPE_52 | \
+						 EXT_CSD_CARD_TYPE_26)
+
+#define EXT_CSD_CARD_TYPE_SDR_1_2V_DDR_52	(EXT_CSD_CARD_TYPE_SDR_1_2V | \
+						 EXT_CSD_CARD_TYPE_DDR_52 | \
+						 EXT_CSD_CARD_TYPE_52 | \
+						 EXT_CSD_CARD_TYPE_26)
+
+#define EXT_CSD_CARD_TYPE_SDR_1_8V_DDR_52	(EXT_CSD_CARD_TYPE_SDR_1_8V | \
+						 EXT_CSD_CARD_TYPE_DDR_52 | \
+						 EXT_CSD_CARD_TYPE_52 | \
+						 EXT_CSD_CARD_TYPE_26)
+
+#define EXT_CSD_CARD_TYPE_SDR_ALL_DDR_1_8V	(EXT_CSD_CARD_TYPE_SDR_200 | \
+						 EXT_CSD_CARD_TYPE_DDR_1_8V | \
+						 EXT_CSD_CARD_TYPE_52 | \
+						 EXT_CSD_CARD_TYPE_26)
+
+#define EXT_CSD_CARD_TYPE_SDR_ALL_DDR_1_2V	(EXT_CSD_CARD_TYPE_SDR_200 | \
+						 EXT_CSD_CARD_TYPE_DDR_1_2V | \
+						 EXT_CSD_CARD_TYPE_52 | \
+						 EXT_CSD_CARD_TYPE_26)
+
+#define EXT_CSD_CARD_TYPE_SDR_ALL_DDR_52	(EXT_CSD_CARD_TYPE_SDR_200 | \
+						 EXT_CSD_CARD_TYPE_DDR_52 | \
+						 EXT_CSD_CARD_TYPE_52 | \
+						 EXT_CSD_CARD_TYPE_26)
 
 #define EXT_CSD_BUS_WIDTH_1	0	/* Card is in 1 bit mode */
 #define EXT_CSD_BUS_WIDTH_4	1	/* Card is in 4 bit mode */

diff --git a/include/linux/mmc/sdhci-pci-data.h b/include/linux/mmc/sdhci-pci-data.h
new file mode 100644
index 0000000..8959604
--- /dev/null
+++ b/include/linux/mmc/sdhci-pci-data.h

@@ -0,0 +1,18 @@
+#ifndef LINUX_MMC_SDHCI_PCI_DATA_H
+#define LINUX_MMC_SDHCI_PCI_DATA_H
+
+struct pci_dev;
+
+struct sdhci_pci_data {
+	struct pci_dev	*pdev;
+	int		slotno;
+	int		rst_n_gpio; /* Set to -EINVAL if unused */
+	int		cd_gpio;    /* Set to -EINVAL if unused */
+	int		(*setup)(struct sdhci_pci_data *data);
+	void		(*cleanup)(struct sdhci_pci_data *data);
+};
+
+extern struct sdhci_pci_data *(*sdhci_pci_get_data)(struct pci_dev *pdev,
+				int slotno);
+
+#endif

diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h
index e4b6935..c750f85 100644
--- a/include/linux/mmc/sdhci.h
+++ b/include/linux/mmc/sdhci.h

@@ -90,8 +90,6 @@
 
 	unsigned int quirks2;	/* More deviations from spec. */
 
-#define SDHCI_QUIRK2_OWN_CARD_DETECTION			(1<<0)
-
 	int irq;		/* Device IRQ */
 	void __iomem *ioaddr;	/* Mapped address */
 
@@ -121,6 +119,7 @@
 #define SDHCI_AUTO_CMD23	(1<<7)	/* Auto CMD23 support */
 #define SDHCI_PV_ENABLED	(1<<8)	/* Preset value enabled */
 #define SDHCI_SDIO_IRQ_ENABLED	(1<<9)	/* SDIO irq enabled */
+#define SDHCI_HS200_NEEDS_TUNING (1<<10)	/* HS200 needs tuning */
 
 	unsigned int version;	/* SDHCI spec. version */
 

diff --git a/include/linux/mmc/sdio.h b/include/linux/mmc/sdio.h
index e0b1123..c9fe66c 100644
--- a/include/linux/mmc/sdio.h
+++ b/include/linux/mmc/sdio.h

@@ -38,6 +38,7 @@
  *      [8:0] Byte/block count
  */
 
+#define R4_18V_PRESENT (1<<24)
 #define R4_MEMORY_PRESENT (1 << 27)
 
 /*
@@ -85,6 +86,7 @@
 #define  SDIO_SD_REV_1_01	0	/* SD Physical Spec Version 1.01 */
 #define  SDIO_SD_REV_1_10	1	/* SD Physical Spec Version 1.10 */
 #define  SDIO_SD_REV_2_00	2	/* SD Physical Spec Version 2.00 */
+#define  SDIO_SD_REV_3_00	3	/* SD Physical Spec Version 3.00 */
 
 #define SDIO_CCCR_IOEx		0x02
 #define SDIO_CCCR_IORx		0x03
@@ -134,8 +136,31 @@
 #define SDIO_CCCR_SPEED		0x13
 
 #define  SDIO_SPEED_SHS		0x01	/* Supports High-Speed mode */
-#define  SDIO_SPEED_EHS		0x02	/* Enable High-Speed mode */
+#define  SDIO_SPEED_BSS_SHIFT	1
+#define  SDIO_SPEED_BSS_MASK	(7<<SDIO_SPEED_BSS_SHIFT)
+#define  SDIO_SPEED_SDR12	(0<<SDIO_SPEED_BSS_SHIFT)
+#define  SDIO_SPEED_SDR25	(1<<SDIO_SPEED_BSS_SHIFT)
+#define  SDIO_SPEED_SDR50	(2<<SDIO_SPEED_BSS_SHIFT)
+#define  SDIO_SPEED_SDR104	(3<<SDIO_SPEED_BSS_SHIFT)
+#define  SDIO_SPEED_DDR50	(4<<SDIO_SPEED_BSS_SHIFT)
+#define  SDIO_SPEED_EHS		SDIO_SPEED_SDR25	/* Enable High-Speed */
 
+#define SDIO_CCCR_UHS		0x14
+#define  SDIO_UHS_SDR50		0x01
+#define  SDIO_UHS_SDR104	0x02
+#define  SDIO_UHS_DDR50		0x04
+
+#define SDIO_CCCR_DRIVE_STRENGTH 0x15
+#define  SDIO_SDTx_MASK		0x07
+#define  SDIO_DRIVE_SDTA	(1<<0)
+#define  SDIO_DRIVE_SDTC	(1<<1)
+#define  SDIO_DRIVE_SDTD	(1<<2)
+#define  SDIO_DRIVE_DTSx_MASK	0x03
+#define  SDIO_DRIVE_DTSx_SHIFT	4
+#define  SDIO_DTSx_SET_TYPE_B	(0 << SDIO_DRIVE_DTSx_SHIFT)
+#define  SDIO_DTSx_SET_TYPE_A	(1 << SDIO_DRIVE_DTSx_SHIFT)
+#define  SDIO_DTSx_SET_TYPE_C	(2 << SDIO_DRIVE_DTSx_SHIFT)
+#define  SDIO_DTSx_SET_TYPE_D	(3 << SDIO_DRIVE_DTSx_SHIFT)
 /*
  * Function Basic Registers (FBR)
  */

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index ca6ca92..650ba2f 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h

@@ -140,25 +140,29 @@
 	NR_LRU_LISTS
 };
 
-#define for_each_lru(l) for (l = 0; l < NR_LRU_LISTS; l++)
+#define for_each_lru(lru) for (lru = 0; lru < NR_LRU_LISTS; lru++)
 
-#define for_each_evictable_lru(l) for (l = 0; l <= LRU_ACTIVE_FILE; l++)
+#define for_each_evictable_lru(lru) for (lru = 0; lru <= LRU_ACTIVE_FILE; lru++)
 
-static inline int is_file_lru(enum lru_list l)
+static inline int is_file_lru(enum lru_list lru)
 {
-	return (l == LRU_INACTIVE_FILE || l == LRU_ACTIVE_FILE);
+	return (lru == LRU_INACTIVE_FILE || lru == LRU_ACTIVE_FILE);
 }
 
-static inline int is_active_lru(enum lru_list l)
+static inline int is_active_lru(enum lru_list lru)
 {
-	return (l == LRU_ACTIVE_ANON || l == LRU_ACTIVE_FILE);
+	return (lru == LRU_ACTIVE_ANON || lru == LRU_ACTIVE_FILE);
 }
 
-static inline int is_unevictable_lru(enum lru_list l)
+static inline int is_unevictable_lru(enum lru_list lru)
 {
-	return (l == LRU_UNEVICTABLE);
+	return (lru == LRU_UNEVICTABLE);
 }
 
+struct lruvec {
+	struct list_head lists[NR_LRU_LISTS];
+};
+
 /* Mask used at gathering information at once (see memcontrol.c) */
 #define LRU_ALL_FILE (BIT(LRU_INACTIVE_FILE) | BIT(LRU_ACTIVE_FILE))
 #define LRU_ALL_ANON (BIT(LRU_INACTIVE_ANON) | BIT(LRU_ACTIVE_ANON))
@@ -173,6 +177,8 @@
 #define ISOLATE_CLEAN		((__force isolate_mode_t)0x4)
 /* Isolate unmapped file */
 #define ISOLATE_UNMAPPED	((__force isolate_mode_t)0x8)
+/* Isolate for asynchronous migration */
+#define ISOLATE_ASYNC_MIGRATE	((__force isolate_mode_t)0x10)
 
 /* LRU Isolation modes. */
 typedef unsigned __bitwise__ isolate_mode_t;
@@ -364,10 +370,8 @@
 	ZONE_PADDING(_pad1_)
 
 	/* Fields commonly accessed by the page reclaim scanner */
-	spinlock_t		lru_lock;	
-	struct zone_lru {
-		struct list_head list;
-	} lru[NR_LRU_LISTS];
+	spinlock_t		lru_lock;
+	struct lruvec		lruvec;
 
 	struct zone_reclaim_stat reclaim_stat;
 

diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index 83ac071..b29e7f6 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h

@@ -436,6 +436,17 @@
 			__attribute__((aligned(sizeof(kernel_ulong_t))));
 };
 
+/* mcp */
+
+#define MCP_NAME_SIZE	20
+#define MCP_MODULE_PREFIX "mcp:"
+
+struct mcp_device_id {
+	char name[MCP_NAME_SIZE];
+	kernel_ulong_t driver_data	/* Data private to the driver */
+			__attribute__((aligned(sizeof(kernel_ulong_t))));
+};
+
 /* dmi */
 enum dmi_field {
 	DMI_NONE,

diff --git a/include/linux/oom.h b/include/linux/oom.h
index 6f9d04a..552fba9 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h

@@ -43,7 +43,7 @@
 extern void compare_swap_oom_score_adj(int old_val, int new_val);
 extern int test_set_oom_score_adj(int new_val);
 
-extern unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *mem,
+extern unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
 			const nodemask_t *nodemask, unsigned long totalpages);
 extern int try_set_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags);
 extern void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags);

diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index 961ecc7..a2d1177 100644
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h

@@ -10,8 +10,6 @@
 	/* flags for mem_cgroup and file and I/O status */
 	PCG_MOVE_LOCK, /* For race between move_account v.s. following bits */
 	PCG_FILE_MAPPED, /* page is accounted as "mapped" */
-	/* No lock in page_cgroup */
-	PCG_ACCT_LRU, /* page has been accounted for (under lru_lock) */
 	__NR_PCG_FLAGS,
 };
 
@@ -31,7 +29,6 @@
 struct page_cgroup {
 	unsigned long flags;
 	struct mem_cgroup *mem_cgroup;
-	struct list_head lru;		/* per cgroup LRU list */
 };
 
 void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat);
@@ -76,12 +73,6 @@
 CLEARPCGFLAG(Used, USED)
 SETPCGFLAG(Used, USED)
 
-SETPCGFLAG(AcctLRU, ACCT_LRU)
-CLEARPCGFLAG(AcctLRU, ACCT_LRU)
-TESTPCGFLAG(AcctLRU, ACCT_LRU)
-TESTCLEARPCGFLAG(AcctLRU, ACCT_LRU)
-
-
 SETPCGFLAG(FileMapped, FILE_MAPPED)
 CLEARPCGFLAG(FileMapped, FILE_MAPPED)
 TESTPCGFLAG(FileMapped, FILE_MAPPED)
@@ -122,39 +113,6 @@
 	local_irq_restore(*flags);
 }
 
-#ifdef CONFIG_SPARSEMEM
-#define PCG_ARRAYID_WIDTH	SECTIONS_SHIFT
-#else
-#define PCG_ARRAYID_WIDTH	NODES_SHIFT
-#endif
-
-#if (PCG_ARRAYID_WIDTH > BITS_PER_LONG - NR_PCG_FLAGS)
-#error Not enough space left in pc->flags to store page_cgroup array IDs
-#endif
-
-/* pc->flags: ARRAY-ID | FLAGS */
-
-#define PCG_ARRAYID_MASK	((1UL << PCG_ARRAYID_WIDTH) - 1)
-
-#define PCG_ARRAYID_OFFSET	(BITS_PER_LONG - PCG_ARRAYID_WIDTH)
-/*
- * Zero the shift count for non-existent fields, to prevent compiler
- * warnings and ensure references are optimized away.
- */
-#define PCG_ARRAYID_SHIFT	(PCG_ARRAYID_OFFSET * (PCG_ARRAYID_WIDTH != 0))
-
-static inline void set_page_cgroup_array_id(struct page_cgroup *pc,
-					    unsigned long id)
-{
-	pc->flags &= ~(PCG_ARRAYID_MASK << PCG_ARRAYID_SHIFT);
-	pc->flags |= (id & PCG_ARRAYID_MASK) << PCG_ARRAYID_SHIFT;
-}
-
-static inline unsigned long page_cgroup_array_id(struct page_cgroup *pc)
-{
-	return (pc->flags >> PCG_ARRAYID_SHIFT) & PCG_ARRAYID_MASK;
-}
-
 #else /* CONFIG_CGROUP_MEM_RES_CTLR */
 struct page_cgroup;
 
@@ -183,7 +141,7 @@
 extern unsigned short swap_cgroup_cmpxchg(swp_entry_t ent,
 					unsigned short old, unsigned short new);
 extern unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id);
-extern unsigned short lookup_swap_cgroup(swp_entry_t ent);
+extern unsigned short lookup_swap_cgroup_id(swp_entry_t ent);
 extern int swap_cgroup_swapon(int type, unsigned long max_pages);
 extern void swap_cgroup_swapoff(int type);
 #else
@@ -195,7 +153,7 @@
 }
 
 static inline
-unsigned short lookup_swap_cgroup(swp_entry_t ent)
+unsigned short lookup_swap_cgroup_id(swp_entry_t ent)
 {
 	return 0;
 }

diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h
index ed17024..2aa12b8 100644
--- a/include/linux/pagevec.h
+++ b/include/linux/pagevec.h

@@ -21,8 +21,7 @@
 };
 
 void __pagevec_release(struct pagevec *pvec);
-void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru);
-void pagevec_strip(struct pagevec *pvec);
+void __pagevec_lru_add(struct pagevec *pvec, enum lru_list lru);
 unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping,
 		pgoff_t start, unsigned nr_pages);
 unsigned pagevec_lookup_tag(struct pagevec *pvec,
@@ -59,7 +58,6 @@
 	return pagevec_space(pvec);
 }
 
-
 static inline void pagevec_release(struct pagevec *pvec)
 {
 	if (pagevec_count(pvec))
@@ -68,22 +66,22 @@
 
 static inline void __pagevec_lru_add_anon(struct pagevec *pvec)
 {
-	____pagevec_lru_add(pvec, LRU_INACTIVE_ANON);
+	__pagevec_lru_add(pvec, LRU_INACTIVE_ANON);
 }
 
 static inline void __pagevec_lru_add_active_anon(struct pagevec *pvec)
 {
-	____pagevec_lru_add(pvec, LRU_ACTIVE_ANON);
+	__pagevec_lru_add(pvec, LRU_ACTIVE_ANON);
 }
 
 static inline void __pagevec_lru_add_file(struct pagevec *pvec)
 {
-	____pagevec_lru_add(pvec, LRU_INACTIVE_FILE);
+	__pagevec_lru_add(pvec, LRU_INACTIVE_FILE);
 }
 
 static inline void __pagevec_lru_add_active_file(struct pagevec *pvec)
 {
-	____pagevec_lru_add(pvec, LRU_ACTIVE_FILE);
+	__pagevec_lru_add(pvec, LRU_ACTIVE_FILE);
 }
 
 static inline void pagevec_lru_add_file(struct pagevec *pvec)

diff --git a/include/linux/phy.h b/include/linux/phy.h
index 79f337c..c599f7ec 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h

@@ -129,7 +129,12 @@
 };
 #define to_mii_bus(d) container_of(d, struct mii_bus, dev)
 
-struct mii_bus *mdiobus_alloc(void);
+struct mii_bus *mdiobus_alloc_size(size_t);
+static inline struct mii_bus *mdiobus_alloc(void)
+{
+	return mdiobus_alloc_size(0);
+}
+
 int mdiobus_register(struct mii_bus *bus);
 void mdiobus_unregister(struct mii_bus *bus);
 void mdiobus_free(struct mii_bus *bus);

diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index 8f1b928..0d5b793 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h

@@ -162,10 +162,30 @@
 	unsigned	flows;		/* Maximal number of flows  */
 };
 
+struct tc_sfqred_stats {
+	__u32           prob_drop;      /* Early drops, below max threshold */
+	__u32           forced_drop;	/* Early drops, after max threshold */
+	__u32           prob_mark;      /* Marked packets, below max threshold */
+	__u32           forced_mark;    /* Marked packets, after max threshold */
+	__u32           prob_mark_head; /* Marked packets, below max threshold */
+	__u32           forced_mark_head;/* Marked packets, after max threshold */
+};
+
 struct tc_sfq_qopt_v1 {
 	struct tc_sfq_qopt v0;
 	unsigned int	depth;		/* max number of packets per flow */
 	unsigned int	headdrop;
+/* SFQRED parameters */
+	__u32		limit;		/* HARD maximal flow queue length (bytes) */
+	__u32		qth_min;	/* Min average length threshold (bytes) */
+	__u32		qth_max;	/* Max average length threshold (bytes) */
+	unsigned char   Wlog;		/* log(W)		*/
+	unsigned char   Plog;		/* log(P_max/(qth_max-qth_min))	*/
+	unsigned char   Scell_log;	/* cell size for idle damping */
+	unsigned char	flags;
+	__u32		max_P;		/* probability, high resolution */
+/* SFQRED stats */
+	struct tc_sfqred_stats stats;
 };
 
 

diff --git a/include/linux/prctl.h b/include/linux/prctl.h
index a3baeb2..7ddc7f1 100644
--- a/include/linux/prctl.h
+++ b/include/linux/prctl.h

@@ -102,4 +102,16 @@
 
 #define PR_MCE_KILL_GET 34
 
+/*
+ * Tune up process memory map specifics.
+ */
+#define PR_SET_MM		35
+# define PR_SET_MM_START_CODE		1
+# define PR_SET_MM_END_CODE		2
+# define PR_SET_MM_START_DATA		3
+# define PR_SET_MM_END_DATA		4
+# define PR_SET_MM_START_STACK		5
+# define PR_SET_MM_START_BRK		6
+# define PR_SET_MM_BRK			7
+
 #endif /* _LINUX_PRCTL_H */

diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 9d4539c..07e360b 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h

@@ -49,9 +49,6 @@
 #define RADIX_TREE_EXCEPTIONAL_ENTRY	2
 #define RADIX_TREE_EXCEPTIONAL_SHIFT	2
 
-#define radix_tree_indirect_to_ptr(ptr) \
-	radix_tree_indirect_to_ptr((void __force *)(ptr))
-
 static inline int radix_tree_is_indirect_ptr(void *ptr)
 {
 	return (int)((unsigned long)ptr & RADIX_TREE_INDIRECT_PTR);

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 1afb995..1cdd62a 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h

@@ -158,7 +158,7 @@
  * Called from mm/vmscan.c to handle paging out
  */
 int page_referenced(struct page *, int is_locked,
-			struct mem_cgroup *cnt, unsigned long *vm_flags);
+			struct mem_cgroup *memcg, unsigned long *vm_flags);
 int page_referenced_one(struct page *, struct vm_area_struct *,
 	unsigned long address, unsigned int *mapcount, unsigned long *vm_flags);
 
@@ -236,7 +236,7 @@
 #define anon_vma_link(vma)	do {} while (0)
 
 static inline int page_referenced(struct page *page, int is_locked,
-				  struct mem_cgroup *cnt,
+				  struct mem_cgroup *memcg,
 				  unsigned long *vm_flags)
 {
 	*vm_flags = 0;

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 21cd030..4032ec1 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h

@@ -2275,7 +2275,7 @@
 extern void exit_itimers(struct signal_struct *);
 extern void flush_itimer_signals(void);
 
-extern NORET_TYPE void do_group_exit(int);
+extern void do_group_exit(int);
 
 extern void daemonize(const char *, ...);
 extern int allow_signal(int);

diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 4c069d8..d0018d2 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h

@@ -25,71 +25,19 @@
 	void *priv;
 };
 
-/**
- * operations for virtqueue
- * virtqueue_add_buf: expose buffer to other end
- *	vq: the struct virtqueue we're talking about.
- *	sg: the description of the buffer(s).
- *	out_num: the number of sg readable by other side
- *	in_num: the number of sg which are writable (after readable ones)
- *	data: the token identifying the buffer.
- *	gfp: how to do memory allocations (if necessary).
- *      Returns remaining capacity of queue (sg segments) or a negative error.
- * virtqueue_kick: update after add_buf
- *	vq: the struct virtqueue
- *	After one or more add_buf calls, invoke this to kick the other side.
- * virtqueue_get_buf: get the next used buffer
- *	vq: the struct virtqueue we're talking about.
- *	len: the length written into the buffer
- *	Returns NULL or the "data" token handed to add_buf.
- * virtqueue_disable_cb: disable callbacks
- *	vq: the struct virtqueue we're talking about.
- *	Note that this is not necessarily synchronous, hence unreliable and only
- *	useful as an optimization.
- * virtqueue_enable_cb: restart callbacks after disable_cb.
- *	vq: the struct virtqueue we're talking about.
- *	This re-enables callbacks; it returns "false" if there are pending
- *	buffers in the queue, to detect a possible race between the driver
- *	checking for more work, and enabling callbacks.
- * virtqueue_enable_cb_delayed: restart callbacks after disable_cb.
- *	vq: the struct virtqueue we're talking about.
- *	This re-enables callbacks but hints to the other side to delay
- *	interrupts until most of the available buffers have been processed;
- *	it returns "false" if there are many pending buffers in the queue,
- *	to detect a possible race between the driver checking for more work,
- *	and enabling callbacks.
- * virtqueue_detach_unused_buf: detach first unused buffer
- * 	vq: the struct virtqueue we're talking about.
- * 	Returns NULL or the "data" token handed to add_buf
- * virtqueue_get_vring_size: return the size of the virtqueue's vring
- *	vq: the struct virtqueue containing the vring of interest.
- *	Returns the size of the vring.
- *
- * Locking rules are straightforward: the driver is responsible for
- * locking.  No two operations may be invoked simultaneously, with the exception
- * of virtqueue_disable_cb.
- *
- * All operations can be called in any context.
- */
-
-int virtqueue_add_buf_gfp(struct virtqueue *vq,
-			  struct scatterlist sg[],
-			  unsigned int out_num,
-			  unsigned int in_num,
-			  void *data,
-			  gfp_t gfp);
-
-static inline int virtqueue_add_buf(struct virtqueue *vq,
-				    struct scatterlist sg[],
-				    unsigned int out_num,
-				    unsigned int in_num,
-				    void *data)
-{
-	return virtqueue_add_buf_gfp(vq, sg, out_num, in_num, data, GFP_ATOMIC);
-}
+int virtqueue_add_buf(struct virtqueue *vq,
+		      struct scatterlist sg[],
+		      unsigned int out_num,
+		      unsigned int in_num,
+		      void *data,
+		      gfp_t gfp);
 
 void virtqueue_kick(struct virtqueue *vq);
 
+bool virtqueue_kick_prepare(struct virtqueue *vq);
+
+void virtqueue_notify(struct virtqueue *vq);
+
 void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len);
 
 void virtqueue_disable_cb(struct virtqueue *vq);
@@ -146,6 +94,11 @@
 	int (*probe)(struct virtio_device *dev);
 	void (*remove)(struct virtio_device *dev);
 	void (*config_changed)(struct virtio_device *dev);
+#ifdef CONFIG_PM
+	int (*freeze)(struct virtio_device *dev);
+	int (*thaw)(struct virtio_device *dev);
+	int (*restore)(struct virtio_device *dev);
+#endif
 };
 
 int register_virtio_driver(struct virtio_driver *drv);

diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h
index 36be0f6..e338730 100644
--- a/include/linux/virtio_ring.h
+++ b/include/linux/virtio_ring.h

@@ -168,6 +168,7 @@
 struct virtqueue *vring_new_virtqueue(unsigned int num,
 				      unsigned int vring_align,
 				      struct virtio_device *vdev,
+				      bool weak_barriers,
 				      void *pages,
 				      void (*notify)(struct virtqueue *vq),
 				      void (*callback)(struct virtqueue *vq),

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 5e2e984..ea9231f 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h

@@ -127,7 +127,7 @@
 	__u8		major_class;
 	__u8		minor_class;
 	__u8		features[8];
-	__u8		extfeatures[8];
+	__u8		host_features[8];
 	__u8		commands[64];
 	__u8		ssp_mode;
 	__u8		hci_ver;
@@ -676,7 +676,7 @@
 #define lmp_le_capable(dev)        ((dev)->features[4] & LMP_LE)
 
 /* ----- Extended LMP capabilities ----- */
-#define lmp_host_le_capable(dev)   ((dev)->extfeatures[0] & LMP_HOST_LE)
+#define lmp_host_le_capable(dev)   ((dev)->host_features[0] & LMP_HOST_LE)
 
 /* ----- HCI protocols ----- */
 static inline int hci_proto_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr,

diff --git a/include/net/red.h b/include/net/red.h
index baab385..28068ec 100644
--- a/include/net/red.h
+++ b/include/net/red.h

@@ -199,7 +199,8 @@
 	p->Scell_log	= Scell_log;
 	p->Scell_max	= (255 << Scell_log);
 
-	memcpy(p->Stab, stab, sizeof(p->Stab));
+	if (stab)
+		memcpy(p->Stab, stab, sizeof(p->Stab));
 }
 
 static inline int red_is_idling(const struct red_vars *v)

diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index edc4b3d..f64560e 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h

@@ -266,9 +266,10 @@
 		unsigned long nr_lumpy_taken,
 		unsigned long nr_lumpy_dirty,
 		unsigned long nr_lumpy_failed,
-		isolate_mode_t isolate_mode),
+		isolate_mode_t isolate_mode,
+		int file),
 
-	TP_ARGS(order, nr_requested, nr_scanned, nr_taken, nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, isolate_mode),
+	TP_ARGS(order, nr_requested, nr_scanned, nr_taken, nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, isolate_mode, file),
 
 	TP_STRUCT__entry(
 		__field(int, order)
@@ -279,6 +280,7 @@
 		__field(unsigned long, nr_lumpy_dirty)
 		__field(unsigned long, nr_lumpy_failed)
 		__field(isolate_mode_t, isolate_mode)
+		__field(int, file)
 	),
 
 	TP_fast_assign(
@@ -290,9 +292,10 @@
 		__entry->nr_lumpy_dirty = nr_lumpy_dirty;
 		__entry->nr_lumpy_failed = nr_lumpy_failed;
 		__entry->isolate_mode = isolate_mode;
+		__entry->file = file;
 	),
 
-	TP_printk("isolate_mode=%d order=%d nr_requested=%lu nr_scanned=%lu nr_taken=%lu contig_taken=%lu contig_dirty=%lu contig_failed=%lu",
+	TP_printk("isolate_mode=%d order=%d nr_requested=%lu nr_scanned=%lu nr_taken=%lu contig_taken=%lu contig_dirty=%lu contig_failed=%lu file=%d",
 		__entry->isolate_mode,
 		__entry->order,
 		__entry->nr_requested,
@@ -300,7 +303,8 @@
 		__entry->nr_taken,
 		__entry->nr_lumpy_taken,
 		__entry->nr_lumpy_dirty,
-		__entry->nr_lumpy_failed)
+		__entry->nr_lumpy_failed,
+		__entry->file)
 );
 
 DEFINE_EVENT(mm_vmscan_lru_isolate_template, mm_vmscan_lru_isolate,
@@ -312,9 +316,10 @@
 		unsigned long nr_lumpy_taken,
 		unsigned long nr_lumpy_dirty,
 		unsigned long nr_lumpy_failed,
-		isolate_mode_t isolate_mode),
+		isolate_mode_t isolate_mode,
+		int file),
 
-	TP_ARGS(order, nr_requested, nr_scanned, nr_taken, nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, isolate_mode)
+	TP_ARGS(order, nr_requested, nr_scanned, nr_taken, nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, isolate_mode, file)
 
 );
 
@@ -327,9 +332,10 @@
 		unsigned long nr_lumpy_taken,
 		unsigned long nr_lumpy_dirty,
 		unsigned long nr_lumpy_failed,
-		isolate_mode_t isolate_mode),
+		isolate_mode_t isolate_mode,
+		int file),
 
-	TP_ARGS(order, nr_requested, nr_scanned, nr_taken, nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, isolate_mode)
+	TP_ARGS(order, nr_requested, nr_scanned, nr_taken, nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, isolate_mode, file)
 
 );
 

diff --git a/init/Kconfig b/init/Kconfig
index 018d206..6ac2236 100644
--- a/init/Kconfig
+++ b/init/Kconfig

@@ -783,6 +783,17 @@
 
 endif # CGROUPS
 
+config CHECKPOINT_RESTORE
+	bool "Checkpoint/restore support" if EXPERT
+	default n
+	help
+	  Enables additional kernel features for the sake of checkpoint/restore.
+	  In particular it adds auxiliary prctl codes to setup process text,
+	  data and heap segment sizes, and a few additional /proc filesystem
+	  entries.
+
+	  If unsure, say N here.
+
 menuconfig NAMESPACES
 	bool "Namespaces support" if EXPERT
 	default !EXPERT

diff --git a/kernel/exit.c b/kernel/exit.c
index 94ed6e2..c447382 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c

@@ -887,7 +887,7 @@
 static inline void check_stack_usage(void) {}
 #endif
 
-NORET_TYPE void do_exit(long code)
+void do_exit(long code)
 {
 	struct task_struct *tsk = current;
 	int group_dead;
@@ -1051,7 +1051,7 @@
 
 EXPORT_SYMBOL_GPL(do_exit);
 
-NORET_TYPE void complete_and_exit(struct completion *comp, long code)
+void complete_and_exit(struct completion *comp, long code)
 {
 	if (comp)
 		complete(comp);
@@ -1070,7 +1070,7 @@
  * Take down every thread in the group.  This is called by fatal signals
  * as well as by sys_exit_group (below).
  */
-NORET_TYPE void
+void
 do_group_exit(int exit_code)
 {
 	struct signal_struct *sig = current->signal;

diff --git a/kernel/kexec.c b/kernel/kexec.c
index 090ee10..7b08867 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c

@@ -32,7 +32,6 @@
 #include <linux/console.h>
 #include <linux/vmalloc.h>
 #include <linux/swap.h>
-#include <linux/kmsg_dump.h>
 #include <linux/syscore_ops.h>
 
 #include <asm/page.h>
@@ -1094,8 +1093,6 @@
 		if (kexec_crash_image) {
 			struct pt_regs fixed_regs;
 
-			kmsg_dump(KMSG_DUMP_KEXEC);
-
 			crash_setup_regs(&fixed_regs, regs);
 			crash_save_vmcoreinfo();
 			machine_crash_shutdown(&fixed_regs);
@@ -1132,6 +1129,8 @@
 {
 	int ret = 0;
 	unsigned long start, end;
+	unsigned long old_size;
+	struct resource *ram_res;
 
 	mutex_lock(&kexec_mutex);
 
@@ -1141,11 +1140,15 @@
 	}
 	start = crashk_res.start;
 	end = crashk_res.end;
+	old_size = (end == 0) ? 0 : end - start + 1;
+	if (new_size >= old_size) {
+		ret = (new_size == old_size) ? 0 : -EINVAL;
+		goto unlock;
+	}
 
-	if (new_size >= end - start + 1) {
-		ret = -EINVAL;
-		if (new_size == end - start + 1)
-			ret = 0;
+	ram_res = kzalloc(sizeof(*ram_res), GFP_KERNEL);
+	if (!ram_res) {
+		ret = -ENOMEM;
 		goto unlock;
 	}
 
@@ -1157,7 +1160,15 @@
 
 	if ((start == end) && (crashk_res.parent != NULL))
 		release_resource(&crashk_res);
+
+	ram_res->start = end;
+	ram_res->end = crashk_res.end;
+	ram_res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
+	ram_res->name = "System RAM";
+
 	crashk_res.end = end - 1;
+
+	insert_resource(&iomem_resource, ram_res);
 	crash_unmap_reserved_pages();
 
 unlock:

diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index e5d8464..95dd721 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c

@@ -2198,7 +2198,7 @@
 	       const char __user *user_buf, size_t count, loff_t *ppos)
 {
 	char buf[32];
-	int buf_size;
+	size_t buf_size;
 
 	buf_size = min(count, (sizeof(buf)-1));
 	if (copy_from_user(buf, user_buf, buf_size))

diff --git a/kernel/panic.c b/kernel/panic.c
index 3458469..80aed44 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c

@@ -49,6 +49,15 @@
 long (*panic_blink)(int state);
 EXPORT_SYMBOL(panic_blink);
 
+/*
+ * Stop ourselves in panic -- architecture code may override this
+ */
+void __weak panic_smp_self_stop(void)
+{
+	while (1)
+		cpu_relax();
+}
+
 /**
  *	panic - halt the system
  *	@fmt: The text string to print
@@ -57,8 +66,9 @@
  *
  *	This function never returns.
  */
-NORET_TYPE void panic(const char * fmt, ...)
+void panic(const char *fmt, ...)
 {
+	static DEFINE_SPINLOCK(panic_lock);
 	static char buf[1024];
 	va_list args;
 	long i, i_next = 0;
@@ -68,8 +78,14 @@
 	 * It's possible to come here directly from a panic-assertion and
 	 * not have preempt disabled. Some functions called from here want
 	 * preempt to be disabled. No point enabling it later though...
+	 *
+	 * Only one CPU is allowed to execute the panic code from here. For
+	 * multiple parallel invocations of panic, all other CPUs either
+	 * stop themselves or will wait until they are stopped by the 1st CPU
+	 * with smp_send_stop().
 	 */
-	preempt_disable();
+	if (!spin_trylock(&panic_lock))
+		panic_smp_self_stop();
 
 	console_verbose();
 	bust_spinlocks(1);
@@ -78,7 +94,11 @@
 	va_end(args);
 	printk(KERN_EMERG "Kernel panic - not syncing: %s\n",buf);
 #ifdef CONFIG_DEBUG_BUGVERBOSE
-	dump_stack();
+	/*
+	 * Avoid nested stack-dumping if a panic occurs during oops processing
+	 */
+	if (!oops_in_progress)
+		dump_stack();
 #endif
 
 	/*

diff --git a/kernel/pid.c b/kernel/pid.c
index fa5f722..ce8e00d 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c

@@ -137,7 +137,9 @@
 }
 
 /*
- * We might be racing with someone else trying to set pid_ns->last_pid.
+ * We might be racing with someone else trying to set pid_ns->last_pid
+ * at the pid allocation time (there's also a sysctl for this, but racing
+ * with this one is OK, see comment in kernel/pid_namespace.c about it).
  * We want the winner to have the "later" value, because if the
  * "earlier" value prevails, then a pid may get reused immediately.
  *

diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index e9c9adc..a896839 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c

@@ -191,9 +191,40 @@
 	return;
 }
 
+static int pid_ns_ctl_handler(struct ctl_table *table, int write,
+		void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	struct ctl_table tmp = *table;
+
+	if (write && !capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	/*
+	 * Writing directly to ns' last_pid field is OK, since this field
+	 * is volatile in a living namespace anyway, and any code writing to
+	 * it should synchronize its usage by external means.
+	 */
+
+	tmp.data = &current->nsproxy->pid_ns->last_pid;
+	return proc_dointvec(&tmp, write, buffer, lenp, ppos);
+}
+
+static struct ctl_table pid_ns_ctl_table[] = {
+	{
+		.procname = "ns_last_pid",
+		.maxlen = sizeof(int),
+		.mode = 0666, /* permissions are checked in the handler */
+		.proc_handler = pid_ns_ctl_handler,
+	},
+	{ }
+};
+
+static struct ctl_path kern_path[] = { { .procname = "kernel", }, { } };
+
 static __init int pid_namespaces_init(void)
 {
 	pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC);
+	register_sysctl_paths(kern_path, pid_ns_ctl_table);
 	return 0;
 }
 

diff --git a/kernel/sys.c b/kernel/sys.c
index ddf8155..4070153 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c

@@ -1692,6 +1692,124 @@
 	return mask;
 }
 
+#ifdef CONFIG_CHECKPOINT_RESTORE
+/*
+ * PR_SET_MM backend: store @addr in the mm_struct field selected by @opt.
+ * Returns 0 on success, -EPERM without CAP_SYS_ADMIN, -EINVAL otherwise.
+ */
+static int prctl_set_mm(int opt, unsigned long addr,
+			unsigned long arg4, unsigned long arg5)
+{
+	unsigned long rlim = rlimit(RLIMIT_DATA);
+	unsigned long vm_req_flags;
+	unsigned long vm_bad_flags;
+	struct vm_area_struct *vma;
+	int error = -EINVAL;
+	struct mm_struct *mm = current->mm;
+
+	if (arg4 | arg5)
+		return -EINVAL;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (addr >= TASK_SIZE)
+		return -EINVAL;
+
+	down_read(&mm->mmap_sem);
+	vma = find_vma(mm, addr);
+
+	if (opt != PR_SET_MM_START_BRK && opt != PR_SET_MM_BRK) {
+		/* @addr must lie inside an existing VMA; fail otherwise */
+		if (!vma || vma->vm_start > addr)
+			goto out;
+	}
+
+	switch (opt) {
+	case PR_SET_MM_START_CODE:
+	case PR_SET_MM_END_CODE:
+		vm_req_flags = VM_READ | VM_EXEC;
+		vm_bad_flags = VM_WRITE | VM_MAYSHARE;
+
+		if ((vma->vm_flags & vm_req_flags) != vm_req_flags ||
+		    (vma->vm_flags & vm_bad_flags))
+			goto out;
+
+		if (opt == PR_SET_MM_START_CODE)
+			mm->start_code = addr;
+		else
+			mm->end_code = addr;
+		break;
+
+	case PR_SET_MM_START_DATA:
+	case PR_SET_MM_END_DATA:
+		vm_req_flags = VM_READ | VM_WRITE;
+		vm_bad_flags = VM_EXEC | VM_MAYSHARE;
+
+		if ((vma->vm_flags & vm_req_flags) != vm_req_flags ||
+		    (vma->vm_flags & vm_bad_flags))
+			goto out;
+
+		if (opt == PR_SET_MM_START_DATA)
+			mm->start_data = addr;
+		else
+			mm->end_data = addr;
+		break;
+
+	case PR_SET_MM_START_STACK:
+#ifdef CONFIG_STACK_GROWSUP
+		vm_req_flags = VM_READ | VM_WRITE | VM_GROWSUP;
+#else
+		vm_req_flags = VM_READ | VM_WRITE | VM_GROWSDOWN;
+#endif
+		if ((vma->vm_flags & vm_req_flags) != vm_req_flags)
+			goto out;
+
+		mm->start_stack = addr;
+		break;
+
+	case PR_SET_MM_START_BRK:
+		if (addr <= mm->end_data)
+			goto out;
+
+		if (rlim < RLIM_INFINITY &&
+		    (mm->brk - addr) +
+		    (mm->end_data - mm->start_data) > rlim)
+			goto out;
+
+		mm->start_brk = addr;
+		break;
+
+	case PR_SET_MM_BRK:
+		if (addr <= mm->end_data)
+			goto out;
+
+		if (rlim < RLIM_INFINITY &&
+		    (addr - mm->start_brk) +
+		    (mm->end_data - mm->start_data) > rlim)
+			goto out;
+
+		mm->brk = addr;
+		break;
+
+	default:
+		goto out;
+	}
+
+	error = 0;
+
+out:
+	up_read(&mm->mmap_sem);
+	return error;
+}
+#else /* CONFIG_CHECKPOINT_RESTORE */
+static int prctl_set_mm(int opt, unsigned long addr,
+			unsigned long arg4, unsigned long arg5)
+{
+	return -EINVAL;
+}
+#endif
+
 SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 		unsigned long, arg4, unsigned long, arg5)
 {
@@ -1841,6 +1959,9 @@
 			else
 				error = PR_MCE_KILL_DEFAULT;
 			break;
+		case PR_SET_MM:
+			error = prctl_set_mm(arg2, arg3, arg4, arg5);
+			break;
 		default:
 			error = -EINVAL;
 			break;

diff --git a/lib/decompress_unlzo.c b/lib/decompress_unlzo.c
index 5a7a2ad..4531294 100644
--- a/lib/decompress_unlzo.c
+++ b/lib/decompress_unlzo.c

@@ -279,7 +279,7 @@
 	ret = 0;
 exit_2:
 	if (!input)
-		free(in_buf);
+		free(in_buf_save);
 exit_1:
 	if (!output)
 		free(out_buf);

diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index d9df745..dc63d08 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c

@@ -48,16 +48,14 @@
 struct radix_tree_node {
 	unsigned int	height;		/* Height from the bottom */
 	unsigned int	count;
-	struct rcu_head	rcu_head;
+	union {
+		struct radix_tree_node *parent;	/* Used when ascending tree */
+		struct rcu_head	rcu_head;	/* Used when freeing node */
+	};
 	void __rcu	*slots[RADIX_TREE_MAP_SIZE];
 	unsigned long	tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS];
 };
 
-struct radix_tree_path {
-	struct radix_tree_node *node;
-	int offset;
-};
-
 #define RADIX_TREE_INDEX_BITS  (8 /* CHAR_BIT */ * sizeof(unsigned long))
 #define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \
 					  RADIX_TREE_MAP_SHIFT))
@@ -256,6 +254,7 @@
 static int radix_tree_extend(struct radix_tree_root *root, unsigned long index)
 {
 	struct radix_tree_node *node;
+	struct radix_tree_node *slot;
 	unsigned int height;
 	int tag;
 
@@ -274,18 +273,23 @@
 		if (!(node = radix_tree_node_alloc(root)))
 			return -ENOMEM;
 
-		/* Increase the height.  */
-		node->slots[0] = indirect_to_ptr(root->rnode);
-
 		/* Propagate the aggregated tag info into the new root */
 		for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
 			if (root_tag_get(root, tag))
 				tag_set(node, tag, 0);
 		}
 
+		/* Increase the height.  */
 		newheight = root->height+1;
 		node->height = newheight;
 		node->count = 1;
+		node->parent = NULL;
+		slot = root->rnode;
+		if (newheight > 1) {
+			slot = indirect_to_ptr(slot);
+			slot->parent = node;
+		}
+		node->slots[0] = slot;
 		node = ptr_to_indirect(node);
 		rcu_assign_pointer(root->rnode, node);
 		root->height = newheight;
@@ -331,6 +335,7 @@
 			if (!(slot = radix_tree_node_alloc(root)))
 				return -ENOMEM;
 			slot->height = height;
+			slot->parent = node;
 			if (node) {
 				rcu_assign_pointer(node->slots[offset], slot);
 				node->count++;
@@ -504,47 +509,41 @@
 void *radix_tree_tag_clear(struct radix_tree_root *root,
 			unsigned long index, unsigned int tag)
 {
-	/*
-	 * The radix tree path needs to be one longer than the maximum path
-	 * since the "list" is null terminated.
-	 */
-	struct radix_tree_path path[RADIX_TREE_MAX_PATH + 1], *pathp = path;
+	struct radix_tree_node *node = NULL;
 	struct radix_tree_node *slot = NULL;
 	unsigned int height, shift;
+	int uninitialized_var(offset);
 
 	height = root->height;
 	if (index > radix_tree_maxindex(height))
 		goto out;
 
-	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
-	pathp->node = NULL;
+	shift = height * RADIX_TREE_MAP_SHIFT;
 	slot = indirect_to_ptr(root->rnode);
 
-	while (height > 0) {
-		int offset;
-
+	while (shift) {
 		if (slot == NULL)
 			goto out;
 
-		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
-		pathp[1].offset = offset;
-		pathp[1].node = slot;
-		slot = slot->slots[offset];
-		pathp++;
 		shift -= RADIX_TREE_MAP_SHIFT;
-		height--;
+		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
+		node = slot;
+		slot = slot->slots[offset];
 	}
 
 	if (slot == NULL)
 		goto out;
 
-	while (pathp->node) {
-		if (!tag_get(pathp->node, tag, pathp->offset))
+	while (node) {
+		if (!tag_get(node, tag, offset))
 			goto out;
-		tag_clear(pathp->node, tag, pathp->offset);
-		if (any_tag_set(pathp->node, tag))
+		tag_clear(node, tag, offset);
+		if (any_tag_set(node, tag))
 			goto out;
-		pathp--;
+
+		index >>= RADIX_TREE_MAP_SHIFT;
+		offset = index & RADIX_TREE_MAP_MASK;
+		node = node->parent;
 	}
 
 	/* clear the root's tag bit */
@@ -646,8 +645,7 @@
 		unsigned int iftag, unsigned int settag)
 {
 	unsigned int height = root->height;
-	struct radix_tree_path path[height];
-	struct radix_tree_path *pathp = path;
+	struct radix_tree_node *node = NULL;
 	struct radix_tree_node *slot;
 	unsigned int shift;
 	unsigned long tagged = 0;
@@ -671,14 +669,8 @@
 	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
 	slot = indirect_to_ptr(root->rnode);
 
-	/*
-	 * we fill the path from (root->height - 2) to 0, leaving the index at
-	 * (root->height - 1) as a terminator. Zero the node in the terminator
-	 * so that we can use this to end walk loops back up the path.
-	 */
-	path[height - 1].node = NULL;
-
 	for (;;) {
+		unsigned long upindex;
 		int offset;
 
 		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
@@ -686,12 +678,10 @@
 			goto next;
 		if (!tag_get(slot, iftag, offset))
 			goto next;
-		if (height > 1) {
+		if (shift) {
 			/* Go down one level */
-			height--;
 			shift -= RADIX_TREE_MAP_SHIFT;
-			path[height - 1].node = slot;
-			path[height - 1].offset = offset;
+			node = slot;
 			slot = slot->slots[offset];
 			continue;
 		}
@@ -701,15 +691,27 @@
 		tag_set(slot, settag, offset);
 
 		/* walk back up the path tagging interior nodes */
-		pathp = &path[0];
-		while (pathp->node) {
+		upindex = index;
+		while (node) {
+			upindex >>= RADIX_TREE_MAP_SHIFT;
+			offset = upindex & RADIX_TREE_MAP_MASK;
+
 			/* stop if we find a node with the tag already set */
-			if (tag_get(pathp->node, settag, pathp->offset))
+			if (tag_get(node, settag, offset))
 				break;
-			tag_set(pathp->node, settag, pathp->offset);
-			pathp++;
+			tag_set(node, settag, offset);
+			node = node->parent;
 		}
 
+		/*
+		 * Small optimization: now clear that node pointer.
+		 * Since all of this slot's ancestors now have the tag set
+		 * from setting it above, we have no further need to walk
+		 * back up the tree setting tags, until we update slot to
+		 * point to another radix_tree_node.
+		 */
+		node = NULL;
+
 next:
 		/* Go to next item at level determined by 'shift' */
 		index = ((index >> shift) + 1) << shift;
@@ -724,8 +726,7 @@
 			 * last_index is guaranteed to be in the tree, what
 			 * we do below cannot wander astray.
 			 */
-			slot = path[height - 1].node;
-			height++;
+			slot = slot->parent;
 			shift += RADIX_TREE_MAP_SHIFT;
 		}
 	}
@@ -1299,7 +1300,7 @@
 	/* try to shrink tree height */
 	while (root->height > 0) {
 		struct radix_tree_node *to_free = root->rnode;
-		void *newptr;
+		struct radix_tree_node *slot;
 
 		BUG_ON(!radix_tree_is_indirect_ptr(to_free));
 		to_free = indirect_to_ptr(to_free);
@@ -1320,10 +1321,12 @@
 		 * (to_free->slots[0]), it will be safe to dereference the new
 		 * one (root->rnode) as far as dependent read barriers go.
 		 */
-		newptr = to_free->slots[0];
-		if (root->height > 1)
-			newptr = ptr_to_indirect(newptr);
-		root->rnode = newptr;
+		slot = to_free->slots[0];
+		if (root->height > 1) {
+			slot->parent = NULL;
+			slot = ptr_to_indirect(slot);
+		}
+		root->rnode = slot;
 		root->height--;
 
 		/*
@@ -1363,16 +1366,12 @@
  */
 void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
 {
-	/*
-	 * The radix tree path needs to be one longer than the maximum path
-	 * since the "list" is null terminated.
-	 */
-	struct radix_tree_path path[RADIX_TREE_MAX_PATH + 1], *pathp = path;
+	struct radix_tree_node *node = NULL;
 	struct radix_tree_node *slot = NULL;
 	struct radix_tree_node *to_free;
 	unsigned int height, shift;
 	int tag;
-	int offset;
+	int uninitialized_var(offset);
 
 	height = root->height;
 	if (index > radix_tree_maxindex(height))
@@ -1385,39 +1384,35 @@
 		goto out;
 	}
 	slot = indirect_to_ptr(slot);
-
-	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
-	pathp->node = NULL;
+	shift = height * RADIX_TREE_MAP_SHIFT;
 
 	do {
 		if (slot == NULL)
 			goto out;
 
-		pathp++;
-		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
-		pathp->offset = offset;
-		pathp->node = slot;
-		slot = slot->slots[offset];
 		shift -= RADIX_TREE_MAP_SHIFT;
-		height--;
-	} while (height > 0);
+		offset = (index >> shift) & RADIX_TREE_MAP_MASK;
+		node = slot;
+		slot = slot->slots[offset];
+	} while (shift);
 
 	if (slot == NULL)
 		goto out;
 
 	/*
-	 * Clear all tags associated with the just-deleted item
+	 * Clear all tags associated with the item to be deleted.
+	 * This way of doing it would be inefficient, but seldom is any set.
 	 */
 	for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
-		if (tag_get(pathp->node, tag, pathp->offset))
+		if (tag_get(node, tag, offset))
 			radix_tree_tag_clear(root, index, tag);
 	}
 
 	to_free = NULL;
 	/* Now free the nodes we do not need anymore */
-	while (pathp->node) {
-		pathp->node->slots[pathp->offset] = NULL;
-		pathp->node->count--;
+	while (node) {
+		node->slots[offset] = NULL;
+		node->count--;
 		/*
 		 * Queue the node for deferred freeing after the
 		 * last reference to it disappears (set NULL, above).
@@ -1425,17 +1420,20 @@
 		if (to_free)
 			radix_tree_node_free(to_free);
 
-		if (pathp->node->count) {
-			if (pathp->node == indirect_to_ptr(root->rnode))
+		if (node->count) {
+			if (node == indirect_to_ptr(root->rnode))
 				radix_tree_shrink(root);
 			goto out;
 		}
 
 		/* Node with zero slots in use so free it */
-		to_free = pathp->node;
-		pathp--;
+		to_free = node;
 
+		index >>= RADIX_TREE_MAP_SHIFT;
+		offset = index & RADIX_TREE_MAP_MASK;
+		node = node->parent;
 	}
+
 	root_tag_clear_all(root);
 	root->height = 0;
 	root->rnode = NULL;

diff --git a/mm/compaction.c b/mm/compaction.c
index e6670c3..71a58f6 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c

@@ -350,7 +350,7 @@
 		}
 
 		if (!cc->sync)
-			mode |= ISOLATE_CLEAN;
+			mode |= ISOLATE_ASYNC_MIGRATE;
 
 		/* Try isolate the page */
 		if (__isolate_lru_page(page, mode, 0) != 0)
@@ -557,7 +557,7 @@
 		nr_migrate = cc->nr_migratepages;
 		err = migrate_pages(&cc->migratepages, compaction_alloc,
 				(unsigned long)cc, false,
-				cc->sync);
+				cc->sync ? MIGRATE_SYNC_LIGHT : MIGRATE_ASYNC);
 		update_nr_listpages(cc);
 		nr_remaining = cc->nr_migratepages;
 
@@ -671,6 +671,7 @@
 			.nr_freepages = 0,
 			.nr_migratepages = 0,
 			.order = -1,
+			.sync = true,
 		};
 
 		zone = &pgdat->node_zones[zoneid];

diff --git a/mm/filemap.c b/mm/filemap.c
index c4ee2e9..97f49ed 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c

@@ -393,24 +393,11 @@
 int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
 {
 	int error;
-	struct mem_cgroup *memcg = NULL;
 
 	VM_BUG_ON(!PageLocked(old));
 	VM_BUG_ON(!PageLocked(new));
 	VM_BUG_ON(new->mapping);
 
-	/*
-	 * This is not page migration, but prepare_migration and
-	 * end_migration does enough work for charge replacement.
-	 *
-	 * In the longer term we probably want a specialized function
-	 * for moving the charge from old to new in a more efficient
-	 * manner.
-	 */
-	error = mem_cgroup_prepare_migration(old, new, &memcg, gfp_mask);
-	if (error)
-		return error;
-
 	error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);
 	if (!error) {
 		struct address_space *mapping = old->mapping;
@@ -432,13 +419,12 @@
 		if (PageSwapBacked(new))
 			__inc_zone_page_state(new, NR_SHMEM);
 		spin_unlock_irq(&mapping->tree_lock);
+		/* mem_cgroup code must not be called under tree_lock */
+		mem_cgroup_replace_page_cache(old, new);
 		radix_tree_preload_end();
 		if (freepage)
 			freepage(old);
 		page_cache_release(old);
-		mem_cgroup_end_migration(memcg, old, new, true);
-	} else {
-		mem_cgroup_end_migration(memcg, old, new, false);
 	}
 
 	return error;

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 36b3d98..b3ffc21 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c

@@ -487,41 +487,68 @@
 	.attrs = khugepaged_attr,
 	.name = "khugepaged",
 };
+
+static int __init hugepage_init_sysfs(struct kobject **hugepage_kobj)
+{
+	int err;
+
+	*hugepage_kobj = kobject_create_and_add("transparent_hugepage", mm_kobj);
+	if (unlikely(!*hugepage_kobj)) {
+		printk(KERN_ERR "hugepage: failed kobject create\n");
+		return -ENOMEM;
+	}
+
+	err = sysfs_create_group(*hugepage_kobj, &hugepage_attr_group);
+	if (err) {
+		printk(KERN_ERR "hugepage: failed register hugeage group\n");
+		goto delete_obj;
+	}
+
+	err = sysfs_create_group(*hugepage_kobj, &khugepaged_attr_group);
+	if (err) {
+		printk(KERN_ERR "hugepage: failed register hugeage group\n");
+		goto remove_hp_group;
+	}
+
+	return 0;
+
+remove_hp_group:
+	sysfs_remove_group(*hugepage_kobj, &hugepage_attr_group);
+delete_obj:
+	kobject_put(*hugepage_kobj);
+	return err;
+}
+
+static void __init hugepage_exit_sysfs(struct kobject *hugepage_kobj)
+{
+	sysfs_remove_group(hugepage_kobj, &khugepaged_attr_group);
+	sysfs_remove_group(hugepage_kobj, &hugepage_attr_group);
+	kobject_put(hugepage_kobj);
+}
+#else
+static inline int hugepage_init_sysfs(struct kobject **hugepage_kobj)
+{
+	return 0;
+}
+
+static inline void hugepage_exit_sysfs(struct kobject *hugepage_kobj)
+{
+}
 #endif /* CONFIG_SYSFS */
 
 static int __init hugepage_init(void)
 {
 	int err;
-#ifdef CONFIG_SYSFS
-	static struct kobject *hugepage_kobj;
-#endif
+	struct kobject *hugepage_kobj;
 
-	err = -EINVAL;
 	if (!has_transparent_hugepage()) {
 		transparent_hugepage_flags = 0;
-		goto out;
+		return -EINVAL;
 	}
 
-#ifdef CONFIG_SYSFS
-	err = -ENOMEM;
-	hugepage_kobj = kobject_create_and_add("transparent_hugepage", mm_kobj);
-	if (unlikely(!hugepage_kobj)) {
-		printk(KERN_ERR "hugepage: failed kobject create\n");
-		goto out;
-	}
-
-	err = sysfs_create_group(hugepage_kobj, &hugepage_attr_group);
-	if (err) {
-		printk(KERN_ERR "hugepage: failed register hugeage group\n");
-		goto out;
-	}
-
-	err = sysfs_create_group(hugepage_kobj, &khugepaged_attr_group);
-	if (err) {
-		printk(KERN_ERR "hugepage: failed register hugeage group\n");
-		goto out;
-	}
-#endif
+	err = hugepage_init_sysfs(&hugepage_kobj);
+	if (err)
+		return err;
 
 	err = khugepaged_slab_init();
 	if (err)
@@ -545,7 +572,9 @@
 
 	set_recommended_min_free_kbytes();
 
+	return 0;
 out:
+	hugepage_exit_sysfs(hugepage_kobj);
 	return err;
 }
 module_init(hugepage_init)
@@ -997,7 +1026,7 @@
 }
 
 int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
-		 pmd_t *pmd)
+		 pmd_t *pmd, unsigned long addr)
 {
 	int ret = 0;
 
@@ -1013,6 +1042,7 @@
 			pgtable = get_pmd_huge_pte(tlb->mm);
 			page = pmd_page(*pmd);
 			pmd_clear(pmd);
+			tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
 			page_remove_rmap(page);
 			VM_BUG_ON(page_mapcount(page) < 0);
 			add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR);
@@ -1116,7 +1146,6 @@
 			entry = pmd_modify(entry, newprot);
 			set_pmd_at(mm, addr, pmd, entry);
 			spin_unlock(&vma->vm_mm->page_table_lock);
-			flush_tlb_range(vma, addr, addr + HPAGE_PMD_SIZE);
 			ret = 1;
 		}
 	} else
@@ -1199,16 +1228,16 @@
 static void __split_huge_page_refcount(struct page *page)
 {
 	int i;
-	unsigned long head_index = page->index;
 	struct zone *zone = page_zone(page);
-	int zonestat;
 	int tail_count = 0;
 
 	/* prevent PageLRU to go away from under us, and freeze lru stats */
 	spin_lock_irq(&zone->lru_lock);
 	compound_lock(page);
+	/* complete memcg work before adding pages to the LRU */
+	mem_cgroup_split_huge_fixup(page);
 
-	for (i = 1; i < HPAGE_PMD_NR; i++) {
+	for (i = HPAGE_PMD_NR - 1; i >= 1; i--) {
 		struct page *page_tail = page + i;
 
 		/* tail_page->_mapcount cannot change */
@@ -1271,14 +1300,13 @@
 		BUG_ON(page_tail->mapping);
 		page_tail->mapping = page->mapping;
 
-		page_tail->index = ++head_index;
+		page_tail->index = page->index + i;
 
 		BUG_ON(!PageAnon(page_tail));
 		BUG_ON(!PageUptodate(page_tail));
 		BUG_ON(!PageDirty(page_tail));
 		BUG_ON(!PageSwapBacked(page_tail));
 
-		mem_cgroup_split_huge_fixup(page, page_tail);
 
 		lru_add_page_tail(zone, page, page_tail);
 	}
@@ -1288,15 +1316,6 @@
 	__dec_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES);
 	__mod_zone_page_state(zone, NR_ANON_PAGES, HPAGE_PMD_NR);
 
-	/*
-	 * A hugepage counts for HPAGE_PMD_NR pages on the LRU statistics,
-	 * so adjust those appropriately if this page is on the LRU.
-	 */
-	if (PageLRU(page)) {
-		zonestat = NR_LRU_BASE + page_lru(page);
-		__mod_zone_page_state(zone, zonestat, -(HPAGE_PMD_NR-1));
-	}
-
 	ClearPageCompound(page);
 	compound_unlock(page);
 	spin_unlock_irq(&zone->lru_lock);

diff --git a/mm/ksm.c b/mm/ksm.c
index 310544a..1925ffb 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c

@@ -28,6 +28,7 @@
 #include <linux/kthread.h>
 #include <linux/wait.h>
 #include <linux/slab.h>
+#include <linux/memcontrol.h>
 #include <linux/rbtree.h>
 #include <linux/memory.h>
 #include <linux/mmu_notifier.h>
@@ -1571,6 +1572,16 @@
 
 	new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
 	if (new_page) {
+		/*
+		 * The memcg-specific accounting when moving
+		 * pages around the LRU lists relies on the
+		 * page's owner (memcg) to be valid.  Usually,
+		 * pages are assigned to a new owner before
+		 * being put on the LRU list, but since this
+		 * is not the case here, the stale owner from
+		 * a previous allocation cycle must be reset.
+		 */
+		mem_cgroup_reset_owner(new_page);
 		copy_user_highpage(new_page, page, address, vma);
 
 		SetPageDirty(new_page);

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index d87aa35..602207b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c

@@ -123,16 +123,22 @@
 	unsigned long targets[MEM_CGROUP_NTARGETS];
 };
 
+struct mem_cgroup_reclaim_iter {
+	/* css_id of the last scanned hierarchy member */
+	int position;
+	/* scan generation, increased every round-trip */
+	unsigned int generation;
+};
+
 /*
  * per-zone information in memory controller.
  */
 struct mem_cgroup_per_zone {
-	/*
-	 * spin_lock to protect the per cgroup LRU
-	 */
-	struct list_head	lists[NR_LRU_LISTS];
+	struct lruvec		lruvec;
 	unsigned long		count[NR_LRU_LISTS];
 
+	struct mem_cgroup_reclaim_iter reclaim_iter[DEF_PRIORITY + 1];
+
 	struct zone_reclaim_stat reclaim_stat;
 	struct rb_node		tree_node;	/* RB tree node */
 	unsigned long long	usage_in_excess;/* Set to the value by which */
@@ -233,11 +239,6 @@
 	 * per zone LRU lists.
 	 */
 	struct mem_cgroup_lru_info info;
-	/*
-	 * While reclaiming in a hierarchy, we cache the last child we
-	 * reclaimed from.
-	 */
-	int last_scanned_child;
 	int last_scanned_node;
 #if MAX_NUMNODES > 1
 	nodemask_t	scan_nodes;
@@ -366,8 +367,6 @@
 #define MEM_CGROUP_RECLAIM_NOSWAP	(1 << MEM_CGROUP_RECLAIM_NOSWAP_BIT)
 #define MEM_CGROUP_RECLAIM_SHRINK_BIT	0x1
 #define MEM_CGROUP_RECLAIM_SHRINK	(1 << MEM_CGROUP_RECLAIM_SHRINK_BIT)
-#define MEM_CGROUP_RECLAIM_SOFT_BIT	0x2
-#define MEM_CGROUP_RECLAIM_SOFT		(1 << MEM_CGROUP_RECLAIM_SOFT_BIT)
 
 static void mem_cgroup_get(struct mem_cgroup *memcg);
 static void mem_cgroup_put(struct mem_cgroup *memcg);
@@ -566,7 +565,7 @@
 	struct mem_cgroup_per_zone *mz;
 	struct mem_cgroup_tree_per_zone *mctz;
 
-	for_each_node_state(node, N_POSSIBLE) {
+	for_each_node(node) {
 		for (zone = 0; zone < MAX_NR_ZONES; zone++) {
 			mz = mem_cgroup_zoneinfo(memcg, node, zone);
 			mctz = soft_limit_tree_node_zone(node, zone);
@@ -656,16 +655,6 @@
 	this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_SWAPOUT], val);
 }
 
-void mem_cgroup_pgfault(struct mem_cgroup *memcg, int val)
-{
-	this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_PGFAULT], val);
-}
-
-void mem_cgroup_pgmajfault(struct mem_cgroup *memcg, int val)
-{
-	this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_PGMAJFAULT], val);
-}
-
 static unsigned long mem_cgroup_read_events(struct mem_cgroup *memcg,
 					    enum mem_cgroup_events_index idx)
 {
@@ -749,37 +738,32 @@
 	return total;
 }
 
-static bool __memcg_event_check(struct mem_cgroup *memcg, int target)
+static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
+				       enum mem_cgroup_events_target target)
 {
 	unsigned long val, next;
 
 	val = __this_cpu_read(memcg->stat->events[MEM_CGROUP_EVENTS_COUNT]);
 	next = __this_cpu_read(memcg->stat->targets[target]);
 	/* from time_after() in jiffies.h */
-	return ((long)next - (long)val < 0);
-}
-
-static void __mem_cgroup_target_update(struct mem_cgroup *memcg, int target)
-{
-	unsigned long val, next;
-
-	val = __this_cpu_read(memcg->stat->events[MEM_CGROUP_EVENTS_COUNT]);
-
-	switch (target) {
-	case MEM_CGROUP_TARGET_THRESH:
-		next = val + THRESHOLDS_EVENTS_TARGET;
-		break;
-	case MEM_CGROUP_TARGET_SOFTLIMIT:
-		next = val + SOFTLIMIT_EVENTS_TARGET;
-		break;
-	case MEM_CGROUP_TARGET_NUMAINFO:
-		next = val + NUMAINFO_EVENTS_TARGET;
-		break;
-	default:
-		return;
+	if ((long)next - (long)val < 0) {
+		switch (target) {
+		case MEM_CGROUP_TARGET_THRESH:
+			next = val + THRESHOLDS_EVENTS_TARGET;
+			break;
+		case MEM_CGROUP_TARGET_SOFTLIMIT:
+			next = val + SOFTLIMIT_EVENTS_TARGET;
+			break;
+		case MEM_CGROUP_TARGET_NUMAINFO:
+			next = val + NUMAINFO_EVENTS_TARGET;
+			break;
+		default:
+			break;
+		}
+		__this_cpu_write(memcg->stat->targets[target], next);
+		return true;
 	}
-
-	__this_cpu_write(memcg->stat->targets[target], next);
+	return false;
 }
 
 /*
@@ -790,25 +774,27 @@
 {
 	preempt_disable();
 	/* threshold event is triggered in finer grain than soft limit */
-	if (unlikely(__memcg_event_check(memcg, MEM_CGROUP_TARGET_THRESH))) {
-		mem_cgroup_threshold(memcg);
-		__mem_cgroup_target_update(memcg, MEM_CGROUP_TARGET_THRESH);
-		if (unlikely(__memcg_event_check(memcg,
-			     MEM_CGROUP_TARGET_SOFTLIMIT))) {
-			mem_cgroup_update_tree(memcg, page);
-			__mem_cgroup_target_update(memcg,
-						   MEM_CGROUP_TARGET_SOFTLIMIT);
-		}
+	if (unlikely(mem_cgroup_event_ratelimit(memcg,
+						MEM_CGROUP_TARGET_THRESH))) {
+		bool do_softlimit, do_numainfo;
+
+		do_softlimit = mem_cgroup_event_ratelimit(memcg,
+						MEM_CGROUP_TARGET_SOFTLIMIT);
 #if MAX_NUMNODES > 1
-		if (unlikely(__memcg_event_check(memcg,
-			MEM_CGROUP_TARGET_NUMAINFO))) {
-			atomic_inc(&memcg->numainfo_events);
-			__mem_cgroup_target_update(memcg,
-				MEM_CGROUP_TARGET_NUMAINFO);
-		}
+		do_numainfo = mem_cgroup_event_ratelimit(memcg,
+						MEM_CGROUP_TARGET_NUMAINFO);
 #endif
-	}
-	preempt_enable();
+		preempt_enable();
+
+		mem_cgroup_threshold(memcg);
+		if (unlikely(do_softlimit))
+			mem_cgroup_update_tree(memcg, page);
+#if MAX_NUMNODES > 1
+		if (unlikely(do_numainfo))
+			atomic_inc(&memcg->numainfo_events);
+#endif
+	} else
+		preempt_enable();
 }
 
 struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont)
@@ -853,83 +839,116 @@
 	return memcg;
 }
 
-/* The caller has to guarantee "mem" exists before calling this */
-static struct mem_cgroup *mem_cgroup_start_loop(struct mem_cgroup *memcg)
+/**
+ * mem_cgroup_iter - iterate over memory cgroup hierarchy
+ * @root: hierarchy root
+ * @prev: previously returned memcg, NULL on first invocation
+ * @reclaim: cookie for shared reclaim walks, NULL for full walks
+ *
+ * Returns references to children of the hierarchy below @root, or
+ * @root itself, or %NULL after a full round-trip.
+ *
+ * Caller must pass the return value in @prev on subsequent
+ * invocations for reference counting, or use mem_cgroup_iter_break()
+ * to cancel a hierarchy walk before the round-trip is complete.
+ *
+ * Reclaimers can specify a zone and a priority level in @reclaim to
+ * divide up the memcgs in the hierarchy among all concurrent
+ * reclaimers operating on the same zone and priority.
+ */
+struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
+				   struct mem_cgroup *prev,
+				   struct mem_cgroup_reclaim_cookie *reclaim)
 {
-	struct cgroup_subsys_state *css;
-	int found;
+	struct mem_cgroup *memcg = NULL;
+	int id = 0;
 
-	if (!memcg) /* ROOT cgroup has the smallest ID */
-		return root_mem_cgroup; /*css_put/get against root is ignored*/
-	if (!memcg->use_hierarchy) {
-		if (css_tryget(&memcg->css))
-			return memcg;
-		return NULL;
-	}
-	rcu_read_lock();
-	/*
-	 * searching a memory cgroup which has the smallest ID under given
-	 * ROOT cgroup. (ID >= 1)
-	 */
-	css = css_get_next(&mem_cgroup_subsys, 1, &memcg->css, &found);
-	if (css && css_tryget(css))
-		memcg = container_of(css, struct mem_cgroup, css);
-	else
-		memcg = NULL;
-	rcu_read_unlock();
-	return memcg;
-}
-
-static struct mem_cgroup *mem_cgroup_get_next(struct mem_cgroup *iter,
-					struct mem_cgroup *root,
-					bool cond)
-{
-	int nextid = css_id(&iter->css) + 1;
-	int found;
-	int hierarchy_used;
-	struct cgroup_subsys_state *css;
-
-	hierarchy_used = iter->use_hierarchy;
-
-	css_put(&iter->css);
-	/* If no ROOT, walk all, ignore hierarchy */
-	if (!cond || (root && !hierarchy_used))
+	if (mem_cgroup_disabled())
 		return NULL;
 
 	if (!root)
 		root = root_mem_cgroup;
 
-	do {
-		iter = NULL;
+	if (prev && !reclaim)
+		id = css_id(&prev->css);
+
+	if (prev && prev != root)
+		css_put(&prev->css);
+
+	if (!root->use_hierarchy && root != root_mem_cgroup) {
+		if (prev)
+			return NULL;
+		return root;
+	}
+
+	while (!memcg) {
+		struct mem_cgroup_reclaim_iter *uninitialized_var(iter);
+		struct cgroup_subsys_state *css;
+
+		if (reclaim) {
+			int nid = zone_to_nid(reclaim->zone);
+			int zid = zone_idx(reclaim->zone);
+			struct mem_cgroup_per_zone *mz;
+
+			mz = mem_cgroup_zoneinfo(root, nid, zid);
+			iter = &mz->reclaim_iter[reclaim->priority];
+			if (prev && reclaim->generation != iter->generation)
+				return NULL;
+			id = iter->position;
+		}
+
 		rcu_read_lock();
-
-		css = css_get_next(&mem_cgroup_subsys, nextid,
-				&root->css, &found);
-		if (css && css_tryget(css))
-			iter = container_of(css, struct mem_cgroup, css);
+		css = css_get_next(&mem_cgroup_subsys, id + 1, &root->css, &id);
+		if (css) {
+			if (css == &root->css || css_tryget(css))
+				memcg = container_of(css,
+						     struct mem_cgroup, css);
+		} else
+			id = 0;
 		rcu_read_unlock();
-		/* If css is NULL, no more cgroups will be found */
-		nextid = found + 1;
-	} while (css && !iter);
 
-	return iter;
+		if (reclaim) {
+			iter->position = id;
+			if (!css)
+				iter->generation++;
+			else if (!prev && memcg)
+				reclaim->generation = iter->generation;
+		}
+
+		if (prev && !css)
+			return NULL;
+	}
+	return memcg;
 }
-/*
- * for_eacn_mem_cgroup_tree() for visiting all cgroup under tree. Please
- * be careful that "break" loop is not allowed. We have reference count.
- * Instead of that modify "cond" to be false and "continue" to exit the loop.
+
+/**
+ * mem_cgroup_iter_break - abort a hierarchy walk prematurely
+ * @root: hierarchy root
+ * @prev: last visited hierarchy member as returned by mem_cgroup_iter()
  */
-#define for_each_mem_cgroup_tree_cond(iter, root, cond)	\
-	for (iter = mem_cgroup_start_loop(root);\
-	     iter != NULL;\
-	     iter = mem_cgroup_get_next(iter, root, cond))
+void mem_cgroup_iter_break(struct mem_cgroup *root,
+			   struct mem_cgroup *prev)
+{
+	if (!root)
+		root = root_mem_cgroup;
+	if (prev && prev != root)
+		css_put(&prev->css);
+}
 
-#define for_each_mem_cgroup_tree(iter, root) \
-	for_each_mem_cgroup_tree_cond(iter, root, true)
+/*
+ * Iteration constructs for visiting all cgroups (under a tree).  If
+ * loops are exited prematurely (break), mem_cgroup_iter_break() must
+ * be used for reference counting.
+ */
+#define for_each_mem_cgroup_tree(iter, root)		\
+	for (iter = mem_cgroup_iter(root, NULL, NULL);	\
+	     iter != NULL;				\
+	     iter = mem_cgroup_iter(root, iter, NULL))
 
-#define for_each_mem_cgroup_all(iter) \
-	for_each_mem_cgroup_tree_cond(iter, NULL, true)
-
+#define for_each_mem_cgroup(iter)			\
+	for (iter = mem_cgroup_iter(NULL, NULL, NULL);	\
+	     iter != NULL;				\
+	     iter = mem_cgroup_iter(NULL, iter, NULL))
 
 static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
 {
@@ -949,11 +968,11 @@
 		goto out;
 
 	switch (idx) {
-	case PGMAJFAULT:
-		mem_cgroup_pgmajfault(memcg, 1);
-		break;
 	case PGFAULT:
-		mem_cgroup_pgfault(memcg, 1);
+		this_cpu_inc(memcg->stat->events[MEM_CGROUP_EVENTS_PGFAULT]);
+		break;
+	case PGMAJFAULT:
+		this_cpu_inc(memcg->stat->events[MEM_CGROUP_EVENTS_PGMAJFAULT]);
 		break;
 	default:
 		BUG();
@@ -963,6 +982,27 @@
 }
 EXPORT_SYMBOL(mem_cgroup_count_vm_event);
 
+/**
+ * mem_cgroup_zone_lruvec - get the lru list vector for a zone and memcg
+ * @zone: zone of the wanted lruvec
+ * @memcg: memcg of the wanted lruvec
+ *
+ * Returns the lru list vector holding pages for the given @zone and
+ * @memcg.  This can be the global zone lruvec, if the memory controller
+ * is disabled.
+ */
+struct lruvec *mem_cgroup_zone_lruvec(struct zone *zone,
+				      struct mem_cgroup *memcg)
+{
+	struct mem_cgroup_per_zone *mz;
+
+	if (mem_cgroup_disabled())
+		return &zone->lruvec;
+
+	mz = mem_cgroup_zoneinfo(memcg, zone_to_nid(zone), zone_idx(zone));
+	return &mz->lruvec;
+}
+
 /*
  * Following LRU functions are allowed to be used without PCG_LOCK.
  * Operations are called by routine of global LRU independently from memcg.
@@ -977,180 +1017,91 @@
  * When moving account, the page is not on LRU. It's isolated.
  */
 
-void mem_cgroup_del_lru_list(struct page *page, enum lru_list lru)
-{
-	struct page_cgroup *pc;
-	struct mem_cgroup_per_zone *mz;
-
-	if (mem_cgroup_disabled())
-		return;
-	pc = lookup_page_cgroup(page);
-	/* can happen while we handle swapcache. */
-	if (!TestClearPageCgroupAcctLRU(pc))
-		return;
-	VM_BUG_ON(!pc->mem_cgroup);
-	/*
-	 * We don't check PCG_USED bit. It's cleared when the "page" is finally
-	 * removed from global LRU.
-	 */
-	mz = page_cgroup_zoneinfo(pc->mem_cgroup, page);
-	/* huge page split is done under lru_lock. so, we have no races. */
-	MEM_CGROUP_ZSTAT(mz, lru) -= 1 << compound_order(page);
-	if (mem_cgroup_is_root(pc->mem_cgroup))
-		return;
-	VM_BUG_ON(list_empty(&pc->lru));
-	list_del_init(&pc->lru);
-}
-
-void mem_cgroup_del_lru(struct page *page)
-{
-	mem_cgroup_del_lru_list(page, page_lru(page));
-}
-
-/*
- * Writeback is about to end against a page which has been marked for immediate
- * reclaim.  If it still appears to be reclaimable, move it to the tail of the
- * inactive list.
+/**
+ * mem_cgroup_lru_add_list - account for adding an lru page and return lruvec
+ * @zone: zone of the page
+ * @page: the page
+ * @lru: current lru
+ *
+ * This function accounts for @page being added to @lru, and returns
+ * the lruvec for the given @zone and the memcg @page is charged to.
+ *
+ * The callsite is then responsible for physically linking the page to
+ * the returned lruvec->lists[@lru].
  */
-void mem_cgroup_rotate_reclaimable_page(struct page *page)
+struct lruvec *mem_cgroup_lru_add_list(struct zone *zone, struct page *page,
+				       enum lru_list lru)
 {
 	struct mem_cgroup_per_zone *mz;
-	struct page_cgroup *pc;
-	enum lru_list lru = page_lru(page);
-
-	if (mem_cgroup_disabled())
-		return;
-
-	pc = lookup_page_cgroup(page);
-	/* unused or root page is not rotated. */
-	if (!PageCgroupUsed(pc))
-		return;
-	/* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
-	smp_rmb();
-	if (mem_cgroup_is_root(pc->mem_cgroup))
-		return;
-	mz = page_cgroup_zoneinfo(pc->mem_cgroup, page);
-	list_move_tail(&pc->lru, &mz->lists[lru]);
-}
-
-void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru)
-{
-	struct mem_cgroup_per_zone *mz;
+	struct mem_cgroup *memcg;
 	struct page_cgroup *pc;
 
 	if (mem_cgroup_disabled())
-		return;
+		return &zone->lruvec;
 
 	pc = lookup_page_cgroup(page);
-	/* unused or root page is not rotated. */
-	if (!PageCgroupUsed(pc))
-		return;
-	/* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
-	smp_rmb();
-	if (mem_cgroup_is_root(pc->mem_cgroup))
-		return;
-	mz = page_cgroup_zoneinfo(pc->mem_cgroup, page);
-	list_move(&pc->lru, &mz->lists[lru]);
-}
-
-void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru)
-{
-	struct page_cgroup *pc;
-	struct mem_cgroup_per_zone *mz;
-
-	if (mem_cgroup_disabled())
-		return;
-	pc = lookup_page_cgroup(page);
-	VM_BUG_ON(PageCgroupAcctLRU(pc));
-	/*
-	 * putback:				charge:
-	 * SetPageLRU				SetPageCgroupUsed
-	 * smp_mb				smp_mb
-	 * PageCgroupUsed && add to memcg LRU	PageLRU && add to memcg LRU
-	 *
-	 * Ensure that one of the two sides adds the page to the memcg
-	 * LRU during a race.
-	 */
-	smp_mb();
-	if (!PageCgroupUsed(pc))
-		return;
-	/* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
-	smp_rmb();
-	mz = page_cgroup_zoneinfo(pc->mem_cgroup, page);
-	/* huge page split is done under lru_lock. so, we have no races. */
+	memcg = pc->mem_cgroup;
+	mz = page_cgroup_zoneinfo(memcg, page);
+	/* compound_order() is stabilized through lru_lock */
 	MEM_CGROUP_ZSTAT(mz, lru) += 1 << compound_order(page);
-	SetPageCgroupAcctLRU(pc);
-	if (mem_cgroup_is_root(pc->mem_cgroup))
-		return;
-	list_add(&pc->lru, &mz->lists[lru]);
+	return &mz->lruvec;
 }
 
-/*
- * At handling SwapCache and other FUSE stuff, pc->mem_cgroup may be changed
- * while it's linked to lru because the page may be reused after it's fully
- * uncharged. To handle that, unlink page_cgroup from LRU when charge it again.
- * It's done under lock_page and expected that zone->lru_lock isnever held.
+/**
+ * mem_cgroup_lru_del_list - account for removing an lru page
+ * @page: the page
+ * @lru: target lru
+ *
+ * This function accounts for @page being removed from @lru.
+ *
+ * The callsite is then responsible for physically unlinking
+ * @page->lru.
  */
-static void mem_cgroup_lru_del_before_commit(struct page *page)
+void mem_cgroup_lru_del_list(struct page *page, enum lru_list lru)
 {
-	unsigned long flags;
-	struct zone *zone = page_zone(page);
-	struct page_cgroup *pc = lookup_page_cgroup(page);
+	struct mem_cgroup_per_zone *mz;
+	struct mem_cgroup *memcg;
+	struct page_cgroup *pc;
 
-	/*
-	 * Doing this check without taking ->lru_lock seems wrong but this
-	 * is safe. Because if page_cgroup's USED bit is unset, the page
-	 * will not be added to any memcg's LRU. If page_cgroup's USED bit is
-	 * set, the commit after this will fail, anyway.
-	 * This all charge/uncharge is done under some mutual execustion.
-	 * So, we don't need to taking care of changes in USED bit.
-	 */
-	if (likely(!PageLRU(page)))
-		return;
-
-	spin_lock_irqsave(&zone->lru_lock, flags);
-	/*
-	 * Forget old LRU when this page_cgroup is *not* used. This Used bit
-	 * is guarded by lock_page() because the page is SwapCache.
-	 */
-	if (!PageCgroupUsed(pc))
-		mem_cgroup_del_lru_list(page, page_lru(page));
-	spin_unlock_irqrestore(&zone->lru_lock, flags);
-}
-
-static void mem_cgroup_lru_add_after_commit(struct page *page)
-{
-	unsigned long flags;
-	struct zone *zone = page_zone(page);
-	struct page_cgroup *pc = lookup_page_cgroup(page);
-	/*
-	 * putback:				charge:
-	 * SetPageLRU				SetPageCgroupUsed
-	 * smp_mb				smp_mb
-	 * PageCgroupUsed && add to memcg LRU	PageLRU && add to memcg LRU
-	 *
-	 * Ensure that one of the two sides adds the page to the memcg
-	 * LRU during a race.
-	 */
-	smp_mb();
-	/* taking care of that the page is added to LRU while we commit it */
-	if (likely(!PageLRU(page)))
-		return;
-	spin_lock_irqsave(&zone->lru_lock, flags);
-	/* link when the page is linked to LRU but page_cgroup isn't */
-	if (PageLRU(page) && !PageCgroupAcctLRU(pc))
-		mem_cgroup_add_lru_list(page, page_lru(page));
-	spin_unlock_irqrestore(&zone->lru_lock, flags);
-}
-
-
-void mem_cgroup_move_lists(struct page *page,
-			   enum lru_list from, enum lru_list to)
-{
 	if (mem_cgroup_disabled())
 		return;
-	mem_cgroup_del_lru_list(page, from);
-	mem_cgroup_add_lru_list(page, to);
+
+	pc = lookup_page_cgroup(page);
+	memcg = pc->mem_cgroup;
+	VM_BUG_ON(!memcg);
+	mz = page_cgroup_zoneinfo(memcg, page);
+	/* huge page split is done under lru_lock, so we have no races. */
+	VM_BUG_ON(MEM_CGROUP_ZSTAT(mz, lru) < (1 << compound_order(page)));
+	MEM_CGROUP_ZSTAT(mz, lru) -= 1 << compound_order(page);
+}
+
+void mem_cgroup_lru_del(struct page *page)
+{
+	mem_cgroup_lru_del_list(page, page_lru(page));
+}
+
+/**
+ * mem_cgroup_lru_move_lists - account for moving a page between lrus
+ * @zone: zone of the page
+ * @page: the page
+ * @from: current lru
+ * @to: target lru
+ *
+ * This function accounts for @page being moved between the lrus @from
+ * and @to, and returns the lruvec for the given @zone and the memcg
+ * @page is charged to.
+ *
+ * The callsite is then responsible for physically relinking
+ * @page->lru to the returned lruvec->lists[@to].
+ */
+struct lruvec *mem_cgroup_lru_move_lists(struct zone *zone,
+					 struct page *page,
+					 enum lru_list from,
+					 enum lru_list to)
+{
+	/* XXX: Optimize this, especially for @from == @to */
+	mem_cgroup_lru_del_list(page, from);
+	return mem_cgroup_lru_add_list(zone, page, to);
 }
 
 /*
@@ -1175,10 +1126,21 @@
 	struct task_struct *p;
 
 	p = find_lock_task_mm(task);
-	if (!p)
-		return 0;
-	curr = try_get_mem_cgroup_from_mm(p->mm);
-	task_unlock(p);
+	if (p) {
+		curr = try_get_mem_cgroup_from_mm(p->mm);
+		task_unlock(p);
+	} else {
+		/*
+		 * All threads may have already detached their mm's, but the oom
+		 * killer still needs to detect if they have already been oom
+		 * killed to prevent needlessly killing additional tasks.
+		 */
+		task_lock(task);
+		curr = mem_cgroup_from_task(task);
+		if (curr)
+			css_get(&curr->css);
+		task_unlock(task);
+	}
 	if (!curr)
 		return 0;
 	/*
@@ -1258,68 +1220,6 @@
 	return &mz->reclaim_stat;
 }
 
-unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
-					struct list_head *dst,
-					unsigned long *scanned, int order,
-					isolate_mode_t mode,
-					struct zone *z,
-					struct mem_cgroup *mem_cont,
-					int active, int file)
-{
-	unsigned long nr_taken = 0;
-	struct page *page;
-	unsigned long scan;
-	LIST_HEAD(pc_list);
-	struct list_head *src;
-	struct page_cgroup *pc, *tmp;
-	int nid = zone_to_nid(z);
-	int zid = zone_idx(z);
-	struct mem_cgroup_per_zone *mz;
-	int lru = LRU_FILE * file + active;
-	int ret;
-
-	BUG_ON(!mem_cont);
-	mz = mem_cgroup_zoneinfo(mem_cont, nid, zid);
-	src = &mz->lists[lru];
-
-	scan = 0;
-	list_for_each_entry_safe_reverse(pc, tmp, src, lru) {
-		if (scan >= nr_to_scan)
-			break;
-
-		if (unlikely(!PageCgroupUsed(pc)))
-			continue;
-
-		page = lookup_cgroup_page(pc);
-
-		if (unlikely(!PageLRU(page)))
-			continue;
-
-		scan++;
-		ret = __isolate_lru_page(page, mode, file);
-		switch (ret) {
-		case 0:
-			list_move(&page->lru, dst);
-			mem_cgroup_del_lru(page);
-			nr_taken += hpage_nr_pages(page);
-			break;
-		case -EBUSY:
-			/* we don't affect global LRU but rotate in our LRU */
-			mem_cgroup_rotate_lru_list(page, page_lru(page));
-			break;
-		default:
-			break;
-		}
-	}
-
-	*scanned = scan;
-
-	trace_mm_vmscan_memcg_isolate(0, nr_to_scan, scan, nr_taken,
-				      0, 0, 0, mode);
-
-	return nr_taken;
-}
-
 #define mem_cgroup_from_res_counter(counter, member)	\
 	container_of(counter, struct mem_cgroup, member)
 
@@ -1536,41 +1436,40 @@
 	return min(limit, memsw);
 }
 
-/*
- * Visit the first child (need not be the first child as per the ordering
- * of the cgroup list, since we track last_scanned_child) of @mem and use
- * that to reclaim free pages from.
- */
-static struct mem_cgroup *
-mem_cgroup_select_victim(struct mem_cgroup *root_memcg)
+static unsigned long mem_cgroup_reclaim(struct mem_cgroup *memcg,
+					gfp_t gfp_mask,
+					unsigned long flags)
 {
-	struct mem_cgroup *ret = NULL;
-	struct cgroup_subsys_state *css;
-	int nextid, found;
+	unsigned long total = 0;
+	bool noswap = false;
+	int loop;
 
-	if (!root_memcg->use_hierarchy) {
-		css_get(&root_memcg->css);
-		ret = root_memcg;
+	if (flags & MEM_CGROUP_RECLAIM_NOSWAP)
+		noswap = true;
+	if (!(flags & MEM_CGROUP_RECLAIM_SHRINK) && memcg->memsw_is_minimum)
+		noswap = true;
+
+	for (loop = 0; loop < MEM_CGROUP_MAX_RECLAIM_LOOPS; loop++) {
+		if (loop)
+			drain_all_stock_async(memcg);
+		total += try_to_free_mem_cgroup_pages(memcg, gfp_mask, noswap);
+		/*
+		 * Allow limit shrinkers, which are triggered directly
+		 * by userspace, to catch signals and stop reclaim
+		 * after minimal progress, regardless of the margin.
+		 */
+		if (total && (flags & MEM_CGROUP_RECLAIM_SHRINK))
+			break;
+		if (mem_cgroup_margin(memcg))
+			break;
+		/*
+		 * If nothing was reclaimed after two attempts, there
+		 * may be no reclaimable pages in this hierarchy.
+		 */
+		if (loop && !total)
+			break;
 	}
-
-	while (!ret) {
-		rcu_read_lock();
-		nextid = root_memcg->last_scanned_child + 1;
-		css = css_get_next(&mem_cgroup_subsys, nextid, &root_memcg->css,
-				   &found);
-		if (css && css_tryget(css))
-			ret = container_of(css, struct mem_cgroup, css);
-
-		rcu_read_unlock();
-		/* Updates scanning parameter */
-		if (!css) {
-			/* this means start scan from ID:1 */
-			root_memcg->last_scanned_child = 0;
-		} else
-			root_memcg->last_scanned_child = found;
-	}
-
-	return ret;
+	return total;
 }
 
 /**
@@ -1710,61 +1609,35 @@
 }
 #endif
 
-/*
- * Scan the hierarchy if needed to reclaim memory. We remember the last child
- * we reclaimed from, so that we don't end up penalizing one child extensively
- * based on its position in the children list.
- *
- * root_memcg is the original ancestor that we've been reclaim from.
- *
- * We give up and return to the caller when we visit root_memcg twice.
- * (other groups can be removed while we're walking....)
- *
- * If shrink==true, for avoiding to free too much, this returns immedieately.
- */
-static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_memcg,
-						struct zone *zone,
-						gfp_t gfp_mask,
-						unsigned long reclaim_options,
-						unsigned long *total_scanned)
+static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
+				   struct zone *zone,
+				   gfp_t gfp_mask,
+				   unsigned long *total_scanned)
 {
-	struct mem_cgroup *victim;
-	int ret, total = 0;
+	struct mem_cgroup *victim = NULL;
+	int total = 0;
 	int loop = 0;
-	bool noswap = reclaim_options & MEM_CGROUP_RECLAIM_NOSWAP;
-	bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK;
-	bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT;
 	unsigned long excess;
 	unsigned long nr_scanned;
+	struct mem_cgroup_reclaim_cookie reclaim = {
+		.zone = zone,
+		.priority = 0,
+	};
 
 	excess = res_counter_soft_limit_excess(&root_memcg->res) >> PAGE_SHIFT;
 
-	/* If memsw_is_minimum==1, swap-out is of-no-use. */
-	if (!check_soft && !shrink && root_memcg->memsw_is_minimum)
-		noswap = true;
-
 	while (1) {
-		victim = mem_cgroup_select_victim(root_memcg);
-		if (victim == root_memcg) {
+		victim = mem_cgroup_iter(root_memcg, victim, &reclaim);
+		if (!victim) {
 			loop++;
-			/*
-			 * We are not draining per cpu cached charges during
-			 * soft limit reclaim  because global reclaim doesn't
-			 * care about charges. It tries to free some memory and
-			 * charges will not give any.
-			 */
-			if (!check_soft && loop >= 1)
-				drain_all_stock_async(root_memcg);
 			if (loop >= 2) {
 				/*
 				 * If we have not been able to reclaim
 				 * anything, it might because there are
 				 * no reclaimable pages under this hierarchy
 				 */
-				if (!check_soft || !total) {
-					css_put(&victim->css);
+				if (!total)
 					break;
-				}
 				/*
 				 * We want to do more targeted reclaim.
 				 * excess >> 2 is not to excessive so as to
@@ -1772,40 +1645,20 @@
 				 * coming back to reclaim from this cgroup
 				 */
 				if (total >= (excess >> 2) ||
-					(loop > MEM_CGROUP_MAX_RECLAIM_LOOPS)) {
-					css_put(&victim->css);
+					(loop > MEM_CGROUP_MAX_RECLAIM_LOOPS))
 					break;
-				}
 			}
-		}
-		if (!mem_cgroup_reclaimable(victim, noswap)) {
-			/* this cgroup's local usage == 0 */
-			css_put(&victim->css);
 			continue;
 		}
-		/* we use swappiness of local cgroup */
-		if (check_soft) {
-			ret = mem_cgroup_shrink_node_zone(victim, gfp_mask,
-				noswap, zone, &nr_scanned);
-			*total_scanned += nr_scanned;
-		} else
-			ret = try_to_free_mem_cgroup_pages(victim, gfp_mask,
-						noswap);
-		css_put(&victim->css);
-		/*
-		 * At shrinking usage, we can't check we should stop here or
-		 * reclaim more. It's depends on callers. last_scanned_child
-		 * will work enough for keeping fairness under tree.
-		 */
-		if (shrink)
-			return ret;
-		total += ret;
-		if (check_soft) {
-			if (!res_counter_soft_limit_excess(&root_memcg->res))
-				return total;
-		} else if (mem_cgroup_margin(root_memcg))
-			return total;
+		if (!mem_cgroup_reclaimable(victim, false))
+			continue;
+		total += mem_cgroup_shrink_node_zone(victim, gfp_mask, false,
+						     zone, &nr_scanned);
+		*total_scanned += nr_scanned;
+		if (!res_counter_soft_limit_excess(&root_memcg->res))
+			break;
 	}
+	mem_cgroup_iter_break(root_memcg, victim);
 	return total;
 }
 
@@ -1817,16 +1670,16 @@
 static bool mem_cgroup_oom_lock(struct mem_cgroup *memcg)
 {
 	struct mem_cgroup *iter, *failed = NULL;
-	bool cond = true;
 
-	for_each_mem_cgroup_tree_cond(iter, memcg, cond) {
+	for_each_mem_cgroup_tree(iter, memcg) {
 		if (iter->oom_lock) {
 			/*
 			 * this subtree of our hierarchy is already locked
 			 * so we cannot give a lock.
 			 */
 			failed = iter;
-			cond = false;
+			mem_cgroup_iter_break(memcg, iter);
+			break;
 		} else
 			iter->oom_lock = true;
 	}
@@ -1838,11 +1691,10 @@
 	 * OK, we failed to lock the whole subtree so we have to clean up
 	 * what we set up to the failing subtree
 	 */
-	cond = true;
-	for_each_mem_cgroup_tree_cond(iter, memcg, cond) {
+	for_each_mem_cgroup_tree(iter, memcg) {
 		if (iter == failed) {
-			cond = false;
-			continue;
+			mem_cgroup_iter_break(memcg, iter);
+			break;
 		}
 		iter->oom_lock = false;
 	}
@@ -2007,7 +1859,7 @@
 	bool need_unlock = false;
 	unsigned long uninitialized_var(flags);
 
-	if (unlikely(!pc))
+	if (mem_cgroup_disabled())
 		return;
 
 	rcu_read_lock();
@@ -2238,7 +2090,7 @@
 	struct mem_cgroup *iter;
 
 	if ((action == CPU_ONLINE)) {
-		for_each_mem_cgroup_all(iter)
+		for_each_mem_cgroup(iter)
 			synchronize_mem_cgroup_on_move(iter, cpu);
 		return NOTIFY_OK;
 	}
@@ -2246,7 +2098,7 @@
 	if ((action != CPU_DEAD) || action != CPU_DEAD_FROZEN)
 		return NOTIFY_OK;
 
-	for_each_mem_cgroup_all(iter)
+	for_each_mem_cgroup(iter)
 		mem_cgroup_drain_pcp_counter(iter, cpu);
 
 	stock = &per_cpu(memcg_stock, cpu);
@@ -2300,8 +2152,7 @@
 	if (!(gfp_mask & __GFP_WAIT))
 		return CHARGE_WOULDBLOCK;
 
-	ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, NULL,
-					      gfp_mask, flags, NULL);
+	ret = mem_cgroup_reclaim(mem_over_limit, gfp_mask, flags);
 	if (mem_cgroup_margin(mem_over_limit) >= nr_pages)
 		return CHARGE_RETRY;
 	/*
@@ -2334,8 +2185,25 @@
 }
 
 /*
- * Unlike exported interface, "oom" parameter is added. if oom==true,
- * oom-killer can be invoked.
+ * __mem_cgroup_try_charge() does
+ * 1. detect memcg to be charged against from passed *mm and *ptr,
+ * 2. update res_counter
+ * 3. call memory reclaim if necessary.
+ *
+ * In the special case where the task is dying (fatal_signal_pending() or
+ * TIF_MEMDIE set), this function returns -EINTR while writing root_mem_cgroup
+ * to *ptr. There are two reasons for this. 1: fatal threads should quit as soon
+ * as possible without any hazards. 2: all pages should have a valid
+ * pc->mem_cgroup. If mm is NULL and the caller doesn't pass a valid memcg
+ * pointer, that is treated as a charge to root_mem_cgroup.
+ *
+ * So __mem_cgroup_try_charge() will return
+ *  0       ...  on success, filling *ptr with a valid memcg pointer.
+ *  -ENOMEM ...  charge failure because of resource limits.
+ *  -EINTR  ...  if thread is fatal. *ptr is filled with root_mem_cgroup.
+ *
+ * Unlike the exported interface, an "oom" parameter is added. if oom==true,
+ * the oom-killer can be invoked.
  */
 static int __mem_cgroup_try_charge(struct mm_struct *mm,
 				   gfp_t gfp_mask,
@@ -2364,7 +2232,7 @@
 	 * set, if so charge the init_mm (happens for pagecache usage).
 	 */
 	if (!*ptr && !mm)
-		goto bypass;
+		*ptr = root_mem_cgroup;
 again:
 	if (*ptr) { /* css should be a valid one */
 		memcg = *ptr;
@@ -2390,7 +2258,9 @@
 		 * task-struct. So, mm->owner can be NULL.
 		 */
 		memcg = mem_cgroup_from_task(p);
-		if (!memcg || mem_cgroup_is_root(memcg)) {
+		if (!memcg)
+			memcg = root_mem_cgroup;
+		if (mem_cgroup_is_root(memcg)) {
 			rcu_read_unlock();
 			goto done;
 		}
@@ -2465,8 +2335,8 @@
 	*ptr = NULL;
 	return -ENOMEM;
 bypass:
-	*ptr = NULL;
-	return 0;
+	*ptr = root_mem_cgroup;
+	return -EINTR;
 }
 
 /*
@@ -2522,7 +2392,7 @@
 			memcg = NULL;
 	} else if (PageSwapCache(page)) {
 		ent.val = page_private(page);
-		id = lookup_swap_cgroup(ent);
+		id = lookup_swap_cgroup_id(ent);
 		rcu_read_lock();
 		memcg = mem_cgroup_lookup(id);
 		if (memcg && !css_tryget(&memcg->css))
@@ -2574,6 +2444,7 @@
 
 	mem_cgroup_charge_statistics(memcg, PageCgroupCache(pc), nr_pages);
 	unlock_page_cgroup(pc);
+	WARN_ON_ONCE(PageLRU(page));
 	/*
 	 * "charge_statistics" updated event counter. Then, check it.
 	 * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree.
@@ -2585,44 +2456,29 @@
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 
 #define PCGF_NOCOPY_AT_SPLIT ((1 << PCG_LOCK) | (1 << PCG_MOVE_LOCK) |\
-			(1 << PCG_ACCT_LRU) | (1 << PCG_MIGRATION))
+			(1 << PCG_MIGRATION))
 /*
  * Because tail pages are not marked as "used", set it. We're under
- * zone->lru_lock, 'splitting on pmd' and compund_lock.
+ * zone->lru_lock, 'splitting on pmd' and compound_lock.
+ * charge/uncharge will never happen and move_account() is done under
+ * compound_lock(), so we don't have to take care of races.
  */
-void mem_cgroup_split_huge_fixup(struct page *head, struct page *tail)
+void mem_cgroup_split_huge_fixup(struct page *head)
 {
 	struct page_cgroup *head_pc = lookup_page_cgroup(head);
-	struct page_cgroup *tail_pc = lookup_page_cgroup(tail);
-	unsigned long flags;
+	struct page_cgroup *pc;
+	int i;
 
 	if (mem_cgroup_disabled())
 		return;
-	/*
-	 * We have no races with charge/uncharge but will have races with
-	 * page state accounting.
-	 */
-	move_lock_page_cgroup(head_pc, &flags);
-
-	tail_pc->mem_cgroup = head_pc->mem_cgroup;
-	smp_wmb(); /* see __commit_charge() */
-	if (PageCgroupAcctLRU(head_pc)) {
-		enum lru_list lru;
-		struct mem_cgroup_per_zone *mz;
-
-		/*
-		 * LRU flags cannot be copied because we need to add tail
-		 *.page to LRU by generic call and our hook will be called.
-		 * We hold lru_lock, then, reduce counter directly.
-		 */
-		lru = page_lru(head);
-		mz = page_cgroup_zoneinfo(head_pc->mem_cgroup, head);
-		MEM_CGROUP_ZSTAT(mz, lru) -= 1;
+	for (i = 1; i < HPAGE_PMD_NR; i++) {
+		pc = head_pc + i;
+		pc->mem_cgroup = head_pc->mem_cgroup;
+		smp_wmb();/* see __commit_charge() */
+		pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT;
 	}
-	tail_pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT;
-	move_unlock_page_cgroup(head_pc, &flags);
 }
-#endif
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 /**
  * mem_cgroup_move_account - move account of the page
@@ -2737,7 +2593,7 @@
 
 	parent = mem_cgroup_from_cont(pcg);
 	ret = __mem_cgroup_try_charge(NULL, gfp_mask, nr_pages, &parent, false);
-	if (ret || !parent)
+	if (ret)
 		goto put_back;
 
 	if (nr_pages > 1)
@@ -2783,12 +2639,9 @@
 	}
 
 	pc = lookup_page_cgroup(page);
-	BUG_ON(!pc); /* XXX: remove this and move pc lookup into commit */
-
 	ret = __mem_cgroup_try_charge(mm, gfp_mask, nr_pages, &memcg, oom);
-	if (ret || !memcg)
+	if (ret == -ENOMEM)
 		return ret;
-
 	__mem_cgroup_commit_charge(memcg, page, nr_pages, pc, ctype);
 	return 0;
 }
@@ -2798,19 +2651,11 @@
 {
 	if (mem_cgroup_disabled())
 		return 0;
-	/*
-	 * If already mapped, we don't have to account.
-	 * If page cache, page->mapping has address_space.
-	 * But page->mapping may have out-of-use anon_vma pointer,
-	 * detecit it by PageAnon() check. newly-mapped-anon's page->mapping
-	 * is NULL.
-  	 */
-	if (page_mapped(page) || (page->mapping && !PageAnon(page)))
-		return 0;
-	if (unlikely(!mm))
-		mm = &init_mm;
+	VM_BUG_ON(page_mapped(page));
+	VM_BUG_ON(page->mapping && !PageAnon(page));
+	VM_BUG_ON(!mm);
 	return mem_cgroup_charge_common(page, mm, gfp_mask,
-				MEM_CGROUP_CHARGE_TYPE_MAPPED);
+					MEM_CGROUP_CHARGE_TYPE_MAPPED);
 }
 
 static void
@@ -2822,14 +2667,27 @@
 					enum charge_type ctype)
 {
 	struct page_cgroup *pc = lookup_page_cgroup(page);
+	struct zone *zone = page_zone(page);
+	unsigned long flags;
+	bool removed = false;
+
 	/*
 	 * In some case, SwapCache, FUSE(splice_buf->radixtree), the page
 	 * is already on LRU. It means the page may on some other page_cgroup's
 	 * LRU. Take care of it.
 	 */
-	mem_cgroup_lru_del_before_commit(page);
+	spin_lock_irqsave(&zone->lru_lock, flags);
+	if (PageLRU(page)) {
+		del_page_from_lru_list(zone, page, page_lru(page));
+		ClearPageLRU(page);
+		removed = true;
+	}
 	__mem_cgroup_commit_charge(memcg, page, 1, pc, ctype);
-	mem_cgroup_lru_add_after_commit(page);
+	if (removed) {
+		add_page_to_lru_list(zone, page, page_lru(page));
+		SetPageLRU(page);
+	}
+	spin_unlock_irqrestore(&zone->lru_lock, flags);
 	return;
 }
 
@@ -2837,6 +2695,7 @@
 				gfp_t gfp_mask)
 {
 	struct mem_cgroup *memcg = NULL;
+	enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE;
 	int ret;
 
 	if (mem_cgroup_disabled())
@@ -2846,31 +2705,16 @@
 
 	if (unlikely(!mm))
 		mm = &init_mm;
+	if (!page_is_file_cache(page))
+		type = MEM_CGROUP_CHARGE_TYPE_SHMEM;
 
-	if (page_is_file_cache(page)) {
-		ret = __mem_cgroup_try_charge(mm, gfp_mask, 1, &memcg, true);
-		if (ret || !memcg)
-			return ret;
-
-		/*
-		 * FUSE reuses pages without going through the final
-		 * put that would remove them from the LRU list, make
-		 * sure that they get relinked properly.
-		 */
-		__mem_cgroup_commit_charge_lrucare(page, memcg,
-					MEM_CGROUP_CHARGE_TYPE_CACHE);
-		return ret;
-	}
-	/* shmem */
-	if (PageSwapCache(page)) {
+	if (!PageSwapCache(page))
+		ret = mem_cgroup_charge_common(page, mm, gfp_mask, type);
+	else { /* page is swapcache/shmem */
 		ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &memcg);
 		if (!ret)
-			__mem_cgroup_commit_charge_swapin(page, memcg,
-					MEM_CGROUP_CHARGE_TYPE_SHMEM);
-	} else
-		ret = mem_cgroup_charge_common(page, mm, gfp_mask,
-					MEM_CGROUP_CHARGE_TYPE_SHMEM);
-
+			__mem_cgroup_commit_charge_swapin(page, memcg, type);
+	}
 	return ret;
 }
 
@@ -2882,12 +2726,12 @@
  */
 int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
 				 struct page *page,
-				 gfp_t mask, struct mem_cgroup **ptr)
+				 gfp_t mask, struct mem_cgroup **memcgp)
 {
 	struct mem_cgroup *memcg;
 	int ret;
 
-	*ptr = NULL;
+	*memcgp = NULL;
 
 	if (mem_cgroup_disabled())
 		return 0;
@@ -2905,27 +2749,32 @@
 	memcg = try_get_mem_cgroup_from_page(page);
 	if (!memcg)
 		goto charge_cur_mm;
-	*ptr = memcg;
-	ret = __mem_cgroup_try_charge(NULL, mask, 1, ptr, true);
+	*memcgp = memcg;
+	ret = __mem_cgroup_try_charge(NULL, mask, 1, memcgp, true);
 	css_put(&memcg->css);
+	if (ret == -EINTR)
+		ret = 0;
 	return ret;
 charge_cur_mm:
 	if (unlikely(!mm))
 		mm = &init_mm;
-	return __mem_cgroup_try_charge(mm, mask, 1, ptr, true);
+	ret = __mem_cgroup_try_charge(mm, mask, 1, memcgp, true);
+	if (ret == -EINTR)
+		ret = 0;
+	return ret;
 }
 
 static void
-__mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr,
+__mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *memcg,
 					enum charge_type ctype)
 {
 	if (mem_cgroup_disabled())
 		return;
-	if (!ptr)
+	if (!memcg)
 		return;
-	cgroup_exclude_rmdir(&ptr->css);
+	cgroup_exclude_rmdir(&memcg->css);
 
-	__mem_cgroup_commit_charge_lrucare(page, ptr, ctype);
+	__mem_cgroup_commit_charge_lrucare(page, memcg, ctype);
 	/*
 	 * Now swap is on-memory. This means this page may be
 	 * counted both as mem and swap....double count.
@@ -2935,21 +2784,22 @@
 	 */
 	if (do_swap_account && PageSwapCache(page)) {
 		swp_entry_t ent = {.val = page_private(page)};
+		struct mem_cgroup *swap_memcg;
 		unsigned short id;
-		struct mem_cgroup *memcg;
 
 		id = swap_cgroup_record(ent, 0);
 		rcu_read_lock();
-		memcg = mem_cgroup_lookup(id);
-		if (memcg) {
+		swap_memcg = mem_cgroup_lookup(id);
+		if (swap_memcg) {
 			/*
 			 * This recorded memcg can be obsolete one. So, avoid
 			 * calling css_tryget
 			 */
-			if (!mem_cgroup_is_root(memcg))
-				res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
-			mem_cgroup_swap_statistics(memcg, false);
-			mem_cgroup_put(memcg);
+			if (!mem_cgroup_is_root(swap_memcg))
+				res_counter_uncharge(&swap_memcg->memsw,
+						     PAGE_SIZE);
+			mem_cgroup_swap_statistics(swap_memcg, false);
+			mem_cgroup_put(swap_memcg);
 		}
 		rcu_read_unlock();
 	}
@@ -2958,13 +2808,14 @@
 	 * So, rmdir()->pre_destroy() can be called while we do this charge.
 	 * In that case, we need to call pre_destroy() again. check it here.
 	 */
-	cgroup_release_and_wakeup_rmdir(&ptr->css);
+	cgroup_release_and_wakeup_rmdir(&memcg->css);
 }
 
-void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr)
+void mem_cgroup_commit_charge_swapin(struct page *page,
+				     struct mem_cgroup *memcg)
 {
-	__mem_cgroup_commit_charge_swapin(page, ptr,
-					MEM_CGROUP_CHARGE_TYPE_MAPPED);
+	__mem_cgroup_commit_charge_swapin(page, memcg,
+					  MEM_CGROUP_CHARGE_TYPE_MAPPED);
 }
 
 void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg)
@@ -3054,7 +2905,7 @@
 	 * Check if our page_cgroup is valid
 	 */
 	pc = lookup_page_cgroup(page);
-	if (unlikely(!pc || !PageCgroupUsed(pc)))
+	if (unlikely(!PageCgroupUsed(pc)))
 		return NULL;
 
 	lock_page_cgroup(pc);
@@ -3117,8 +2968,7 @@
 	/* early check. */
 	if (page_mapped(page))
 		return;
-	if (page->mapping && !PageAnon(page))
-		return;
+	VM_BUG_ON(page->mapping && !PageAnon(page));
 	__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_MAPPED);
 }
 
@@ -3176,6 +3026,23 @@
 	batch->memcg = NULL;
 }
 
+/*
+ * Reset pc->mem_cgroup to root_mem_cgroup for a newly allocated page.
+ * Call this if @newpage will be added to the LRU before accounting starts;
+ * its page_cgroup must not be marked Used. No-op when memcg is disabled.
+ */
+void mem_cgroup_reset_owner(struct page *newpage)
+{
+	struct page_cgroup *pc;
+
+	if (mem_cgroup_disabled())
+		return;
+
+	pc = lookup_page_cgroup(newpage);
+	VM_BUG_ON(PageCgroupUsed(pc));
+	pc->mem_cgroup = root_mem_cgroup;
+}
+
 #ifdef CONFIG_SWAP
 /*
  * called after __delete_from_swap_cache() and drop "page" account.
@@ -3293,14 +3160,14 @@
  * page belongs to.
  */
 int mem_cgroup_prepare_migration(struct page *page,
-	struct page *newpage, struct mem_cgroup **ptr, gfp_t gfp_mask)
+	struct page *newpage, struct mem_cgroup **memcgp, gfp_t gfp_mask)
 {
 	struct mem_cgroup *memcg = NULL;
 	struct page_cgroup *pc;
 	enum charge_type ctype;
 	int ret = 0;
 
-	*ptr = NULL;
+	*memcgp = NULL;
 
 	VM_BUG_ON(PageTransHuge(page));
 	if (mem_cgroup_disabled())
@@ -3351,10 +3218,10 @@
 	if (!memcg)
 		return 0;
 
-	*ptr = memcg;
-	ret = __mem_cgroup_try_charge(NULL, gfp_mask, 1, ptr, false);
+	*memcgp = memcg;
+	ret = __mem_cgroup_try_charge(NULL, gfp_mask, 1, memcgp, false);
 	css_put(&memcg->css);/* drop extra refcnt */
-	if (ret || *ptr == NULL) {
+	if (ret) {
 		if (PageAnon(page)) {
 			lock_page_cgroup(pc);
 			ClearPageCgroupMigration(pc);
@@ -3364,6 +3231,7 @@
 			 */
 			mem_cgroup_uncharge_page(page);
 		}
+		/* we'll need to revisit this error code (we have -EINTR) */
 		return -ENOMEM;
 	}
 	/*
@@ -3432,12 +3300,51 @@
 	cgroup_release_and_wakeup_rmdir(&memcg->css);
 }
 
+/*
+ * When replacing page cache, newpage is not under any memcg but it is on
+ * the LRU. So this function doesn't touch res_counter but handles the LRU
+ * correctly. Both pages are locked so we cannot race with uncharge.
+ */
+void mem_cgroup_replace_page_cache(struct page *oldpage,
+				  struct page *newpage)
+{
+	struct mem_cgroup *memcg;
+	struct page_cgroup *pc;
+	enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE;
+
+	if (mem_cgroup_disabled())
+		return;
+
+	pc = lookup_page_cgroup(oldpage);
+	/* fix accounting on the old page: drop its statistics and Used flag */
+	lock_page_cgroup(pc);
+	memcg = pc->mem_cgroup;
+	mem_cgroup_charge_statistics(memcg, PageCgroupCache(pc), -1);
+	ClearPageCgroupUsed(pc);
+	unlock_page_cgroup(pc);
+
+	if (PageSwapBacked(oldpage))
+		type = MEM_CGROUP_CHARGE_TYPE_SHMEM;
+
+	/*
+	 * Even if newpage->mapping was NULL before starting replacement,
+	 * newpage may already be on the LRU (or a pagevec for the LRU). We
+	 * lock the LRU while we overwrite pc->mem_cgroup.
+	 */
+	__mem_cgroup_commit_charge_lrucare(newpage, memcg, type);
+}
+
 #ifdef CONFIG_DEBUG_VM
 static struct page_cgroup *lookup_page_cgroup_used(struct page *page)
 {
 	struct page_cgroup *pc;
 
 	pc = lookup_page_cgroup(page);
+	/*
+	 * Can be NULL while feeding pages into the page allocator for
+	 * the first time, i.e. during boot or memory hotplug;
+	 * or when mem_cgroup_disabled().
+	 */
 	if (likely(pc) && PageCgroupUsed(pc))
 		return pc;
 	return NULL;
@@ -3457,23 +3364,8 @@
 
 	pc = lookup_page_cgroup_used(page);
 	if (pc) {
-		int ret = -1;
-		char *path;
-
-		printk(KERN_ALERT "pc:%p pc->flags:%lx pc->mem_cgroup:%p",
+		printk(KERN_ALERT "pc:%p pc->flags:%lx pc->mem_cgroup:%p\n",
 		       pc, pc->flags, pc->mem_cgroup);
-
-		path = kmalloc(PATH_MAX, GFP_KERNEL);
-		if (path) {
-			rcu_read_lock();
-			ret = cgroup_path(pc->mem_cgroup->css.cgroup,
-							path, PATH_MAX);
-			rcu_read_unlock();
-		}
-
-		printk(KERN_CONT "(%s)\n",
-				(ret < 0) ? "cannot get the path" : path);
-		kfree(path);
 	}
 }
 #endif
@@ -3534,9 +3426,8 @@
 		if (!ret)
 			break;
 
-		mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL,
-						MEM_CGROUP_RECLAIM_SHRINK,
-						NULL);
+		mem_cgroup_reclaim(memcg, GFP_KERNEL,
+				   MEM_CGROUP_RECLAIM_SHRINK);
 		curusage = res_counter_read_u64(&memcg->res, RES_USAGE);
 		/* Usage is reduced ? */
   		if (curusage >= oldusage)
@@ -3594,10 +3485,9 @@
 		if (!ret)
 			break;
 
-		mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL,
-						MEM_CGROUP_RECLAIM_NOSWAP |
-						MEM_CGROUP_RECLAIM_SHRINK,
-						NULL);
+		mem_cgroup_reclaim(memcg, GFP_KERNEL,
+				   MEM_CGROUP_RECLAIM_NOSWAP |
+				   MEM_CGROUP_RECLAIM_SHRINK);
 		curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
 		/* Usage is reduced ? */
 		if (curusage >= oldusage)
@@ -3640,10 +3530,8 @@
 			break;
 
 		nr_scanned = 0;
-		reclaimed = mem_cgroup_hierarchical_reclaim(mz->mem, zone,
-						gfp_mask,
-						MEM_CGROUP_RECLAIM_SOFT,
-						&nr_scanned);
+		reclaimed = mem_cgroup_soft_reclaim(mz->mem, zone,
+						    gfp_mask, &nr_scanned);
 		nr_reclaimed += reclaimed;
 		*total_scanned += nr_scanned;
 		spin_lock(&mctz->lock);
@@ -3711,22 +3599,23 @@
 static int mem_cgroup_force_empty_list(struct mem_cgroup *memcg,
 				int node, int zid, enum lru_list lru)
 {
-	struct zone *zone;
 	struct mem_cgroup_per_zone *mz;
-	struct page_cgroup *pc, *busy;
 	unsigned long flags, loop;
 	struct list_head *list;
+	struct page *busy;
+	struct zone *zone;
 	int ret = 0;
 
 	zone = &NODE_DATA(node)->node_zones[zid];
 	mz = mem_cgroup_zoneinfo(memcg, node, zid);
-	list = &mz->lists[lru];
+	list = &mz->lruvec.lists[lru];
 
 	loop = MEM_CGROUP_ZSTAT(mz, lru);
 	/* give some margin against EBUSY etc...*/
 	loop += 256;
 	busy = NULL;
 	while (loop--) {
+		struct page_cgroup *pc;
 		struct page *page;
 
 		ret = 0;
@@ -3735,24 +3624,24 @@
 			spin_unlock_irqrestore(&zone->lru_lock, flags);
 			break;
 		}
-		pc = list_entry(list->prev, struct page_cgroup, lru);
-		if (busy == pc) {
-			list_move(&pc->lru, list);
+		page = list_entry(list->prev, struct page, lru);
+		if (busy == page) {
+			list_move(&page->lru, list);
 			busy = NULL;
 			spin_unlock_irqrestore(&zone->lru_lock, flags);
 			continue;
 		}
 		spin_unlock_irqrestore(&zone->lru_lock, flags);
 
-		page = lookup_cgroup_page(pc);
+		pc = lookup_page_cgroup(page);
 
 		ret = mem_cgroup_move_parent(page, pc, memcg, GFP_KERNEL);
-		if (ret == -ENOMEM)
+		if (ret == -ENOMEM || ret == -EINTR)
 			break;
 
 		if (ret == -EBUSY || ret == -EINVAL) {
 			/* found lock contention or "pc" is obsolete. */
-			busy = pc;
+			busy = page;
 			cond_resched();
 		} else
 			busy = NULL;
@@ -4846,7 +4735,7 @@
 	for (zone = 0; zone < MAX_NR_ZONES; zone++) {
 		mz = &pn->zoneinfo[zone];
 		for_each_lru(l)
-			INIT_LIST_HEAD(&mz->lists[l]);
+			INIT_LIST_HEAD(&mz->lruvec.lists[l]);
 		mz->usage_in_excess = 0;
 		mz->on_tree = false;
 		mz->mem = memcg;
@@ -4906,7 +4795,7 @@
 	mem_cgroup_remove_from_trees(memcg);
 	free_css_id(&mem_cgroup_subsys, &memcg->css);
 
-	for_each_node_state(node, N_POSSIBLE)
+	for_each_node(node)
 		free_mem_cgroup_per_zone_info(memcg, node);
 
 	free_percpu(memcg->stat);
@@ -4965,13 +4854,13 @@
 	struct mem_cgroup_tree_per_zone *rtpz;
 	int tmp, node, zone;
 
-	for_each_node_state(node, N_POSSIBLE) {
+	for_each_node(node) {
 		tmp = node;
 		if (!node_state(node, N_NORMAL_MEMORY))
 			tmp = -1;
 		rtpn = kzalloc_node(sizeof(*rtpn), GFP_KERNEL, tmp);
 		if (!rtpn)
-			return 1;
+			goto err_cleanup;
 
 		soft_limit_tree.rb_tree_per_node[node] = rtpn;
 
@@ -4982,6 +4871,16 @@
 		}
 	}
 	return 0;
+
+err_cleanup:
+	for_each_node(node) {
+		if (!soft_limit_tree.rb_tree_per_node[node])
+			break;
+		kfree(soft_limit_tree.rb_tree_per_node[node]);
+		soft_limit_tree.rb_tree_per_node[node] = NULL;
+	}
+	return 1;
+
 }
 
 static struct cgroup_subsys_state * __ref
@@ -4995,7 +4894,7 @@
 	if (!memcg)
 		return ERR_PTR(error);
 
-	for_each_node_state(node, N_POSSIBLE)
+	for_each_node(node)
 		if (alloc_mem_cgroup_per_zone_info(memcg, node))
 			goto free_out;
 
@@ -5033,7 +4932,6 @@
 		res_counter_init(&memcg->res, NULL);
 		res_counter_init(&memcg->memsw, NULL);
 	}
-	memcg->last_scanned_child = 0;
 	memcg->last_scanned_node = MAX_NUMNODES;
 	INIT_LIST_HEAD(&memcg->oom_notify);
 
@@ -5129,9 +5027,9 @@
 		}
 		ret = __mem_cgroup_try_charge(NULL,
 					GFP_KERNEL, 1, &memcg, false);
-		if (ret || !memcg)
+		if (ret)
 			/* mem_cgroup_clear_mc() will do uncharge later */
-			return -ENOMEM;
+			return ret;
 		mc.precharge++;
 	}
 	return ret;
@@ -5276,7 +5174,7 @@
 	}
 	/* There is a swap entry and a page doesn't exist or isn't charged */
 	if (ent.val && !ret &&
-			css_id(&mc.from->css) == lookup_swap_cgroup(ent)) {
+			css_id(&mc.from->css) == lookup_swap_cgroup_id(ent)) {
 		ret = MC_TARGET_SWAP;
 		if (target)
 			target->ent = ent;

diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 06d3479..56080ea 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c

@@ -1557,7 +1557,7 @@
 					    page_is_file_cache(page));
 		list_add(&page->lru, &pagelist);
 		ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL,
-								0, true);
+							0, MIGRATE_SYNC);
 		if (ret) {
 			putback_lru_pages(&pagelist);
 			pr_info("soft offline: %#lx: migration failed %d, type %lx\n",

diff --git a/mm/memory.c b/mm/memory.c
index 829d437..5e30583 100644
--- a/mm/memory.c
+++ b/mm/memory.c

@@ -293,7 +293,7 @@
 {
 	struct mmu_gather_batch *batch;
 
-	tlb->need_flush = 1;
+	VM_BUG_ON(!tlb->need_flush);
 
 	if (tlb_fast_mode(tlb)) {
 		free_page_and_swap_cache(page);
@@ -1231,7 +1231,7 @@
 			if (next-addr != HPAGE_PMD_SIZE) {
 				VM_BUG_ON(!rwsem_is_locked(&tlb->mm->mmap_sem));
 				split_huge_page_pmd(vma->vm_mm, pmd);
-			} else if (zap_huge_pmd(tlb, vma, pmd))
+			} else if (zap_huge_pmd(tlb, vma, pmd, addr))
 				continue;
 			/* fall through */
 		}

diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 2168489..6629faf 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c

@@ -809,7 +809,7 @@
 		}
 		/* this function returns # of failed pages */
 		ret = migrate_pages(&source, hotremove_migrate_alloc, 0,
-								true, true);
+							true, MIGRATE_SYNC);
 		if (ret)
 			putback_lru_pages(&source);
 	}

diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index e3d58f0..06b145f 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c

@@ -942,7 +942,7 @@
 
 	if (!list_empty(&pagelist)) {
 		err = migrate_pages(&pagelist, new_node_page, dest,
-								false, true);
+							false, MIGRATE_SYNC);
 		if (err)
 			putback_lru_pages(&pagelist);
 	}

diff --git a/mm/migrate.c b/mm/migrate.c
index 89ea085..9871a56 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c

@@ -216,6 +216,56 @@
 	pte_unmap_unlock(ptep, ptl);
 }
 
+#ifdef CONFIG_BLOCK
+/* Lock each buffer in @head's b_this_page ring; true if all were locked */
+static bool buffer_migrate_lock_buffers(struct buffer_head *head,
+							enum migrate_mode mode)
+{
+	struct buffer_head *bh = head;
+
+	/* Simple case, any non-async mode: take a ref and lock each buffer */
+	if (mode != MIGRATE_ASYNC) {
+		do {
+			get_bh(bh);
+			lock_buffer(bh);
+			bh = bh->b_this_page;
+
+		} while (bh != head);
+
+		return true;
+	}
+
+	/* Async case, we cannot block on lock_buffer so use trylock_buffer */
+	do {
+		get_bh(bh);
+		if (!trylock_buffer(bh)) {
+			/*
+			 * We failed to lock the buffer and cannot stall in
+			 * async migration. Release the locks and refs taken.
+			 */
+			struct buffer_head *failed_bh = bh;
+			put_bh(failed_bh);
+			bh = head;
+			while (bh != failed_bh) {
+				unlock_buffer(bh);
+				put_bh(bh);
+				bh = bh->b_this_page;
+			}
+			return false;
+		}
+
+		bh = bh->b_this_page;
+	} while (bh != head);
+	return true;
+}
+#else
+static inline bool buffer_migrate_lock_buffers(struct buffer_head *head,
+							enum migrate_mode mode)
+{
+	return true;
+}
+#endif /* CONFIG_BLOCK */
+
 /*
  * Replace the page in the mapping.
  *
@@ -225,7 +275,8 @@
  * 3 for pages with a mapping and PagePrivate/PagePrivate2 set.
  */
 static int migrate_page_move_mapping(struct address_space *mapping,
-		struct page *newpage, struct page *page)
+		struct page *newpage, struct page *page,
+		struct buffer_head *head, enum migrate_mode mode)
 {
 	int expected_count;
 	void **pslot;
@@ -255,6 +306,20 @@
 	}
 
 	/*
+	 * In the async migration case of moving a page with buffers, lock the
+	 * buffers using trylock before the mapping is moved. If the mapping
+	 * was moved and we later failed to lock the buffers and could not move
+	 * the mapping back due to an elevated page count, we would have to
+	 * block waiting on other references to be dropped.
+	 */
+	if (mode == MIGRATE_ASYNC && head &&
+			!buffer_migrate_lock_buffers(head, mode)) {
+		page_unfreeze_refs(page, expected_count);
+		spin_unlock_irq(&mapping->tree_lock);
+		return -EAGAIN;
+	}
+
+	/*
 	 * Now we know that no one else is looking at the page.
 	 */
 	get_page(newpage);	/* add cache reference */
@@ -409,13 +474,14 @@
  * Pages are locked upon entry and exit.
  */
 int migrate_page(struct address_space *mapping,
-		struct page *newpage, struct page *page)
+		struct page *newpage, struct page *page,
+		enum migrate_mode mode)
 {
 	int rc;
 
 	BUG_ON(PageWriteback(page));	/* Writeback must be complete */
 
-	rc = migrate_page_move_mapping(mapping, newpage, page);
+	rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode);
 
 	if (rc)
 		return rc;
@@ -432,28 +498,28 @@
  * exist.
  */
 int buffer_migrate_page(struct address_space *mapping,
-		struct page *newpage, struct page *page)
+		struct page *newpage, struct page *page, enum migrate_mode mode)
 {
 	struct buffer_head *bh, *head;
 	int rc;
 
 	if (!page_has_buffers(page))
-		return migrate_page(mapping, newpage, page);
+		return migrate_page(mapping, newpage, page, mode);
 
 	head = page_buffers(page);
 
-	rc = migrate_page_move_mapping(mapping, newpage, page);
+	rc = migrate_page_move_mapping(mapping, newpage, page, head, mode);
 
 	if (rc)
 		return rc;
 
-	bh = head;
-	do {
-		get_bh(bh);
-		lock_buffer(bh);
-		bh = bh->b_this_page;
-
-	} while (bh != head);
+	/*
+	 * In the async case, migrate_page_move_mapping locked the buffers
+	 * with an IRQ-safe spinlock held. In the sync case, the buffers
+	 * need to be locked now
+	 */
+	if (mode != MIGRATE_ASYNC)
+		BUG_ON(!buffer_migrate_lock_buffers(head, mode));
 
 	ClearPagePrivate(page);
 	set_page_private(newpage, page_private(page));
@@ -530,10 +596,14 @@
  * Default handling if a filesystem does not provide a migration function.
  */
 static int fallback_migrate_page(struct address_space *mapping,
-	struct page *newpage, struct page *page)
+	struct page *newpage, struct page *page, enum migrate_mode mode)
 {
-	if (PageDirty(page))
+	if (PageDirty(page)) {
+		/* Only writeback pages in full synchronous migration */
+		if (mode != MIGRATE_SYNC)
+			return -EBUSY;
 		return writeout(mapping, page);
+	}
 
 	/*
 	 * Buffers may be managed in a filesystem specific way.
@@ -543,7 +613,7 @@
 	    !try_to_release_page(page, GFP_KERNEL))
 		return -EAGAIN;
 
-	return migrate_page(mapping, newpage, page);
+	return migrate_page(mapping, newpage, page, mode);
 }
 
 /*
@@ -558,7 +628,7 @@
  *  == 0 - success
  */
 static int move_to_new_page(struct page *newpage, struct page *page,
-					int remap_swapcache, bool sync)
+				int remap_swapcache, enum migrate_mode mode)
 {
 	struct address_space *mapping;
 	int rc;
@@ -579,29 +649,18 @@
 
 	mapping = page_mapping(page);
 	if (!mapping)
-		rc = migrate_page(mapping, newpage, page);
-	else {
+		rc = migrate_page(mapping, newpage, page, mode);
+	else if (mapping->a_ops->migratepage)
 		/*
-		 * Do not writeback pages if !sync and migratepage is
-		 * not pointing to migrate_page() which is nonblocking
-		 * (swapcache/tmpfs uses migratepage = migrate_page).
+		 * Most pages have a mapping and most filesystems provide a
+		 * migratepage callback. Anonymous pages are part of swap
+		 * space which also has its own migratepage callback. This
+		 * is the most common path for page migration.
 		 */
-		if (PageDirty(page) && !sync &&
-		    mapping->a_ops->migratepage != migrate_page)
-			rc = -EBUSY;
-		else if (mapping->a_ops->migratepage)
-			/*
-			 * Most pages have a mapping and most filesystems
-			 * should provide a migration function. Anonymous
-			 * pages are part of swap space which also has its
-			 * own migration function. This is the most common
-			 * path for page migration.
-			 */
-			rc = mapping->a_ops->migratepage(mapping,
-							newpage, page);
-		else
-			rc = fallback_migrate_page(mapping, newpage, page);
-	}
+		rc = mapping->a_ops->migratepage(mapping,
+						newpage, page, mode);
+	else
+		rc = fallback_migrate_page(mapping, newpage, page, mode);
 
 	if (rc) {
 		newpage->mapping = NULL;
@@ -616,7 +675,7 @@
 }
 
 static int __unmap_and_move(struct page *page, struct page *newpage,
-				int force, bool offlining, bool sync)
+			int force, bool offlining, enum migrate_mode mode)
 {
 	int rc = -EAGAIN;
 	int remap_swapcache = 1;
@@ -625,7 +684,7 @@
 	struct anon_vma *anon_vma = NULL;
 
 	if (!trylock_page(page)) {
-		if (!force || !sync)
+		if (!force || mode == MIGRATE_ASYNC)
 			goto out;
 
 		/*
@@ -671,10 +730,12 @@
 
 	if (PageWriteback(page)) {
 		/*
-		 * For !sync, there is no point retrying as the retry loop
-		 * is expected to be too short for PageWriteback to be cleared
+		 * Only in the case of a full synchronous migration is it
+		 * necessary to wait for PageWriteback. In the async case,
+		 * the retry loop is too short and in the sync-light case,
+		 * the overhead of stalling is too much
 		 */
-		if (!sync) {
+		if (mode != MIGRATE_SYNC) {
 			rc = -EBUSY;
 			goto uncharge;
 		}
@@ -745,7 +806,7 @@
 
 skip_unmap:
 	if (!page_mapped(page))
-		rc = move_to_new_page(newpage, page, remap_swapcache, sync);
+		rc = move_to_new_page(newpage, page, remap_swapcache, mode);
 
 	if (rc && remap_swapcache)
 		remove_migration_ptes(page, page);
@@ -768,7 +829,8 @@
  * to the newly allocated page in newpage.
  */
 static int unmap_and_move(new_page_t get_new_page, unsigned long private,
-			struct page *page, int force, bool offlining, bool sync)
+			struct page *page, int force, bool offlining,
+			enum migrate_mode mode)
 {
 	int rc = 0;
 	int *result = NULL;
@@ -777,6 +839,8 @@
 	if (!newpage)
 		return -ENOMEM;
 
+	mem_cgroup_reset_owner(newpage);
+
 	if (page_count(page) == 1) {
 		/* page was freed from under us. So we are done. */
 		goto out;
@@ -786,7 +850,7 @@
 		if (unlikely(split_huge_page(page)))
 			goto out;
 
-	rc = __unmap_and_move(page, newpage, force, offlining, sync);
+	rc = __unmap_and_move(page, newpage, force, offlining, mode);
 out:
 	if (rc != -EAGAIN) {
 		/*
@@ -834,7 +898,8 @@
  */
 static int unmap_and_move_huge_page(new_page_t get_new_page,
 				unsigned long private, struct page *hpage,
-				int force, bool offlining, bool sync)
+				int force, bool offlining,
+				enum migrate_mode mode)
 {
 	int rc = 0;
 	int *result = NULL;
@@ -847,7 +912,7 @@
 	rc = -EAGAIN;
 
 	if (!trylock_page(hpage)) {
-		if (!force || !sync)
+		if (!force || mode != MIGRATE_SYNC)
 			goto out;
 		lock_page(hpage);
 	}
@@ -858,7 +923,7 @@
 	try_to_unmap(hpage, TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);
 
 	if (!page_mapped(hpage))
-		rc = move_to_new_page(new_hpage, hpage, 1, sync);
+		rc = move_to_new_page(new_hpage, hpage, 1, mode);
 
 	if (rc)
 		remove_migration_ptes(hpage, hpage);
@@ -901,7 +966,7 @@
  */
 int migrate_pages(struct list_head *from,
 		new_page_t get_new_page, unsigned long private, bool offlining,
-		bool sync)
+		enum migrate_mode mode)
 {
 	int retry = 1;
 	int nr_failed = 0;
@@ -922,7 +987,7 @@
 
 			rc = unmap_and_move(get_new_page, private,
 						page, pass > 2, offlining,
-						sync);
+						mode);
 
 			switch(rc) {
 			case -ENOMEM:
@@ -952,7 +1017,7 @@
 
 int migrate_huge_pages(struct list_head *from,
 		new_page_t get_new_page, unsigned long private, bool offlining,
-		bool sync)
+		enum migrate_mode mode)
 {
 	int retry = 1;
 	int nr_failed = 0;
@@ -969,7 +1034,7 @@
 
 			rc = unmap_and_move_huge_page(get_new_page,
 					private, page, pass > 2, offlining,
-					sync);
+					mode);
 
 			switch(rc) {
 			case -ENOMEM:
@@ -1098,7 +1163,7 @@
 	err = 0;
 	if (!list_empty(&pagelist)) {
 		err = migrate_pages(&pagelist, new_page_node,
-				(unsigned long)pm, 0, true);
+				(unsigned long)pm, 0, MIGRATE_SYNC);
 		if (err)
 			putback_lru_pages(&pagelist);
 	}

diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 7c122faa..2958fd8 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c

@@ -152,7 +152,7 @@
 
 /* return true if the task is not adequate as candidate victim task. */
 static bool oom_unkillable_task(struct task_struct *p,
-		const struct mem_cgroup *mem, const nodemask_t *nodemask)
+		const struct mem_cgroup *memcg, const nodemask_t *nodemask)
 {
 	if (is_global_init(p))
 		return true;
@@ -160,7 +160,7 @@
 		return true;
 
 	/* When mem_cgroup_out_of_memory() and p is not member of the group */
-	if (mem && !task_in_mem_cgroup(p, mem))
+	if (memcg && !task_in_mem_cgroup(p, memcg))
 		return true;
 
 	/* p may not have freeable memory in nodemask */
@@ -179,12 +179,12 @@
  * predictable as possible.  The goal is to return the highest value for the
  * task consuming the most memory to avoid subsequent oom failures.
  */
-unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *mem,
+unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
 		      const nodemask_t *nodemask, unsigned long totalpages)
 {
 	long points;
 
-	if (oom_unkillable_task(p, mem, nodemask))
+	if (oom_unkillable_task(p, memcg, nodemask))
 		return 0;
 
 	p = find_lock_task_mm(p);
@@ -308,7 +308,7 @@
  * (not docbooked, we don't want this one cluttering up the manual)
  */
 static struct task_struct *select_bad_process(unsigned int *ppoints,
-		unsigned long totalpages, struct mem_cgroup *mem,
+		unsigned long totalpages, struct mem_cgroup *memcg,
 		const nodemask_t *nodemask)
 {
 	struct task_struct *g, *p;
@@ -320,7 +320,7 @@
 
 		if (p->exit_state)
 			continue;
-		if (oom_unkillable_task(p, mem, nodemask))
+		if (oom_unkillable_task(p, memcg, nodemask))
 			continue;
 
 		/*
@@ -364,7 +364,7 @@
 			}
 		}
 
-		points = oom_badness(p, mem, nodemask, totalpages);
+		points = oom_badness(p, memcg, nodemask, totalpages);
 		if (points > *ppoints) {
 			chosen = p;
 			*ppoints = points;
@@ -387,14 +387,14 @@
  *
  * Call with tasklist_lock read-locked.
  */
-static void dump_tasks(const struct mem_cgroup *mem, const nodemask_t *nodemask)
+static void dump_tasks(const struct mem_cgroup *memcg, const nodemask_t *nodemask)
 {
 	struct task_struct *p;
 	struct task_struct *task;
 
 	pr_info("[ pid ]   uid  tgid total_vm      rss cpu oom_adj oom_score_adj name\n");
 	for_each_process(p) {
-		if (oom_unkillable_task(p, mem, nodemask))
+		if (oom_unkillable_task(p, memcg, nodemask))
 			continue;
 
 		task = find_lock_task_mm(p);
@@ -417,7 +417,7 @@
 }
 
 static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order,
-			struct mem_cgroup *mem, const nodemask_t *nodemask)
+			struct mem_cgroup *memcg, const nodemask_t *nodemask)
 {
 	task_lock(current);
 	pr_warning("%s invoked oom-killer: gfp_mask=0x%x, order=%d, "
@@ -427,14 +427,14 @@
 	cpuset_print_task_mems_allowed(current);
 	task_unlock(current);
 	dump_stack();
-	mem_cgroup_print_oom_info(mem, p);
+	mem_cgroup_print_oom_info(memcg, p);
 	show_mem(SHOW_MEM_FILTER_NODES);
 	if (sysctl_oom_dump_tasks)
-		dump_tasks(mem, nodemask);
+		dump_tasks(memcg, nodemask);
 }
 
 #define K(x) ((x) << (PAGE_SHIFT-10))
-static int oom_kill_task(struct task_struct *p, struct mem_cgroup *mem)
+static int oom_kill_task(struct task_struct *p)
 {
 	struct task_struct *q;
 	struct mm_struct *mm;
@@ -484,7 +484,7 @@
 
 static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
 			    unsigned int points, unsigned long totalpages,
-			    struct mem_cgroup *mem, nodemask_t *nodemask,
+			    struct mem_cgroup *memcg, nodemask_t *nodemask,
 			    const char *message)
 {
 	struct task_struct *victim = p;
@@ -493,7 +493,7 @@
 	unsigned int victim_points = 0;
 
 	if (printk_ratelimit())
-		dump_header(p, gfp_mask, order, mem, nodemask);
+		dump_header(p, gfp_mask, order, memcg, nodemask);
 
 	/*
 	 * If the task is already exiting, don't alarm the sysadmin or kill
@@ -524,7 +524,7 @@
 			/*
 			 * oom_badness() returns 0 if the thread is unkillable
 			 */
-			child_points = oom_badness(child, mem, nodemask,
+			child_points = oom_badness(child, memcg, nodemask,
 								totalpages);
 			if (child_points > victim_points) {
 				victim = child;
@@ -533,7 +533,7 @@
 		}
 	} while_each_thread(p, t);
 
-	return oom_kill_task(victim, mem);
+	return oom_kill_task(victim);
 }
 
 /*
@@ -561,7 +561,7 @@
 }
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
-void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask)
+void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask)
 {
 	unsigned long limit;
 	unsigned int points = 0;
@@ -578,14 +578,14 @@
 	}
 
 	check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, 0, NULL);
-	limit = mem_cgroup_get_limit(mem) >> PAGE_SHIFT;
+	limit = mem_cgroup_get_limit(memcg) >> PAGE_SHIFT;
 	read_lock(&tasklist_lock);
 retry:
-	p = select_bad_process(&points, limit, mem, NULL);
+	p = select_bad_process(&points, limit, memcg, NULL);
 	if (!p || PTR_ERR(p) == -1UL)
 		goto out;
 
-	if (oom_kill_process(p, gfp_mask, 0, points, limit, mem, NULL,
+	if (oom_kill_process(p, gfp_mask, 0, points, limit, memcg, NULL,
 				"Memory cgroup out of memory"))
 		goto retry;
 out:

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 794e671..0027d8f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c

@@ -1981,14 +1981,20 @@
 __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	struct zonelist *zonelist, enum zone_type high_zoneidx,
 	nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
-	int migratetype, unsigned long *did_some_progress,
-	bool sync_migration)
+	int migratetype, bool sync_migration,
+	bool *deferred_compaction,
+	unsigned long *did_some_progress)
 {
 	struct page *page;
 
-	if (!order || compaction_deferred(preferred_zone))
+	if (!order)
 		return NULL;
 
+	if (compaction_deferred(preferred_zone)) {
+		*deferred_compaction = true;
+		return NULL;
+	}
+
 	current->flags |= PF_MEMALLOC;
 	*did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask,
 						nodemask, sync_migration);
@@ -2016,7 +2022,13 @@
 		 * but not enough to satisfy watermarks.
 		 */
 		count_vm_event(COMPACTFAIL);
-		defer_compaction(preferred_zone);
+
+		/*
+		 * As async compaction considers a subset of pageblocks, only
+		 * defer if the failure was a sync compaction failure.
+		 */
+		if (sync_migration)
+			defer_compaction(preferred_zone);
 
 		cond_resched();
 	}
@@ -2028,8 +2040,9 @@
 __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	struct zonelist *zonelist, enum zone_type high_zoneidx,
 	nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
-	int migratetype, unsigned long *did_some_progress,
-	bool sync_migration)
+	int migratetype, bool sync_migration,
+	bool *deferred_compaction,
+	unsigned long *did_some_progress)
 {
 	return NULL;
 }
@@ -2179,6 +2192,7 @@
 	unsigned long pages_reclaimed = 0;
 	unsigned long did_some_progress;
 	bool sync_migration = false;
+	bool deferred_compaction = false;
 
 	/*
 	 * In the slowpath, we sanity check order to avoid ever trying to
@@ -2259,12 +2273,22 @@
 					zonelist, high_zoneidx,
 					nodemask,
 					alloc_flags, preferred_zone,
-					migratetype, &did_some_progress,
-					sync_migration);
+					migratetype, sync_migration,
+					&deferred_compaction,
+					&did_some_progress);
 	if (page)
 		goto got_pg;
 	sync_migration = true;
 
+	/*
+	 * If compaction is deferred for high-order allocations, it is because
+	 * sync compaction recently failed. If this is the case and the caller
+	 * has requested the system not be heavily disrupted, fail the
+	 * allocation now instead of entering direct reclaim
+	 */
+	if (deferred_compaction && (gfp_mask & __GFP_NO_KSWAPD))
+		goto nopage;
+
 	/* Try direct reclaim and then allocating */
 	page = __alloc_pages_direct_reclaim(gfp_mask, order,
 					zonelist, high_zoneidx,
@@ -2328,8 +2352,9 @@
 					zonelist, high_zoneidx,
 					nodemask,
 					alloc_flags, preferred_zone,
-					migratetype, &did_some_progress,
-					sync_migration);
+					migratetype, sync_migration,
+					&deferred_compaction,
+					&did_some_progress);
 		if (page)
 			goto got_pg;
 	}
@@ -4237,7 +4262,7 @@
 	for (j = 0; j < MAX_NR_ZONES; j++) {
 		struct zone *zone = pgdat->node_zones + j;
 		unsigned long size, realsize, memmap_pages;
-		enum lru_list l;
+		enum lru_list lru;
 
 		size = zone_spanned_pages_in_node(nid, j, zones_size);
 		realsize = size - zone_absent_pages_in_node(nid, j,
@@ -4287,8 +4312,8 @@
 		zone->zone_pgdat = pgdat;
 
 		zone_pcp_init(zone);
-		for_each_lru(l)
-			INIT_LIST_HEAD(&zone->lru[l].list);
+		for_each_lru(lru)
+			INIT_LIST_HEAD(&zone->lruvec.lists[lru]);
 		zone->reclaim_stat.recent_rotated[0] = 0;
 		zone->reclaim_stat.recent_rotated[1] = 0;
 		zone->reclaim_stat.recent_scanned[0] = 0;
@@ -4642,8 +4667,10 @@
 
 	for (zone_type = 0; zone_type <= ZONE_NORMAL; zone_type++) {
 		struct zone *zone = &pgdat->node_zones[zone_type];
-		if (zone->present_pages)
+		if (zone->present_pages) {
 			node_set_state(zone_to_nid(zone), N_NORMAL_MEMORY);
+			break;
+		}
 	}
 #endif
 }

diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index 2d123f9..de1616a 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c

@@ -11,13 +11,6 @@
 #include <linux/swapops.h>
 #include <linux/kmemleak.h>
 
-static void __meminit init_page_cgroup(struct page_cgroup *pc, unsigned long id)
-{
-	pc->flags = 0;
-	set_page_cgroup_array_id(pc, id);
-	pc->mem_cgroup = NULL;
-	INIT_LIST_HEAD(&pc->lru);
-}
 static unsigned long total_usage;
 
 #if !defined(CONFIG_SPARSEMEM)
@@ -35,35 +28,27 @@
 	struct page_cgroup *base;
 
 	base = NODE_DATA(page_to_nid(page))->node_page_cgroup;
+#ifdef CONFIG_DEBUG_VM
+	/*
+	 * The sanity checks the page allocator does upon freeing a
+	 * page can reach here before the page_cgroup arrays are
+	 * allocated when feeding a range of pages to the allocator
+	 * for the first time during bootup or memory hotplug.
+	 */
 	if (unlikely(!base))
 		return NULL;
-
+#endif
 	offset = pfn - NODE_DATA(page_to_nid(page))->node_start_pfn;
 	return base + offset;
 }
 
-struct page *lookup_cgroup_page(struct page_cgroup *pc)
-{
-	unsigned long pfn;
-	struct page *page;
-	pg_data_t *pgdat;
-
-	pgdat = NODE_DATA(page_cgroup_array_id(pc));
-	pfn = pc - pgdat->node_page_cgroup + pgdat->node_start_pfn;
-	page = pfn_to_page(pfn);
-	VM_BUG_ON(pc != lookup_page_cgroup(page));
-	return page;
-}
-
 static int __init alloc_node_page_cgroup(int nid)
 {
-	struct page_cgroup *base, *pc;
+	struct page_cgroup *base;
 	unsigned long table_size;
-	unsigned long start_pfn, nr_pages, index;
+	unsigned long nr_pages;
 
-	start_pfn = NODE_DATA(nid)->node_start_pfn;
 	nr_pages = NODE_DATA(nid)->node_spanned_pages;
-
 	if (!nr_pages)
 		return 0;
 
@@ -73,10 +58,6 @@
 			table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
 	if (!base)
 		return -ENOMEM;
-	for (index = 0; index < nr_pages; index++) {
-		pc = base + index;
-		init_page_cgroup(pc, nid);
-	}
 	NODE_DATA(nid)->node_page_cgroup = base;
 	total_usage += table_size;
 	return 0;
@@ -111,29 +92,23 @@
 {
 	unsigned long pfn = page_to_pfn(page);
 	struct mem_section *section = __pfn_to_section(pfn);
-
+#ifdef CONFIG_DEBUG_VM
+	/*
+	 * The sanity checks the page allocator does upon freeing a
+	 * page can reach here before the page_cgroup arrays are
+	 * allocated when feeding a range of pages to the allocator
+	 * for the first time during bootup or memory hotplug.
+	 */
 	if (!section->page_cgroup)
 		return NULL;
+#endif
 	return section->page_cgroup + pfn;
 }
 
-struct page *lookup_cgroup_page(struct page_cgroup *pc)
-{
-	struct mem_section *section;
-	struct page *page;
-	unsigned long nr;
-
-	nr = page_cgroup_array_id(pc);
-	section = __nr_to_section(nr);
-	page = pfn_to_page(pc - section->page_cgroup);
-	VM_BUG_ON(pc != lookup_page_cgroup(page));
-	return page;
-}
-
 static void *__meminit alloc_page_cgroup(size_t size, int nid)
 {
+	gfp_t flags = GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN;
 	void *addr = NULL;
-	gfp_t flags = GFP_KERNEL | __GFP_NOWARN;
 
 	addr = alloc_pages_exact_nid(nid, size, flags);
 	if (addr) {
@@ -142,39 +117,20 @@
 	}
 
 	if (node_state(nid, N_HIGH_MEMORY))
-		addr = vmalloc_node(size, nid);
+		addr = vzalloc_node(size, nid);
 	else
-		addr = vmalloc(size);
+		addr = vzalloc(size);
 
 	return addr;
 }
 
-#ifdef CONFIG_MEMORY_HOTPLUG
-static void free_page_cgroup(void *addr)
-{
-	if (is_vmalloc_addr(addr)) {
-		vfree(addr);
-	} else {
-		struct page *page = virt_to_page(addr);
-		size_t table_size =
-			sizeof(struct page_cgroup) * PAGES_PER_SECTION;
-
-		BUG_ON(PageReserved(page));
-		free_pages_exact(addr, table_size);
-	}
-}
-#endif
-
 static int __meminit init_section_page_cgroup(unsigned long pfn, int nid)
 {
-	struct page_cgroup *base, *pc;
 	struct mem_section *section;
+	struct page_cgroup *base;
 	unsigned long table_size;
-	unsigned long nr;
-	int index;
 
-	nr = pfn_to_section_nr(pfn);
-	section = __nr_to_section(nr);
+	section = __pfn_to_section(pfn);
 
 	if (section->page_cgroup)
 		return 0;
@@ -194,10 +150,6 @@
 		return -ENOMEM;
 	}
 
-	for (index = 0; index < PAGES_PER_SECTION; index++) {
-		pc = base + index;
-		init_page_cgroup(pc, nr);
-	}
 	/*
 	 * The passed "pfn" may not be aligned to SECTION.  For the calculation
 	 * we need to apply a mask.
@@ -208,6 +160,20 @@
 	return 0;
 }
 #ifdef CONFIG_MEMORY_HOTPLUG
+static void free_page_cgroup(void *addr)
+{
+	if (is_vmalloc_addr(addr)) {
+		vfree(addr);
+	} else {
+		struct page *page = virt_to_page(addr);
+		size_t table_size =
+			sizeof(struct page_cgroup) * PAGES_PER_SECTION;
+
+		BUG_ON(PageReserved(page));
+		free_pages_exact(addr, table_size);
+	}
+}
+
 void __free_page_cgroup(unsigned long pfn)
 {
 	struct mem_section *ms;
@@ -366,7 +332,6 @@
 	unsigned short		id;
 };
 #define SC_PER_PAGE	(PAGE_SIZE/sizeof(struct swap_cgroup))
-#define SC_POS_MASK	(SC_PER_PAGE - 1)
 
 /*
  * SwapCgroup implements "lookup" and "exchange" operations.
@@ -408,6 +373,21 @@
 	return -ENOMEM;
 }
 
+static struct swap_cgroup *lookup_swap_cgroup(swp_entry_t ent,
+					struct swap_cgroup_ctrl **ctrlp)
+{
+	pgoff_t offset = swp_offset(ent);
+	struct swap_cgroup_ctrl *ctrl;
+	struct page *mappage;
+
+	ctrl = &swap_cgroup_ctrl[swp_type(ent)];
+	if (ctrlp)
+		*ctrlp = ctrl;
+
+	mappage = ctrl->map[offset / SC_PER_PAGE];
+	return page_address(mappage) + offset % SC_PER_PAGE;
+}
+
 /**
  * swap_cgroup_cmpxchg - cmpxchg mem_cgroup's id for this swp_entry.
  * @end: swap entry to be cmpxchged
@@ -420,21 +400,13 @@
 unsigned short swap_cgroup_cmpxchg(swp_entry_t ent,
 					unsigned short old, unsigned short new)
 {
-	int type = swp_type(ent);
-	unsigned long offset = swp_offset(ent);
-	unsigned long idx = offset / SC_PER_PAGE;
-	unsigned long pos = offset & SC_POS_MASK;
 	struct swap_cgroup_ctrl *ctrl;
-	struct page *mappage;
 	struct swap_cgroup *sc;
 	unsigned long flags;
 	unsigned short retval;
 
-	ctrl = &swap_cgroup_ctrl[type];
+	sc = lookup_swap_cgroup(ent, &ctrl);
 
-	mappage = ctrl->map[idx];
-	sc = page_address(mappage);
-	sc += pos;
 	spin_lock_irqsave(&ctrl->lock, flags);
 	retval = sc->id;
 	if (retval == old)
@@ -455,21 +427,13 @@
  */
 unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id)
 {
-	int type = swp_type(ent);
-	unsigned long offset = swp_offset(ent);
-	unsigned long idx = offset / SC_PER_PAGE;
-	unsigned long pos = offset & SC_POS_MASK;
 	struct swap_cgroup_ctrl *ctrl;
-	struct page *mappage;
 	struct swap_cgroup *sc;
 	unsigned short old;
 	unsigned long flags;
 
-	ctrl = &swap_cgroup_ctrl[type];
+	sc = lookup_swap_cgroup(ent, &ctrl);
 
-	mappage = ctrl->map[idx];
-	sc = page_address(mappage);
-	sc += pos;
 	spin_lock_irqsave(&ctrl->lock, flags);
 	old = sc->id;
 	sc->id = id;
@@ -479,28 +443,14 @@
 }
 
 /**
- * lookup_swap_cgroup - lookup mem_cgroup tied to swap entry
+ * lookup_swap_cgroup_id - lookup mem_cgroup id tied to swap entry
  * @ent: swap entry to be looked up.
  *
  * Returns CSS ID of mem_cgroup at success. 0 at failure. (0 is invalid ID)
  */
-unsigned short lookup_swap_cgroup(swp_entry_t ent)
+unsigned short lookup_swap_cgroup_id(swp_entry_t ent)
 {
-	int type = swp_type(ent);
-	unsigned long offset = swp_offset(ent);
-	unsigned long idx = offset / SC_PER_PAGE;
-	unsigned long pos = offset & SC_POS_MASK;
-	struct swap_cgroup_ctrl *ctrl;
-	struct page *mappage;
-	struct swap_cgroup *sc;
-	unsigned short ret;
-
-	ctrl = &swap_cgroup_ctrl[type];
-	mappage = ctrl->map[idx];
-	sc = page_address(mappage);
-	sc += pos;
-	ret = sc->id;
-	return ret;
+	return lookup_swap_cgroup(ent, NULL)->id;
 }
 
 int swap_cgroup_swapon(int type, unsigned long max_pages)

diff --git a/mm/rmap.c b/mm/rmap.c
index a2e5ce1..c8454e0 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c

@@ -773,7 +773,7 @@
 }
 
 static int page_referenced_anon(struct page *page,
-				struct mem_cgroup *mem_cont,
+				struct mem_cgroup *memcg,
 				unsigned long *vm_flags)
 {
 	unsigned int mapcount;
@@ -796,7 +796,7 @@
 		 * counting on behalf of references from different
 		 * cgroups
 		 */
-		if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont))
+		if (memcg && !mm_match_cgroup(vma->vm_mm, memcg))
 			continue;
 		referenced += page_referenced_one(page, vma, address,
 						  &mapcount, vm_flags);
@@ -811,7 +811,7 @@
 /**
  * page_referenced_file - referenced check for object-based rmap
  * @page: the page we're checking references on.
- * @mem_cont: target memory controller
+ * @memcg: target memory control group
  * @vm_flags: collect encountered vma->vm_flags who actually referenced the page
  *
  * For an object-based mapped page, find all the places it is mapped and
@@ -822,7 +822,7 @@
  * This function is only called from page_referenced for object-based pages.
  */
 static int page_referenced_file(struct page *page,
-				struct mem_cgroup *mem_cont,
+				struct mem_cgroup *memcg,
 				unsigned long *vm_flags)
 {
 	unsigned int mapcount;
@@ -864,7 +864,7 @@
 		 * counting on behalf of references from different
 		 * cgroups
 		 */
-		if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont))
+		if (memcg && !mm_match_cgroup(vma->vm_mm, memcg))
 			continue;
 		referenced += page_referenced_one(page, vma, address,
 						  &mapcount, vm_flags);
@@ -880,7 +880,7 @@
  * page_referenced - test if the page was referenced
  * @page: the page to test
  * @is_locked: caller holds lock on the page
- * @mem_cont: target memory controller
+ * @memcg: target memory cgroup
  * @vm_flags: collect encountered vma->vm_flags who actually referenced the page
  *
  * Quick test_and_clear_referenced for all mappings to a page,
@@ -888,7 +888,7 @@
  */
 int page_referenced(struct page *page,
 		    int is_locked,
-		    struct mem_cgroup *mem_cont,
+		    struct mem_cgroup *memcg,
 		    unsigned long *vm_flags)
 {
 	int referenced = 0;
@@ -904,13 +904,13 @@
 			}
 		}
 		if (unlikely(PageKsm(page)))
-			referenced += page_referenced_ksm(page, mem_cont,
+			referenced += page_referenced_ksm(page, memcg,
 								vm_flags);
 		else if (PageAnon(page))
-			referenced += page_referenced_anon(page, mem_cont,
+			referenced += page_referenced_anon(page, memcg,
 								vm_flags);
 		else if (page->mapping)
-			referenced += page_referenced_file(page, mem_cont,
+			referenced += page_referenced_file(page, memcg,
 								vm_flags);
 		if (we_locked)
 			unlock_page(page);

diff --git a/mm/slub.c b/mm/slub.c
index 5d37b5e..4907563 100644
--- a/mm/slub.c
+++ b/mm/slub.c

@@ -366,7 +366,8 @@
 		const char *n)
 {
 	VM_BUG_ON(!irqs_disabled());
-#ifdef CONFIG_CMPXCHG_DOUBLE
+#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
+    defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
 	if (s->flags & __CMPXCHG_DOUBLE) {
 		if (cmpxchg_double(&page->freelist, &page->counters,
 			freelist_old, counters_old,
@@ -400,7 +401,8 @@
 		void *freelist_new, unsigned long counters_new,
 		const char *n)
 {
-#ifdef CONFIG_CMPXCHG_DOUBLE
+#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
+    defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
 	if (s->flags & __CMPXCHG_DOUBLE) {
 		if (cmpxchg_double(&page->freelist, &page->counters,
 			freelist_old, counters_old,
@@ -3014,7 +3016,8 @@
 		}
 	}
 
-#ifdef CONFIG_CMPXCHG_DOUBLE
+#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
+    defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
 	if (system_has_cmpxchg_double() && (s->flags & SLAB_DEBUG_FLAGS) == 0)
 		/* Enable fast mode */
 		s->flags |= __CMPXCHG_DOUBLE;

diff --git a/mm/swap.c b/mm/swap.c
index 67a09a6..b0f529b 100644
--- a/mm/swap.c
+++ b/mm/swap.c

@@ -23,7 +23,6 @@
 #include <linux/init.h>
 #include <linux/export.h>
 #include <linux/mm_inline.h>
-#include <linux/buffer_head.h>	/* for try_to_release_page() */
 #include <linux/percpu_counter.h>
 #include <linux/percpu.h>
 #include <linux/cpu.h>
@@ -54,7 +53,7 @@
 		spin_lock_irqsave(&zone->lru_lock, flags);
 		VM_BUG_ON(!PageLRU(page));
 		__ClearPageLRU(page);
-		del_page_from_lru(zone, page);
+		del_page_from_lru_list(zone, page, page_off_lru(page));
 		spin_unlock_irqrestore(&zone->lru_lock, flags);
 	}
 }
@@ -232,12 +231,14 @@
 static void pagevec_move_tail_fn(struct page *page, void *arg)
 {
 	int *pgmoved = arg;
-	struct zone *zone = page_zone(page);
 
 	if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
 		enum lru_list lru = page_lru_base_type(page);
-		list_move_tail(&page->lru, &zone->lru[lru].list);
-		mem_cgroup_rotate_reclaimable_page(page);
+		struct lruvec *lruvec;
+
+		lruvec = mem_cgroup_lru_move_lists(page_zone(page),
+						   page, lru, lru);
+		list_move_tail(&page->lru, &lruvec->lists[lru]);
 		(*pgmoved)++;
 	}
 }
@@ -368,7 +369,6 @@
 		SetPageReferenced(page);
 	}
 }
-
 EXPORT_SYMBOL(mark_page_accessed);
 
 void __lru_cache_add(struct page *page, enum lru_list lru)
@@ -377,7 +377,7 @@
 
 	page_cache_get(page);
 	if (!pagevec_add(pvec, page))
-		____pagevec_lru_add(pvec, lru);
+		__pagevec_lru_add(pvec, lru);
 	put_cpu_var(lru_add_pvecs);
 }
 EXPORT_SYMBOL(__lru_cache_add);
@@ -476,12 +476,13 @@
 		 */
 		SetPageReclaim(page);
 	} else {
+		struct lruvec *lruvec;
 		/*
 		 * The page's writeback ends up during pagevec
 		 * We moves tha page into tail of inactive.
 		 */
-		list_move_tail(&page->lru, &zone->lru[lru].list);
-		mem_cgroup_rotate_reclaimable_page(page);
+		lruvec = mem_cgroup_lru_move_lists(zone, page, lru, lru);
+		list_move_tail(&page->lru, &lruvec->lists[lru]);
 		__count_vm_event(PGROTATED);
 	}
 
@@ -504,7 +505,7 @@
 	for_each_lru(lru) {
 		pvec = &pvecs[lru - LRU_BASE];
 		if (pagevec_count(pvec))
-			____pagevec_lru_add(pvec, lru);
+			__pagevec_lru_add(pvec, lru);
 	}
 
 	pvec = &per_cpu(lru_rotate_pvecs, cpu);
@@ -616,7 +617,7 @@
 			}
 			VM_BUG_ON(!PageLRU(page));
 			__ClearPageLRU(page);
-			del_page_from_lru(zone, page);
+			del_page_from_lru_list(zone, page, page_off_lru(page));
 		}
 
 		list_add(&page->lru, &pages_to_free);
@@ -644,9 +645,9 @@
 	release_pages(pvec->pages, pagevec_count(pvec), pvec->cold);
 	pagevec_reinit(pvec);
 }
-
 EXPORT_SYMBOL(__pagevec_release);
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 /* used by __split_huge_page_refcount() */
 void lru_add_page_tail(struct zone* zone,
 		       struct page *page, struct page *page_tail)
@@ -654,7 +655,6 @@
 	int active;
 	enum lru_list lru;
 	const int file = 0;
-	struct list_head *head;
 
 	VM_BUG_ON(!PageHead(page));
 	VM_BUG_ON(PageCompound(page_tail));
@@ -673,18 +673,30 @@
 			lru = LRU_INACTIVE_ANON;
 		}
 		update_page_reclaim_stat(zone, page_tail, file, active);
-		if (likely(PageLRU(page)))
-			head = page->lru.prev;
-		else
-			head = &zone->lru[lru].list;
-		__add_page_to_lru_list(zone, page_tail, lru, head);
 	} else {
 		SetPageUnevictable(page_tail);
-		add_page_to_lru_list(zone, page_tail, LRU_UNEVICTABLE);
+		lru = LRU_UNEVICTABLE;
+	}
+
+	if (likely(PageLRU(page)))
+		list_add_tail(&page_tail->lru, &page->lru);
+	else {
+		struct list_head *list_head;
+		/*
+		 * Head page has not yet been counted, as an hpage,
+		 * so we must account for each subpage individually.
+		 *
+		 * Use the standard add function to put page_tail on the list,
+		 * but then correct its position so they all end up in order.
+		 */
+		add_page_to_lru_list(zone, page_tail, lru);
+		list_head = page_tail->lru.prev;
+		list_move_tail(&page_tail->lru, list_head);
 	}
 }
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
-static void ____pagevec_lru_add_fn(struct page *page, void *arg)
+static void __pagevec_lru_add_fn(struct page *page, void *arg)
 {
 	enum lru_list lru = (enum lru_list)arg;
 	struct zone *zone = page_zone(page);
@@ -706,32 +718,13 @@
  * Add the passed pages to the LRU, then drop the caller's refcount
  * on them.  Reinitialises the caller's pagevec.
  */
-void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru)
+void __pagevec_lru_add(struct pagevec *pvec, enum lru_list lru)
 {
 	VM_BUG_ON(is_unevictable_lru(lru));
 
-	pagevec_lru_move_fn(pvec, ____pagevec_lru_add_fn, (void *)lru);
+	pagevec_lru_move_fn(pvec, __pagevec_lru_add_fn, (void *)lru);
 }
-
-EXPORT_SYMBOL(____pagevec_lru_add);
-
-/*
- * Try to drop buffers from the pages in a pagevec
- */
-void pagevec_strip(struct pagevec *pvec)
-{
-	int i;
-
-	for (i = 0; i < pagevec_count(pvec); i++) {
-		struct page *page = pvec->pages[i];
-
-		if (page_has_private(page) && trylock_page(page)) {
-			if (page_has_private(page))
-				try_to_release_page(page, 0);
-			unlock_page(page);
-		}
-	}
-}
+EXPORT_SYMBOL(__pagevec_lru_add);
 
 /**
  * pagevec_lookup - gang pagecache lookup
@@ -755,7 +748,6 @@
 	pvec->nr = find_get_pages(mapping, start, nr_pages, pvec->pages);
 	return pagevec_count(pvec);
 }
-
 EXPORT_SYMBOL(pagevec_lookup);
 
 unsigned pagevec_lookup_tag(struct pagevec *pvec, struct address_space *mapping,
@@ -765,7 +757,6 @@
 					nr_pages, pvec->pages);
 	return pagevec_count(pvec);
 }
-
 EXPORT_SYMBOL(pagevec_lookup_tag);
 
 /*

diff --git a/mm/swap_state.c b/mm/swap_state.c
index ea6b32d..470038a 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c

@@ -300,6 +300,16 @@
 			new_page = alloc_page_vma(gfp_mask, vma, addr);
 			if (!new_page)
 				break;		/* Out of memory */
+			/*
+			 * The memcg-specific accounting when moving
+			 * pages around the LRU lists relies on the
+			 * page's owner (memcg) to be valid.  Usually,
+			 * pages are assigned to a new owner before
+			 * being put on the LRU list, but since this
+			 * is not the case here, the stale owner from
+			 * a previous allocation cycle must be reset.
+			 */
+			mem_cgroup_reset_owner(new_page);
 		}
 
 		/*

diff --git a/mm/swapfile.c b/mm/swapfile.c
index 9520592..d999f09 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c

@@ -847,12 +847,13 @@
 static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
 		unsigned long addr, swp_entry_t entry, struct page *page)
 {
-	struct mem_cgroup *ptr;
+	struct mem_cgroup *memcg;
 	spinlock_t *ptl;
 	pte_t *pte;
 	int ret = 1;
 
-	if (mem_cgroup_try_charge_swapin(vma->vm_mm, page, GFP_KERNEL, &ptr)) {
+	if (mem_cgroup_try_charge_swapin(vma->vm_mm, page,
+					 GFP_KERNEL, &memcg)) {
 		ret = -ENOMEM;
 		goto out_nolock;
 	}
@@ -860,7 +861,7 @@
 	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
 	if (unlikely(!pte_same(*pte, swp_entry_to_pte(entry)))) {
 		if (ret > 0)
-			mem_cgroup_cancel_charge_swapin(ptr);
+			mem_cgroup_cancel_charge_swapin(memcg);
 		ret = 0;
 		goto out;
 	}
@@ -871,7 +872,7 @@
 	set_pte_at(vma->vm_mm, addr, pte,
 		   pte_mkold(mk_pte(page, vma->vm_page_prot)));
 	page_add_anon_rmap(page, vma, addr);
-	mem_cgroup_commit_charge_swapin(page, ptr);
+	mem_cgroup_commit_charge_swapin(page, memcg);
 	swap_free(entry);
 	/*
 	 * Move the page to the active list so it is not

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 877ca04..86ce9a5 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c

@@ -2378,7 +2378,7 @@
 	vms = kzalloc(sizeof(vms[0]) * nr_vms, GFP_KERNEL);
 	vas = kzalloc(sizeof(vas[0]) * nr_vms, GFP_KERNEL);
 	if (!vas || !vms)
-		goto err_free;
+		goto err_free2;
 
 	for (area = 0; area < nr_vms; area++) {
 		vas[area] = kzalloc(sizeof(struct vmap_area), GFP_KERNEL);
@@ -2476,11 +2476,10 @@
 
 err_free:
 	for (area = 0; area < nr_vms; area++) {
-		if (vas)
-			kfree(vas[area]);
-		if (vms)
-			kfree(vms[area]);
+		kfree(vas[area]);
+		kfree(vms[area]);
 	}
+err_free2:
 	kfree(vas);
 	kfree(vms);
 	return NULL;

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 26f4a8a..2880396 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c

@@ -103,8 +103,11 @@
 	 */
 	reclaim_mode_t reclaim_mode;
 
-	/* Which cgroup do we reclaim from */
-	struct mem_cgroup *mem_cgroup;
+	/*
+	 * The memory cgroup that hit its limit and as a result is the
+	 * primary target of this reclaim invocation.
+	 */
+	struct mem_cgroup *target_mem_cgroup;
 
 	/*
 	 * Nodemask of nodes allowed by the caller. If NULL, all nodes
@@ -113,6 +116,11 @@
 	nodemask_t	*nodemask;
 };
 
+struct mem_cgroup_zone {
+	struct mem_cgroup *mem_cgroup;
+	struct zone *zone;
+};
+
 #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
 
 #ifdef ARCH_HAS_PREFETCH
@@ -153,28 +161,45 @@
 static DECLARE_RWSEM(shrinker_rwsem);
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
-#define scanning_global_lru(sc)	(!(sc)->mem_cgroup)
-#else
-#define scanning_global_lru(sc)	(1)
-#endif
-
-static struct zone_reclaim_stat *get_reclaim_stat(struct zone *zone,
-						  struct scan_control *sc)
+static bool global_reclaim(struct scan_control *sc)
 {
-	if (!scanning_global_lru(sc))
-		return mem_cgroup_get_reclaim_stat(sc->mem_cgroup, zone);
-
-	return &zone->reclaim_stat;
+	return !sc->target_mem_cgroup;
 }
 
-static unsigned long zone_nr_lru_pages(struct zone *zone,
-				struct scan_control *sc, enum lru_list lru)
+static bool scanning_global_lru(struct mem_cgroup_zone *mz)
 {
-	if (!scanning_global_lru(sc))
-		return mem_cgroup_zone_nr_lru_pages(sc->mem_cgroup,
-				zone_to_nid(zone), zone_idx(zone), BIT(lru));
+	return !mz->mem_cgroup;
+}
+#else
+static bool global_reclaim(struct scan_control *sc)
+{
+	return true;
+}
 
-	return zone_page_state(zone, NR_LRU_BASE + lru);
+static bool scanning_global_lru(struct mem_cgroup_zone *mz)
+{
+	return true;
+}
+#endif
+
+static struct zone_reclaim_stat *get_reclaim_stat(struct mem_cgroup_zone *mz)
+{
+	if (!scanning_global_lru(mz))
+		return mem_cgroup_get_reclaim_stat(mz->mem_cgroup, mz->zone);
+
+	return &mz->zone->reclaim_stat;
+}
+
+static unsigned long zone_nr_lru_pages(struct mem_cgroup_zone *mz,
+				       enum lru_list lru)
+{
+	if (!scanning_global_lru(mz))
+		return mem_cgroup_zone_nr_lru_pages(mz->mem_cgroup,
+						    zone_to_nid(mz->zone),
+						    zone_idx(mz->zone),
+						    BIT(lru));
+
+	return zone_page_state(mz->zone, NR_LRU_BASE + lru);
 }
 
 
@@ -677,12 +702,13 @@
 };
 
 static enum page_references page_check_references(struct page *page,
+						  struct mem_cgroup_zone *mz,
 						  struct scan_control *sc)
 {
 	int referenced_ptes, referenced_page;
 	unsigned long vm_flags;
 
-	referenced_ptes = page_referenced(page, 1, sc->mem_cgroup, &vm_flags);
+	referenced_ptes = page_referenced(page, 1, mz->mem_cgroup, &vm_flags);
 	referenced_page = TestClearPageReferenced(page);
 
 	/* Lumpy reclaim - ignore references */
@@ -738,7 +764,7 @@
  * shrink_page_list() returns the number of reclaimed pages
  */
 static unsigned long shrink_page_list(struct list_head *page_list,
-				      struct zone *zone,
+				      struct mem_cgroup_zone *mz,
 				      struct scan_control *sc,
 				      int priority,
 				      unsigned long *ret_nr_dirty,
@@ -769,7 +795,7 @@
 			goto keep;
 
 		VM_BUG_ON(PageActive(page));
-		VM_BUG_ON(page_zone(page) != zone);
+		VM_BUG_ON(page_zone(page) != mz->zone);
 
 		sc->nr_scanned++;
 
@@ -803,7 +829,7 @@
 			}
 		}
 
-		references = page_check_references(page, sc);
+		references = page_check_references(page, mz, sc);
 		switch (references) {
 		case PAGEREF_ACTIVATE:
 			goto activate_locked;
@@ -994,8 +1020,8 @@
 	 * back off and wait for congestion to clear because further reclaim
 	 * will encounter the same problem
 	 */
-	if (nr_dirty && nr_dirty == nr_congested && scanning_global_lru(sc))
-		zone_set_flag(zone, ZONE_CONGESTED);
+	if (nr_dirty && nr_dirty == nr_congested && global_reclaim(sc))
+		zone_set_flag(mz->zone, ZONE_CONGESTED);
 
 	free_hot_cold_page_list(&free_pages, 1);
 
@@ -1049,8 +1075,39 @@
 
 	ret = -EBUSY;
 
-	if ((mode & ISOLATE_CLEAN) && (PageDirty(page) || PageWriteback(page)))
-		return ret;
+	/*
+	 * To minimise LRU disruption, the caller can indicate that it only
+	 * wants to isolate pages it will be able to operate on without
+	 * blocking - clean pages for the most part.
+	 *
+	 * ISOLATE_CLEAN means that only clean pages should be isolated. This
+	 * is used by reclaim when it cannot write to backing storage.
+	 *
+	 * ISOLATE_ASYNC_MIGRATE indicates that the caller only wants pages
+	 * that it is possible to migrate without blocking.
+	 */
+	if (mode & (ISOLATE_CLEAN|ISOLATE_ASYNC_MIGRATE)) {
+		/* All the caller can do on PageWriteback is block */
+		if (PageWriteback(page))
+			return ret;
+
+		if (PageDirty(page)) {
+			struct address_space *mapping;
+
+			/* ISOLATE_CLEAN means only clean pages */
+			if (mode & ISOLATE_CLEAN)
+				return ret;
+
+			/*
+			 * Only pages without mappings or that have a
+			 * ->migratepage callback are possible to migrate
+			 * without blocking
+			 */
+			mapping = page_mapping(page);
+			if (mapping && !mapping->a_ops->migratepage)
+				return ret;
+		}
+	}
 
 	if ((mode & ISOLATE_UNMAPPED) && page_mapped(page))
 		return ret;
@@ -1079,25 +1136,36 @@
  * Appropriate locks must be held before calling this function.
  *
  * @nr_to_scan:	The number of pages to look through on the list.
- * @src:	The LRU list to pull pages off.
+ * @mz:		The mem_cgroup_zone to pull pages from.
  * @dst:	The temp list to put pages on to.
- * @scanned:	The number of pages that were scanned.
+ * @nr_scanned:	The number of pages that were scanned.
  * @order:	The caller's attempted allocation order
  * @mode:	One of the LRU isolation modes
+ * @active:	True [1] if isolating active pages
  * @file:	True [1] if isolating file [!anon] pages
  *
  * returns how many pages were moved onto *@dst.
  */
 static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
-		struct list_head *src, struct list_head *dst,
-		unsigned long *scanned, int order, isolate_mode_t mode,
-		int file)
+		struct mem_cgroup_zone *mz, struct list_head *dst,
+		unsigned long *nr_scanned, int order, isolate_mode_t mode,
+		int active, int file)
 {
+	struct lruvec *lruvec;
+	struct list_head *src;
 	unsigned long nr_taken = 0;
 	unsigned long nr_lumpy_taken = 0;
 	unsigned long nr_lumpy_dirty = 0;
 	unsigned long nr_lumpy_failed = 0;
 	unsigned long scan;
+	int lru = LRU_BASE;
+
+	lruvec = mem_cgroup_zone_lruvec(mz->zone, mz->mem_cgroup);
+	if (active)
+		lru += LRU_ACTIVE;
+	if (file)
+		lru += LRU_FILE;
+	src = &lruvec->lists[lru];
 
 	for (scan = 0; scan < nr_to_scan && !list_empty(src); scan++) {
 		struct page *page;
@@ -1113,15 +1181,14 @@
 
 		switch (__isolate_lru_page(page, mode, file)) {
 		case 0:
+			mem_cgroup_lru_del(page);
 			list_move(&page->lru, dst);
-			mem_cgroup_del_lru(page);
 			nr_taken += hpage_nr_pages(page);
 			break;
 
 		case -EBUSY:
 			/* else it is being freed elsewhere */
 			list_move(&page->lru, src);
-			mem_cgroup_rotate_lru_list(page, page_lru(page));
 			continue;
 
 		default:
@@ -1171,13 +1238,17 @@
 				break;
 
 			if (__isolate_lru_page(cursor_page, mode, file) == 0) {
+				unsigned int isolated_pages;
+
+				mem_cgroup_lru_del(cursor_page);
 				list_move(&cursor_page->lru, dst);
-				mem_cgroup_del_lru(cursor_page);
-				nr_taken += hpage_nr_pages(cursor_page);
-				nr_lumpy_taken++;
+				isolated_pages = hpage_nr_pages(cursor_page);
+				nr_taken += isolated_pages;
+				nr_lumpy_taken += isolated_pages;
 				if (PageDirty(cursor_page))
-					nr_lumpy_dirty++;
+					nr_lumpy_dirty += isolated_pages;
 				scan++;
+				pfn += isolated_pages - 1;
 			} else {
 				/*
 				 * Check if the page is freed already.
@@ -1203,57 +1274,16 @@
 			nr_lumpy_failed++;
 	}
 
-	*scanned = scan;
+	*nr_scanned = scan;
 
 	trace_mm_vmscan_lru_isolate(order,
 			nr_to_scan, scan,
 			nr_taken,
 			nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed,
-			mode);
+			mode, file);
 	return nr_taken;
 }
 
-static unsigned long isolate_pages_global(unsigned long nr,
-					struct list_head *dst,
-					unsigned long *scanned, int order,
-					isolate_mode_t mode,
-					struct zone *z,	int active, int file)
-{
-	int lru = LRU_BASE;
-	if (active)
-		lru += LRU_ACTIVE;
-	if (file)
-		lru += LRU_FILE;
-	return isolate_lru_pages(nr, &z->lru[lru].list, dst, scanned, order,
-								mode, file);
-}
-
-/*
- * clear_active_flags() is a helper for shrink_active_list(), clearing
- * any active bits from the pages in the list.
- */
-static unsigned long clear_active_flags(struct list_head *page_list,
-					unsigned int *count)
-{
-	int nr_active = 0;
-	int lru;
-	struct page *page;
-
-	list_for_each_entry(page, page_list, lru) {
-		int numpages = hpage_nr_pages(page);
-		lru = page_lru_base_type(page);
-		if (PageActive(page)) {
-			lru += LRU_ACTIVE;
-			ClearPageActive(page);
-			nr_active += numpages;
-		}
-		if (count)
-			count[lru] += numpages;
-	}
-
-	return nr_active;
-}
-
 /**
  * isolate_lru_page - tries to isolate a page from its LRU list
  * @page: page to isolate from its LRU list
@@ -1313,7 +1343,7 @@
 	if (current_is_kswapd())
 		return 0;
 
-	if (!scanning_global_lru(sc))
+	if (!global_reclaim(sc))
 		return 0;
 
 	if (file) {
@@ -1327,27 +1357,21 @@
 	return isolated > inactive;
 }
 
-/*
- * TODO: Try merging with migrations version of putback_lru_pages
- */
 static noinline_for_stack void
-putback_lru_pages(struct zone *zone, struct scan_control *sc,
-				unsigned long nr_anon, unsigned long nr_file,
-				struct list_head *page_list)
+putback_inactive_pages(struct mem_cgroup_zone *mz,
+		       struct list_head *page_list)
 {
-	struct page *page;
-	struct pagevec pvec;
-	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
-
-	pagevec_init(&pvec, 1);
+	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz);
+	struct zone *zone = mz->zone;
+	LIST_HEAD(pages_to_free);
 
 	/*
 	 * Put back any unfreeable pages.
 	 */
-	spin_lock(&zone->lru_lock);
 	while (!list_empty(page_list)) {
+		struct page *page = lru_to_page(page_list);
 		int lru;
-		page = lru_to_page(page_list);
+
 		VM_BUG_ON(PageLRU(page));
 		list_del(&page->lru);
 		if (unlikely(!page_evictable(page, NULL))) {
@@ -1364,30 +1388,53 @@
 			int numpages = hpage_nr_pages(page);
 			reclaim_stat->recent_rotated[file] += numpages;
 		}
-		if (!pagevec_add(&pvec, page)) {
-			spin_unlock_irq(&zone->lru_lock);
-			__pagevec_release(&pvec);
-			spin_lock_irq(&zone->lru_lock);
+		if (put_page_testzero(page)) {
+			__ClearPageLRU(page);
+			__ClearPageActive(page);
+			del_page_from_lru_list(zone, page, lru);
+
+			if (unlikely(PageCompound(page))) {
+				spin_unlock_irq(&zone->lru_lock);
+				(*get_compound_page_dtor(page))(page);
+				spin_lock_irq(&zone->lru_lock);
+			} else
+				list_add(&page->lru, &pages_to_free);
 		}
 	}
-	__mod_zone_page_state(zone, NR_ISOLATED_ANON, -nr_anon);
-	__mod_zone_page_state(zone, NR_ISOLATED_FILE, -nr_file);
 
-	spin_unlock_irq(&zone->lru_lock);
-	pagevec_release(&pvec);
+	/*
+	 * To save our caller's stack, now use input list for pages to free.
+	 */
+	list_splice(&pages_to_free, page_list);
 }
 
-static noinline_for_stack void update_isolated_counts(struct zone *zone,
-					struct scan_control *sc,
-					unsigned long *nr_anon,
-					unsigned long *nr_file,
-					struct list_head *isolated_list)
+static noinline_for_stack void
+update_isolated_counts(struct mem_cgroup_zone *mz,
+		       struct list_head *page_list,
+		       unsigned long *nr_anon,
+		       unsigned long *nr_file)
 {
-	unsigned long nr_active;
+	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz);
+	struct zone *zone = mz->zone;
 	unsigned int count[NR_LRU_LISTS] = { 0, };
-	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
+	unsigned long nr_active = 0;
+	struct page *page;
+	int lru;
 
-	nr_active = clear_active_flags(isolated_list, count);
+	/*
+	 * Count pages and clear active flags
+	 */
+	list_for_each_entry(page, page_list, lru) {
+		int numpages = hpage_nr_pages(page);
+		lru = page_lru_base_type(page);
+		if (PageActive(page)) {
+			lru += LRU_ACTIVE;
+			ClearPageActive(page);
+			nr_active += numpages;
+		}
+		count[lru] += numpages;
+	}
+
 	__count_vm_events(PGDEACTIVATE, nr_active);
 
 	__mod_zone_page_state(zone, NR_ACTIVE_FILE,
@@ -1401,8 +1448,6 @@
 
 	*nr_anon = count[LRU_ACTIVE_ANON] + count[LRU_INACTIVE_ANON];
 	*nr_file = count[LRU_ACTIVE_FILE] + count[LRU_INACTIVE_FILE];
-	__mod_zone_page_state(zone, NR_ISOLATED_ANON, *nr_anon);
-	__mod_zone_page_state(zone, NR_ISOLATED_FILE, *nr_file);
 
 	reclaim_stat->recent_scanned[0] += *nr_anon;
 	reclaim_stat->recent_scanned[1] += *nr_file;
@@ -1454,8 +1499,8 @@
  * of reclaimed pages
  */
 static noinline_for_stack unsigned long
-shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
-			struct scan_control *sc, int priority, int file)
+shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz,
+		     struct scan_control *sc, int priority, int file)
 {
 	LIST_HEAD(page_list);
 	unsigned long nr_scanned;
@@ -1466,6 +1511,7 @@
 	unsigned long nr_dirty = 0;
 	unsigned long nr_writeback = 0;
 	isolate_mode_t reclaim_mode = ISOLATE_INACTIVE;
+	struct zone *zone = mz->zone;
 
 	while (unlikely(too_many_isolated(zone, file, sc))) {
 		congestion_wait(BLK_RW_ASYNC, HZ/10);
@@ -1488,9 +1534,10 @@
 
 	spin_lock_irq(&zone->lru_lock);
 
-	if (scanning_global_lru(sc)) {
-		nr_taken = isolate_pages_global(nr_to_scan, &page_list,
-			&nr_scanned, sc->order, reclaim_mode, zone, 0, file);
+	nr_taken = isolate_lru_pages(nr_to_scan, mz, &page_list,
+				     &nr_scanned, sc->order,
+				     reclaim_mode, 0, file);
+	if (global_reclaim(sc)) {
 		zone->pages_scanned += nr_scanned;
 		if (current_is_kswapd())
 			__count_zone_vm_events(PGSCAN_KSWAPD, zone,
@@ -1498,14 +1545,6 @@
 		else
 			__count_zone_vm_events(PGSCAN_DIRECT, zone,
 					       nr_scanned);
-	} else {
-		nr_taken = mem_cgroup_isolate_pages(nr_to_scan, &page_list,
-			&nr_scanned, sc->order, reclaim_mode, zone,
-			sc->mem_cgroup, 0, file);
-		/*
-		 * mem_cgroup_isolate_pages() keeps track of
-		 * scanned pages on its own.
-		 */
 	}
 
 	if (nr_taken == 0) {
@@ -1513,26 +1552,37 @@
 		return 0;
 	}
 
-	update_isolated_counts(zone, sc, &nr_anon, &nr_file, &page_list);
+	update_isolated_counts(mz, &page_list, &nr_anon, &nr_file);
+
+	__mod_zone_page_state(zone, NR_ISOLATED_ANON, nr_anon);
+	__mod_zone_page_state(zone, NR_ISOLATED_FILE, nr_file);
 
 	spin_unlock_irq(&zone->lru_lock);
 
-	nr_reclaimed = shrink_page_list(&page_list, zone, sc, priority,
+	nr_reclaimed = shrink_page_list(&page_list, mz, sc, priority,
 						&nr_dirty, &nr_writeback);
 
 	/* Check if we should syncronously wait for writeback */
 	if (should_reclaim_stall(nr_taken, nr_reclaimed, priority, sc)) {
 		set_reclaim_mode(priority, sc, true);
-		nr_reclaimed += shrink_page_list(&page_list, zone, sc,
+		nr_reclaimed += shrink_page_list(&page_list, mz, sc,
 					priority, &nr_dirty, &nr_writeback);
 	}
 
-	local_irq_disable();
+	spin_lock_irq(&zone->lru_lock);
+
 	if (current_is_kswapd())
 		__count_vm_events(KSWAPD_STEAL, nr_reclaimed);
 	__count_zone_vm_events(PGSTEAL, zone, nr_reclaimed);
 
-	putback_lru_pages(zone, sc, nr_anon, nr_file, &page_list);
+	putback_inactive_pages(mz, &page_list);
+
+	__mod_zone_page_state(zone, NR_ISOLATED_ANON, -nr_anon);
+	__mod_zone_page_state(zone, NR_ISOLATED_FILE, -nr_file);
+
+	spin_unlock_irq(&zone->lru_lock);
+
+	free_hot_cold_page_list(&page_list, 1);
 
 	/*
 	 * If reclaim is isolating dirty pages under writeback, it implies
@@ -1588,30 +1638,47 @@
 
 static void move_active_pages_to_lru(struct zone *zone,
 				     struct list_head *list,
+				     struct list_head *pages_to_free,
 				     enum lru_list lru)
 {
 	unsigned long pgmoved = 0;
-	struct pagevec pvec;
 	struct page *page;
 
-	pagevec_init(&pvec, 1);
+	if (buffer_heads_over_limit) {
+		spin_unlock_irq(&zone->lru_lock);
+		list_for_each_entry(page, list, lru) {
+			if (page_has_private(page) && trylock_page(page)) {
+				if (page_has_private(page))
+					try_to_release_page(page, 0);
+				unlock_page(page);
+			}
+		}
+		spin_lock_irq(&zone->lru_lock);
+	}
 
 	while (!list_empty(list)) {
+		struct lruvec *lruvec;
+
 		page = lru_to_page(list);
 
 		VM_BUG_ON(PageLRU(page));
 		SetPageLRU(page);
 
-		list_move(&page->lru, &zone->lru[lru].list);
-		mem_cgroup_add_lru_list(page, lru);
+		lruvec = mem_cgroup_lru_add_list(zone, page, lru);
+		list_move(&page->lru, &lruvec->lists[lru]);
 		pgmoved += hpage_nr_pages(page);
 
-		if (!pagevec_add(&pvec, page) || list_empty(list)) {
-			spin_unlock_irq(&zone->lru_lock);
-			if (buffer_heads_over_limit)
-				pagevec_strip(&pvec);
-			__pagevec_release(&pvec);
-			spin_lock_irq(&zone->lru_lock);
+		if (put_page_testzero(page)) {
+			__ClearPageLRU(page);
+			__ClearPageActive(page);
+			del_page_from_lru_list(zone, page, lru);
+
+			if (unlikely(PageCompound(page))) {
+				spin_unlock_irq(&zone->lru_lock);
+				(*get_compound_page_dtor(page))(page);
+				spin_lock_irq(&zone->lru_lock);
+			} else
+				list_add(&page->lru, pages_to_free);
 		}
 	}
 	__mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
@@ -1619,19 +1686,22 @@
 		__count_vm_events(PGDEACTIVATE, pgmoved);
 }
 
-static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
-			struct scan_control *sc, int priority, int file)
+static void shrink_active_list(unsigned long nr_to_scan,
+			       struct mem_cgroup_zone *mz,
+			       struct scan_control *sc,
+			       int priority, int file)
 {
 	unsigned long nr_taken;
-	unsigned long pgscanned;
+	unsigned long nr_scanned;
 	unsigned long vm_flags;
 	LIST_HEAD(l_hold);	/* The pages which were snipped off */
 	LIST_HEAD(l_active);
 	LIST_HEAD(l_inactive);
 	struct page *page;
-	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
+	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz);
 	unsigned long nr_rotated = 0;
 	isolate_mode_t reclaim_mode = ISOLATE_ACTIVE;
+	struct zone *zone = mz->zone;
 
 	lru_add_drain();
 
@@ -1641,26 +1711,16 @@
 		reclaim_mode |= ISOLATE_CLEAN;
 
 	spin_lock_irq(&zone->lru_lock);
-	if (scanning_global_lru(sc)) {
-		nr_taken = isolate_pages_global(nr_pages, &l_hold,
-						&pgscanned, sc->order,
-						reclaim_mode, zone,
-						1, file);
-		zone->pages_scanned += pgscanned;
-	} else {
-		nr_taken = mem_cgroup_isolate_pages(nr_pages, &l_hold,
-						&pgscanned, sc->order,
-						reclaim_mode, zone,
-						sc->mem_cgroup, 1, file);
-		/*
-		 * mem_cgroup_isolate_pages() keeps track of
-		 * scanned pages on its own.
-		 */
-	}
+
+	nr_taken = isolate_lru_pages(nr_to_scan, mz, &l_hold,
+				     &nr_scanned, sc->order,
+				     reclaim_mode, 1, file);
+	if (global_reclaim(sc))
+		zone->pages_scanned += nr_scanned;
 
 	reclaim_stat->recent_scanned[file] += nr_taken;
 
-	__count_zone_vm_events(PGREFILL, zone, pgscanned);
+	__count_zone_vm_events(PGREFILL, zone, nr_scanned);
 	if (file)
 		__mod_zone_page_state(zone, NR_ACTIVE_FILE, -nr_taken);
 	else
@@ -1678,7 +1738,7 @@
 			continue;
 		}
 
-		if (page_referenced(page, 0, sc->mem_cgroup, &vm_flags)) {
+		if (page_referenced(page, 0, mz->mem_cgroup, &vm_flags)) {
 			nr_rotated += hpage_nr_pages(page);
 			/*
 			 * Identify referenced, file-backed active pages and
@@ -1711,12 +1771,14 @@
 	 */
 	reclaim_stat->recent_rotated[file] += nr_rotated;
 
-	move_active_pages_to_lru(zone, &l_active,
+	move_active_pages_to_lru(zone, &l_active, &l_hold,
 						LRU_ACTIVE + file * LRU_FILE);
-	move_active_pages_to_lru(zone, &l_inactive,
+	move_active_pages_to_lru(zone, &l_inactive, &l_hold,
 						LRU_BASE   + file * LRU_FILE);
 	__mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken);
 	spin_unlock_irq(&zone->lru_lock);
+
+	free_hot_cold_page_list(&l_hold, 1);
 }
 
 #ifdef CONFIG_SWAP
@@ -1741,10 +1803,8 @@
  * Returns true if the zone does not have enough inactive anon pages,
  * meaning some active anon pages need to be deactivated.
  */
-static int inactive_anon_is_low(struct zone *zone, struct scan_control *sc)
+static int inactive_anon_is_low(struct mem_cgroup_zone *mz)
 {
-	int low;
-
 	/*
 	 * If we don't have swap space, anonymous page deactivation
 	 * is pointless.
@@ -1752,15 +1812,14 @@
 	if (!total_swap_pages)
 		return 0;
 
-	if (scanning_global_lru(sc))
-		low = inactive_anon_is_low_global(zone);
-	else
-		low = mem_cgroup_inactive_anon_is_low(sc->mem_cgroup, zone);
-	return low;
+	if (!scanning_global_lru(mz))
+		return mem_cgroup_inactive_anon_is_low(mz->mem_cgroup,
+						       mz->zone);
+
+	return inactive_anon_is_low_global(mz->zone);
 }
 #else
-static inline int inactive_anon_is_low(struct zone *zone,
-					struct scan_control *sc)
+static inline int inactive_anon_is_low(struct mem_cgroup_zone *mz)
 {
 	return 0;
 }
@@ -1778,8 +1837,7 @@
 
 /**
  * inactive_file_is_low - check if file pages need to be deactivated
- * @zone: zone to check
- * @sc:   scan control of this context
+ * @mz: memory cgroup and zone to check
  *
  * When the system is doing streaming IO, memory pressure here
  * ensures that active file pages get deactivated, until more
@@ -1791,45 +1849,44 @@
  * This uses a different ratio than the anonymous pages, because
  * the page cache uses a use-once replacement algorithm.
  */
-static int inactive_file_is_low(struct zone *zone, struct scan_control *sc)
+static int inactive_file_is_low(struct mem_cgroup_zone *mz)
 {
-	int low;
+	if (!scanning_global_lru(mz))
+		return mem_cgroup_inactive_file_is_low(mz->mem_cgroup,
+						       mz->zone);
 
-	if (scanning_global_lru(sc))
-		low = inactive_file_is_low_global(zone);
-	else
-		low = mem_cgroup_inactive_file_is_low(sc->mem_cgroup, zone);
-	return low;
+	return inactive_file_is_low_global(mz->zone);
 }
 
-static int inactive_list_is_low(struct zone *zone, struct scan_control *sc,
-				int file)
+static int inactive_list_is_low(struct mem_cgroup_zone *mz, int file)
 {
 	if (file)
-		return inactive_file_is_low(zone, sc);
+		return inactive_file_is_low(mz);
 	else
-		return inactive_anon_is_low(zone, sc);
+		return inactive_anon_is_low(mz);
 }
 
 static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
-	struct zone *zone, struct scan_control *sc, int priority)
+				 struct mem_cgroup_zone *mz,
+				 struct scan_control *sc, int priority)
 {
 	int file = is_file_lru(lru);
 
 	if (is_active_lru(lru)) {
-		if (inactive_list_is_low(zone, sc, file))
-		    shrink_active_list(nr_to_scan, zone, sc, priority, file);
+		if (inactive_list_is_low(mz, file))
+			shrink_active_list(nr_to_scan, mz, sc, priority, file);
 		return 0;
 	}
 
-	return shrink_inactive_list(nr_to_scan, zone, sc, priority, file);
+	return shrink_inactive_list(nr_to_scan, mz, sc, priority, file);
 }
 
-static int vmscan_swappiness(struct scan_control *sc)
+static int vmscan_swappiness(struct mem_cgroup_zone *mz,
+			     struct scan_control *sc)
 {
-	if (scanning_global_lru(sc))
+	if (global_reclaim(sc))
 		return vm_swappiness;
-	return mem_cgroup_swappiness(sc->mem_cgroup);
+	return mem_cgroup_swappiness(mz->mem_cgroup);
 }
 
 /*
@@ -1840,15 +1897,15 @@
  *
  * nr[0] = anon pages to scan; nr[1] = file pages to scan
  */
-static void get_scan_count(struct zone *zone, struct scan_control *sc,
-					unsigned long *nr, int priority)
+static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc,
+			   unsigned long *nr, int priority)
 {
 	unsigned long anon, file, free;
 	unsigned long anon_prio, file_prio;
 	unsigned long ap, fp;
-	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
+	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz);
 	u64 fraction[2], denominator;
-	enum lru_list l;
+	enum lru_list lru;
 	int noswap = 0;
 	bool force_scan = false;
 
@@ -1862,9 +1919,9 @@
 	 * latencies, so it's better to scan a minimum amount there as
 	 * well.
 	 */
-	if (scanning_global_lru(sc) && current_is_kswapd())
+	if (current_is_kswapd() && mz->zone->all_unreclaimable)
 		force_scan = true;
-	if (!scanning_global_lru(sc))
+	if (!global_reclaim(sc))
 		force_scan = true;
 
 	/* If we have no swap space, do not bother scanning anon pages. */
@@ -1876,16 +1933,16 @@
 		goto out;
 	}
 
-	anon  = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_ANON) +
-		zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON);
-	file  = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_FILE) +
-		zone_nr_lru_pages(zone, sc, LRU_INACTIVE_FILE);
+	anon  = zone_nr_lru_pages(mz, LRU_ACTIVE_ANON) +
+		zone_nr_lru_pages(mz, LRU_INACTIVE_ANON);
+	file  = zone_nr_lru_pages(mz, LRU_ACTIVE_FILE) +
+		zone_nr_lru_pages(mz, LRU_INACTIVE_FILE);
 
-	if (scanning_global_lru(sc)) {
-		free  = zone_page_state(zone, NR_FREE_PAGES);
+	if (global_reclaim(sc)) {
+		free  = zone_page_state(mz->zone, NR_FREE_PAGES);
 		/* If we have very few page cache pages,
 		   force-scan anon pages. */
-		if (unlikely(file + free <= high_wmark_pages(zone))) {
+		if (unlikely(file + free <= high_wmark_pages(mz->zone))) {
 			fraction[0] = 1;
 			fraction[1] = 0;
 			denominator = 1;
@@ -1897,8 +1954,8 @@
 	 * With swappiness at 100, anonymous and file have the same priority.
 	 * This scanning priority is essentially the inverse of IO cost.
 	 */
-	anon_prio = vmscan_swappiness(sc);
-	file_prio = 200 - vmscan_swappiness(sc);
+	anon_prio = vmscan_swappiness(mz, sc);
+	file_prio = 200 - vmscan_swappiness(mz, sc);
 
 	/*
 	 * OK, so we have swap space and a fair amount of page cache
@@ -1911,7 +1968,7 @@
 	 *
 	 * anon in [0], file in [1]
 	 */
-	spin_lock_irq(&zone->lru_lock);
+	spin_lock_irq(&mz->zone->lru_lock);
 	if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) {
 		reclaim_stat->recent_scanned[0] /= 2;
 		reclaim_stat->recent_rotated[0] /= 2;
@@ -1932,24 +1989,24 @@
 
 	fp = (file_prio + 1) * (reclaim_stat->recent_scanned[1] + 1);
 	fp /= reclaim_stat->recent_rotated[1] + 1;
-	spin_unlock_irq(&zone->lru_lock);
+	spin_unlock_irq(&mz->zone->lru_lock);
 
 	fraction[0] = ap;
 	fraction[1] = fp;
 	denominator = ap + fp + 1;
 out:
-	for_each_evictable_lru(l) {
-		int file = is_file_lru(l);
+	for_each_evictable_lru(lru) {
+		int file = is_file_lru(lru);
 		unsigned long scan;
 
-		scan = zone_nr_lru_pages(zone, sc, l);
+		scan = zone_nr_lru_pages(mz, lru);
 		if (priority || noswap) {
 			scan >>= priority;
 			if (!scan && force_scan)
 				scan = SWAP_CLUSTER_MAX;
 			scan = div64_u64(scan * fraction[file], denominator);
 		}
-		nr[l] = scan;
+		nr[lru] = scan;
 	}
 }
 
@@ -1960,7 +2017,7 @@
  * back to the allocator and call try_to_compact_zone(), we ensure that
  * there are enough free pages for it to be likely successful
  */
-static inline bool should_continue_reclaim(struct zone *zone,
+static inline bool should_continue_reclaim(struct mem_cgroup_zone *mz,
 					unsigned long nr_reclaimed,
 					unsigned long nr_scanned,
 					struct scan_control *sc)
@@ -2000,15 +2057,15 @@
 	 * inactive lists are large enough, continue reclaiming
 	 */
 	pages_for_compaction = (2UL << sc->order);
-	inactive_lru_pages = zone_nr_lru_pages(zone, sc, LRU_INACTIVE_FILE);
+	inactive_lru_pages = zone_nr_lru_pages(mz, LRU_INACTIVE_FILE);
 	if (nr_swap_pages > 0)
-		inactive_lru_pages += zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON);
+		inactive_lru_pages += zone_nr_lru_pages(mz, LRU_INACTIVE_ANON);
 	if (sc->nr_reclaimed < pages_for_compaction &&
 			inactive_lru_pages > pages_for_compaction)
 		return true;
 
 	/* If compaction would go ahead or the allocation would succeed, stop */
-	switch (compaction_suitable(zone, sc->order)) {
+	switch (compaction_suitable(mz->zone, sc->order)) {
 	case COMPACT_PARTIAL:
 	case COMPACT_CONTINUE:
 		return false;
@@ -2020,12 +2077,12 @@
 /*
  * This is a basic per-zone page freer.  Used by both kswapd and direct reclaim.
  */
-static void shrink_zone(int priority, struct zone *zone,
-				struct scan_control *sc)
+static void shrink_mem_cgroup_zone(int priority, struct mem_cgroup_zone *mz,
+				   struct scan_control *sc)
 {
 	unsigned long nr[NR_LRU_LISTS];
 	unsigned long nr_to_scan;
-	enum lru_list l;
+	enum lru_list lru;
 	unsigned long nr_reclaimed, nr_scanned;
 	unsigned long nr_to_reclaim = sc->nr_to_reclaim;
 	struct blk_plug plug;
@@ -2033,19 +2090,19 @@
 restart:
 	nr_reclaimed = 0;
 	nr_scanned = sc->nr_scanned;
-	get_scan_count(zone, sc, nr, priority);
+	get_scan_count(mz, sc, nr, priority);
 
 	blk_start_plug(&plug);
 	while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
 					nr[LRU_INACTIVE_FILE]) {
-		for_each_evictable_lru(l) {
-			if (nr[l]) {
+		for_each_evictable_lru(lru) {
+			if (nr[lru]) {
 				nr_to_scan = min_t(unsigned long,
-						   nr[l], SWAP_CLUSTER_MAX);
-				nr[l] -= nr_to_scan;
+						   nr[lru], SWAP_CLUSTER_MAX);
+				nr[lru] -= nr_to_scan;
 
-				nr_reclaimed += shrink_list(l, nr_to_scan,
-							    zone, sc, priority);
+				nr_reclaimed += shrink_list(lru, nr_to_scan,
+							    mz, sc, priority);
 			}
 		}
 		/*
@@ -2066,17 +2123,89 @@
 	 * Even if we did not try to evict anon pages at all, we want to
 	 * rebalance the anon lru active/inactive ratio.
 	 */
-	if (inactive_anon_is_low(zone, sc))
-		shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0);
+	if (inactive_anon_is_low(mz))
+		shrink_active_list(SWAP_CLUSTER_MAX, mz, sc, priority, 0);
 
 	/* reclaim/compaction might need reclaim to continue */
-	if (should_continue_reclaim(zone, nr_reclaimed,
+	if (should_continue_reclaim(mz, nr_reclaimed,
 					sc->nr_scanned - nr_scanned, sc))
 		goto restart;
 
 	throttle_vm_writeout(sc->gfp_mask);
 }
 
+static void shrink_zone(int priority, struct zone *zone,
+			struct scan_control *sc)
+{
+	struct mem_cgroup *root = sc->target_mem_cgroup;
+	struct mem_cgroup_reclaim_cookie reclaim = {
+		.zone = zone,
+		.priority = priority,
+	};
+	struct mem_cgroup *memcg;
+
+	memcg = mem_cgroup_iter(root, NULL, &reclaim);
+	do {
+		struct mem_cgroup_zone mz = {
+			.mem_cgroup = memcg,
+			.zone = zone,
+		};
+
+		shrink_mem_cgroup_zone(priority, &mz, sc);
+		/*
+		 * Limit reclaim has historically picked one memcg and
+		 * scanned it with decreasing priority levels until
+		 * nr_to_reclaim had been reclaimed.  This priority
+		 * cycle is thus over after a single memcg.
+		 *
+		 * Direct reclaim and kswapd, on the other hand, have
+		 * to scan all memory cgroups to fulfill the overall
+		 * scan target for the zone.
+		 */
+		if (!global_reclaim(sc)) {
+			mem_cgroup_iter_break(root, memcg);
+			break;
+		}
+		memcg = mem_cgroup_iter(root, memcg, &reclaim);
+	} while (memcg);
+}
+
+/* Returns true if compaction should go ahead for a high-order request */
+static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
+{
+	unsigned long balance_gap, watermark;
+	bool watermark_ok;
+
+	/* Do not consider compaction for orders reclaim is meant to satisfy */
+	if (sc->order <= PAGE_ALLOC_COSTLY_ORDER)
+		return false;
+
+	/*
+	 * Compaction takes time to run and there are potentially other
+	 * callers using the pages just freed. Continue reclaiming until
+	 * there is a buffer of free pages available to give compaction
+	 * a reasonable chance of completing and allocating the page
+	 */
+	balance_gap = min(low_wmark_pages(zone),
+		(zone->present_pages + KSWAPD_ZONE_BALANCE_GAP_RATIO-1) /
+			KSWAPD_ZONE_BALANCE_GAP_RATIO);
+	watermark = high_wmark_pages(zone) + balance_gap + (2UL << sc->order);
+	watermark_ok = zone_watermark_ok_safe(zone, 0, watermark, 0, 0);
+
+	/*
+	 * If compaction is deferred, reclaim up to a point where
+	 * compaction will have a chance of success when re-enabled
+	 */
+	if (compaction_deferred(zone))
+		return watermark_ok;
+
+	/* If compaction is not ready to start, keep reclaiming */
+	if (!compaction_suitable(zone, sc->order))
+		return false;
+
+	return watermark_ok;
+}
+
 /*
  * This is the direct reclaim path, for page-allocating processes.  We only
  * try to reclaim pages from zones which will satisfy the caller's allocation
@@ -2094,8 +2223,9 @@
  * scan then give up on it.
  *
  * This function returns true if a zone is being reclaimed for a costly
- * high-order allocation and compaction is either ready to begin or deferred.
- * This indicates to the caller that it should retry the allocation or fail.
+ * high-order allocation and compaction is ready to begin. This indicates to
+ * the caller that it should consider retrying the allocation instead of
+ * further reclaim.
  */
 static bool shrink_zones(int priority, struct zonelist *zonelist,
 					struct scan_control *sc)
@@ -2104,7 +2234,7 @@
 	struct zone *zone;
 	unsigned long nr_soft_reclaimed;
 	unsigned long nr_soft_scanned;
-	bool should_abort_reclaim = false;
+	bool aborted_reclaim = false;
 
 	for_each_zone_zonelist_nodemask(zone, z, zonelist,
 					gfp_zone(sc->gfp_mask), sc->nodemask) {
@@ -2114,7 +2244,7 @@
 		 * Take care memory controller reclaiming has small influence
 		 * to global LRU.
 		 */
-		if (scanning_global_lru(sc)) {
+		if (global_reclaim(sc)) {
 			if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
 				continue;
 			if (zone->all_unreclaimable && priority != DEF_PRIORITY)
@@ -2129,10 +2259,8 @@
 				 * noticable problem, like transparent huge page
 				 * allocations.
 				 */
-				if (sc->order > PAGE_ALLOC_COSTLY_ORDER &&
-					(compaction_suitable(zone, sc->order) ||
-					 compaction_deferred(zone))) {
-					should_abort_reclaim = true;
+				if (compaction_ready(zone, sc)) {
+					aborted_reclaim = true;
 					continue;
 				}
 			}
@@ -2154,7 +2282,7 @@
 		shrink_zone(priority, zone, sc);
 	}
 
-	return should_abort_reclaim;
+	return aborted_reclaim;
 }
 
 static bool zone_reclaimable(struct zone *zone)
@@ -2208,25 +2336,25 @@
 	struct zoneref *z;
 	struct zone *zone;
 	unsigned long writeback_threshold;
+	bool aborted_reclaim;
 
 	get_mems_allowed();
 	delayacct_freepages_start();
 
-	if (scanning_global_lru(sc))
+	if (global_reclaim(sc))
 		count_vm_event(ALLOCSTALL);
 
 	for (priority = DEF_PRIORITY; priority >= 0; priority--) {
 		sc->nr_scanned = 0;
 		if (!priority)
-			disable_swap_token(sc->mem_cgroup);
-		if (shrink_zones(priority, zonelist, sc))
-			break;
+			disable_swap_token(sc->target_mem_cgroup);
+		aborted_reclaim = shrink_zones(priority, zonelist, sc);
 
 		/*
 		 * Don't shrink slabs when reclaiming memory from
 		 * over limit cgroups
 		 */
-		if (scanning_global_lru(sc)) {
+		if (global_reclaim(sc)) {
 			unsigned long lru_pages = 0;
 			for_each_zone_zonelist(zone, z, zonelist,
 					gfp_zone(sc->gfp_mask)) {
@@ -2287,8 +2415,12 @@
 	if (oom_killer_disabled)
 		return 0;
 
+	/* Aborted reclaim to try compaction? don't OOM, then */
+	if (aborted_reclaim)
+		return 1;
+
 	/* top priority shrink_zones still had more to do? don't OOM, then */
-	if (scanning_global_lru(sc) && !all_unreclaimable(zonelist, sc))
+	if (global_reclaim(sc) && !all_unreclaimable(zonelist, sc))
 		return 1;
 
 	return 0;
@@ -2305,7 +2437,7 @@
 		.may_unmap = 1,
 		.may_swap = 1,
 		.order = order,
-		.mem_cgroup = NULL,
+		.target_mem_cgroup = NULL,
 		.nodemask = nodemask,
 	};
 	struct shrink_control shrink = {
@@ -2325,7 +2457,7 @@
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
 
-unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
+unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg,
 						gfp_t gfp_mask, bool noswap,
 						struct zone *zone,
 						unsigned long *nr_scanned)
@@ -2337,7 +2469,11 @@
 		.may_unmap = 1,
 		.may_swap = !noswap,
 		.order = 0,
-		.mem_cgroup = mem,
+		.target_mem_cgroup = memcg,
+	};
+	struct mem_cgroup_zone mz = {
+		.mem_cgroup = memcg,
+		.zone = zone,
 	};
 
 	sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
@@ -2354,7 +2490,7 @@
 	 * will pick up pages from other mem cgroup's as well. We hack
 	 * the priority and make it zero.
 	 */
-	shrink_zone(0, zone, &sc);
+	shrink_mem_cgroup_zone(0, &mz, &sc);
 
 	trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
 
@@ -2362,7 +2498,7 @@
 	return sc.nr_reclaimed;
 }
 
-unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
+unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
 					   gfp_t gfp_mask,
 					   bool noswap)
 {
@@ -2375,7 +2511,7 @@
 		.may_swap = !noswap,
 		.nr_to_reclaim = SWAP_CLUSTER_MAX,
 		.order = 0,
-		.mem_cgroup = mem_cont,
+		.target_mem_cgroup = memcg,
 		.nodemask = NULL, /* we don't care the placement */
 		.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
 				(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK),
@@ -2389,7 +2525,7 @@
 	 * take care of from where we get pages. So the node where we start the
 	 * scan does not need to be the current node.
 	 */
-	nid = mem_cgroup_select_victim_node(mem_cont);
+	nid = mem_cgroup_select_victim_node(memcg);
 
 	zonelist = NODE_DATA(nid)->node_zonelists;
 
@@ -2405,6 +2541,29 @@
 }
 #endif
 
+static void age_active_anon(struct zone *zone, struct scan_control *sc,
+			    int priority)
+{
+	struct mem_cgroup *memcg;
+
+	if (!total_swap_pages)
+		return;
+
+	memcg = mem_cgroup_iter(NULL, NULL, NULL);
+	do {
+		struct mem_cgroup_zone mz = {
+			.mem_cgroup = memcg,
+			.zone = zone,
+		};
+
+		if (inactive_anon_is_low(&mz))
+			shrink_active_list(SWAP_CLUSTER_MAX, &mz,
+					   sc, priority, 0);
+
+		memcg = mem_cgroup_iter(NULL, memcg, NULL);
+	} while (memcg);
+}
+
 /*
  * pgdat_balanced is used when checking if a node is balanced for high-order
  * allocations. Only zones that meet watermarks and are in a zone allowed
@@ -2525,7 +2684,7 @@
 		 */
 		.nr_to_reclaim = ULONG_MAX,
 		.order = order,
-		.mem_cgroup = NULL,
+		.target_mem_cgroup = NULL,
 	};
 	struct shrink_control shrink = {
 		.gfp_mask = sc.gfp_mask,
@@ -2564,9 +2723,7 @@
 			 * Do some background aging of the anon list, to give
 			 * pages a chance to be referenced before reclaiming.
 			 */
-			if (inactive_anon_is_low(zone, &sc))
-				shrink_active_list(SWAP_CLUSTER_MAX, zone,
-							&sc, priority, 0);
+			age_active_anon(zone, &sc, priority);
 
 			if (!zone_watermark_ok_safe(zone, order,
 					high_wmark_pages(zone), 0, 0)) {
@@ -3355,16 +3512,18 @@
  */
 static void check_move_unevictable_page(struct page *page, struct zone *zone)
 {
-	VM_BUG_ON(PageActive(page));
+	struct lruvec *lruvec;
 
+	VM_BUG_ON(PageActive(page));
 retry:
 	ClearPageUnevictable(page);
 	if (page_evictable(page, NULL)) {
 		enum lru_list l = page_lru_base_type(page);
 
 		__dec_zone_state(zone, NR_UNEVICTABLE);
-		list_move(&page->lru, &zone->lru[l].list);
-		mem_cgroup_move_lists(page, LRU_UNEVICTABLE, l);
+		lruvec = mem_cgroup_lru_move_lists(zone, page,
+						   LRU_UNEVICTABLE, l);
+		list_move(&page->lru, &lruvec->lists[l]);
 		__inc_zone_state(zone, NR_INACTIVE_ANON + l);
 		__count_vm_event(UNEVICTABLE_PGRESCUED);
 	} else {
@@ -3372,8 +3531,9 @@
 		 * rotate unevictable list
 		 */
 		SetPageUnevictable(page);
-		list_move(&page->lru, &zone->lru[LRU_UNEVICTABLE].list);
-		mem_cgroup_rotate_lru_list(page, LRU_UNEVICTABLE);
+		lruvec = mem_cgroup_lru_move_lists(zone, page, LRU_UNEVICTABLE,
+						   LRU_UNEVICTABLE);
+		list_move(&page->lru, &lruvec->lists[LRU_UNEVICTABLE]);
 		if (page_evictable(page, NULL))
 			goto retry;
 	}

diff --git a/mm/vmstat.c b/mm/vmstat.c
index 8fd603b..f600557 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c

@@ -295,7 +295,7 @@
 }
 EXPORT_SYMBOL(__dec_zone_page_state);
 
-#ifdef CONFIG_CMPXCHG_LOCAL
+#ifdef CONFIG_HAVE_CMPXCHG_LOCAL
 /*
  * If we have cmpxchg_local support then we do not need to incur the overhead
  * that comes with local_irq_save/restore if we use this_cpu_cmpxchg.

diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 330421e..3d43206 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c

@@ -272,7 +272,8 @@
 	in = pack_sg_list(chan->sg, out,
 			  VIRTQUEUE_NUM, req->rc->sdata, req->rc->capacity);
 
-	err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc);
+	err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc,
+				GFP_ATOMIC);
 	if (err < 0) {
 		if (err == -ENOSPC) {
 			chan->ring_bufs_avail = 0;
@@ -414,7 +415,8 @@
 		in += pack_sg_list_p(chan->sg, out + in, VIRTQUEUE_NUM,
 				     in_pages, in_nr_pages, uidata, inlen);
 
-	err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc);
+	err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc,
+				GFP_ATOMIC);
 	if (err < 0) {
 		if (err == -ENOSPC) {
 			chan->ring_bufs_avail = 0;

diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index cdcfcab..ef92864 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c

@@ -156,17 +156,17 @@
 
 void bt_sock_link(struct bt_sock_list *l, struct sock *sk)
 {
-	write_lock_bh(&l->lock);
+	write_lock(&l->lock);
 	sk_add_node(sk, &l->head);
-	write_unlock_bh(&l->lock);
+	write_unlock(&l->lock);
 }
 EXPORT_SYMBOL(bt_sock_link);
 
 void bt_sock_unlink(struct bt_sock_list *l, struct sock *sk)
 {
-	write_lock_bh(&l->lock);
+	write_lock(&l->lock);
 	sk_del_node_init(sk);
-	write_unlock_bh(&l->lock);
+	write_unlock(&l->lock);
 }
 EXPORT_SYMBOL(bt_sock_unlink);
 

diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 4221bd2..001307f 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c

@@ -711,7 +711,14 @@
 	if (rp->status)
 		return;
 
-	memcpy(hdev->extfeatures, rp->features, 8);
+	switch (rp->page) {
+	case 0:
+		memcpy(hdev->features, rp->features, 8);
+		break;
+	case 1:
+		memcpy(hdev->host_features, rp->features, 8);
+		break;
+	}
 
 	hci_req_complete(hdev, HCI_OP_READ_LOCAL_EXT_FEATURES, rp->status);
 }
@@ -1047,9 +1054,7 @@
 	case LE_SCANNING_DISABLED:
 		clear_bit(HCI_LE_SCAN, &hdev->dev_flags);
 
-		cancel_delayed_work_sync(&hdev->adv_work);
-		queue_delayed_work(hdev->workqueue, &hdev->adv_work,
-						 jiffies + ADV_CLEAR_TIMEOUT);
+		schedule_delayed_work(&hdev->adv_work, ADV_CLEAR_TIMEOUT);
 		break;
 
 	default:
@@ -2266,20 +2271,19 @@
 	struct hci_ev_num_comp_pkts *ev = (void *) skb->data;
 	int i;
 
-	skb_pull(skb, sizeof(*ev));
-
-	BT_DBG("%s num_hndl %d", hdev->name, ev->num_hndl);
-
 	if (hdev->flow_ctl_mode != HCI_FLOW_CTL_MODE_PACKET_BASED) {
 		BT_ERR("Wrong event for mode %d", hdev->flow_ctl_mode);
 		return;
 	}
 
-	if (skb->len < ev->num_hndl * 4) {
+	if (skb->len < sizeof(*ev) || skb->len < sizeof(*ev) +
+			ev->num_hndl * sizeof(struct hci_comp_pkts_info)) {
 		BT_DBG("%s bad parameters", hdev->name);
 		return;
 	}
 
+	BT_DBG("%s num_hndl %d", hdev->name, ev->num_hndl);
+
 	for (i = 0; i < ev->num_hndl; i++) {
 		struct hci_comp_pkts_info *info = &ev->handles[i];
 		struct hci_conn *conn;

diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 6d94616..0dcc962 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c

@@ -767,7 +767,6 @@
 		/* Detach sockets from device */
 		read_lock(&hci_sk_list.lock);
 		sk_for_each(sk, node, &hci_sk_list.head) {
-			local_bh_disable();
 			bh_lock_sock_nested(sk);
 			if (hci_pi(sk)->hdev == hdev) {
 				hci_pi(sk)->hdev = NULL;
@@ -778,7 +777,6 @@
 				hci_dev_put(hdev);
 			}
 			bh_unlock_sock(sk);
-			local_bh_enable();
 		}
 		read_unlock(&hci_sk_list.lock);
 	}

diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index aa78d8c..faf0b11 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c

@@ -165,7 +165,7 @@
 {
 	int err;
 
-	write_lock_bh(&chan_list_lock);
+	write_lock(&chan_list_lock);
 
 	if (psm && __l2cap_global_chan_by_addr(psm, src)) {
 		err = -EADDRINUSE;
@@ -190,17 +190,17 @@
 	}
 
 done:
-	write_unlock_bh(&chan_list_lock);
+	write_unlock(&chan_list_lock);
 	return err;
 }
 
 int l2cap_add_scid(struct l2cap_chan *chan,  __u16 scid)
 {
-	write_lock_bh(&chan_list_lock);
+	write_lock(&chan_list_lock);
 
 	chan->scid = scid;
 
-	write_unlock_bh(&chan_list_lock);
+	write_unlock(&chan_list_lock);
 
 	return 0;
 }
@@ -289,9 +289,9 @@
 
 	chan->sk = sk;
 
-	write_lock_bh(&chan_list_lock);
+	write_lock(&chan_list_lock);
 	list_add(&chan->global_l, &chan_list);
-	write_unlock_bh(&chan_list_lock);
+	write_unlock(&chan_list_lock);
 
 	INIT_DELAYED_WORK(&chan->chan_timer, l2cap_chan_timeout);
 
@@ -306,9 +306,9 @@
 
 void l2cap_chan_destroy(struct l2cap_chan *chan)
 {
-	write_lock_bh(&chan_list_lock);
+	write_lock(&chan_list_lock);
 	list_del(&chan->global_l);
-	write_unlock_bh(&chan_list_lock);
+	write_unlock(&chan_list_lock);
 
 	l2cap_chan_put(chan);
 }
@@ -543,14 +543,14 @@
 	 *  200 - 254 are used by utilities like l2ping, etc.
 	 */
 
-	spin_lock_bh(&conn->lock);
+	spin_lock(&conn->lock);
 
 	if (++conn->tx_ident > 128)
 		conn->tx_ident = 1;
 
 	id = conn->tx_ident;
 
-	spin_unlock_bh(&conn->lock);
+	spin_unlock(&conn->lock);
 
 	return id;
 }
@@ -1190,7 +1190,7 @@
 	}
 
 	/* Set destination address and psm */
-	bacpy(&bt_sk(sk)->dst, src);
+	bacpy(&bt_sk(sk)->dst, dst);
 	chan->psm = psm;
 	chan->dcid = cid;
 
@@ -4702,7 +4702,7 @@
 {
 	struct l2cap_chan *c;
 
-	read_lock_bh(&chan_list_lock);
+	read_lock(&chan_list_lock);
 
 	list_for_each_entry(c, &chan_list, global_l) {
 		struct sock *sk = c->sk;
@@ -4715,7 +4715,7 @@
 					c->sec_level, c->mode);
 }
 
-	read_unlock_bh(&chan_list_lock);
+	read_unlock(&chan_list_lock);
 
 	return 0;
 }

diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 9ca5616..c61d967 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c

@@ -587,6 +587,7 @@
 			if (smp_conn_security(conn, sec.level))
 				break;
 			sk->sk_state = BT_CONFIG;
+			chan->state = BT_CONFIG;
 
 		/* or for ACL link, under defer_setup time */
 		} else if (sk->sk_state == BT_CONNECT2 &&
@@ -731,6 +732,7 @@
 
 	if (sk->sk_state == BT_CONNECT2 && bt_sk(sk)->defer_setup) {
 		sk->sk_state = BT_CONFIG;
+		pi->chan->state = BT_CONFIG;
 
 		__l2cap_connect_rsp_defer(pi->chan);
 		release_sock(sk);

diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 2540944..bc8e59d 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c

@@ -291,7 +291,7 @@
 	if (!(hdev->features[4] & LMP_NO_BREDR))
 		settings |= MGMT_SETTING_BREDR;
 
-	if (hdev->extfeatures[0] & LMP_HOST_LE)
+	if (hdev->host_features[0] & LMP_HOST_LE)
 		settings |= MGMT_SETTING_LE;
 
 	if (test_bit(HCI_AUTH, &hdev->flags))
@@ -2756,7 +2756,7 @@
 	if (!cmd)
 		return -ENOENT;
 
-	err = cmd_status(cmd->sk, hdev->id, cmd->opcode, status);
+	err = cmd_status(cmd->sk, hdev->id, cmd->opcode, mgmt_status(status));
 	mgmt_pending_remove(cmd);
 
 	return err;

diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index aea2bdd..f066678 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c

@@ -370,7 +370,7 @@
 		goto done;
 	}
 
-	write_lock_bh(&rfcomm_sk_list.lock);
+	write_lock(&rfcomm_sk_list.lock);
 
 	if (sa->rc_channel && __rfcomm_get_sock_by_addr(sa->rc_channel, &sa->rc_bdaddr)) {
 		err = -EADDRINUSE;
@@ -381,7 +381,7 @@
 		sk->sk_state = BT_BOUND;
 	}
 
-	write_unlock_bh(&rfcomm_sk_list.lock);
+	write_unlock(&rfcomm_sk_list.lock);
 
 done:
 	release_sock(sk);
@@ -455,7 +455,7 @@
 
 		err = -EINVAL;
 
-		write_lock_bh(&rfcomm_sk_list.lock);
+		write_lock(&rfcomm_sk_list.lock);
 
 		for (channel = 1; channel < 31; channel++)
 			if (!__rfcomm_get_sock_by_addr(channel, src)) {
@@ -464,7 +464,7 @@
 				break;
 			}
 
-		write_unlock_bh(&rfcomm_sk_list.lock);
+		write_unlock(&rfcomm_sk_list.lock);
 
 		if (err < 0)
 			goto done;
@@ -982,7 +982,7 @@
 	struct sock *sk;
 	struct hlist_node *node;
 
-	read_lock_bh(&rfcomm_sk_list.lock);
+	read_lock(&rfcomm_sk_list.lock);
 
 	sk_for_each(sk, node, &rfcomm_sk_list.head) {
 		seq_printf(f, "%s %s %d %d\n",
@@ -991,7 +991,7 @@
 				sk->sk_state, rfcomm_pi(sk)->channel);
 	}
 
-	read_unlock_bh(&rfcomm_sk_list.lock);
+	read_unlock(&rfcomm_sk_list.lock);
 
 	return 0;
 }

diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index fa8f4de5..a2d4f51 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c

@@ -76,7 +76,7 @@
 };
 
 static LIST_HEAD(rfcomm_dev_list);
-static DEFINE_RWLOCK(rfcomm_dev_lock);
+static DEFINE_SPINLOCK(rfcomm_dev_lock);
 
 static void rfcomm_dev_data_ready(struct rfcomm_dlc *dlc, struct sk_buff *skb);
 static void rfcomm_dev_state_change(struct rfcomm_dlc *dlc, int err);
@@ -146,7 +146,7 @@
 {
 	struct rfcomm_dev *dev;
 
-	read_lock(&rfcomm_dev_lock);
+	spin_lock(&rfcomm_dev_lock);
 
 	dev = __rfcomm_dev_get(id);
 
@@ -157,7 +157,7 @@
 			rfcomm_dev_hold(dev);
 	}
 
-	read_unlock(&rfcomm_dev_lock);
+	spin_unlock(&rfcomm_dev_lock);
 
 	return dev;
 }
@@ -205,7 +205,7 @@
 	if (!dev)
 		return -ENOMEM;
 
-	write_lock_bh(&rfcomm_dev_lock);
+	spin_lock(&rfcomm_dev_lock);
 
 	if (req->dev_id < 0) {
 		dev->id = 0;
@@ -290,7 +290,7 @@
 	__module_get(THIS_MODULE);
 
 out:
-	write_unlock_bh(&rfcomm_dev_lock);
+	spin_unlock(&rfcomm_dev_lock);
 
 	if (err < 0)
 		goto free;
@@ -327,9 +327,9 @@
 	if (atomic_read(&dev->opened) > 0)
 		return;
 
-	write_lock_bh(&rfcomm_dev_lock);
+	spin_lock(&rfcomm_dev_lock);
 	list_del_init(&dev->list);
-	write_unlock_bh(&rfcomm_dev_lock);
+	spin_unlock(&rfcomm_dev_lock);
 
 	rfcomm_dev_put(dev);
 }
@@ -473,7 +473,7 @@
 
 	di = dl->dev_info;
 
-	read_lock_bh(&rfcomm_dev_lock);
+	spin_lock(&rfcomm_dev_lock);
 
 	list_for_each_entry(dev, &rfcomm_dev_list, list) {
 		if (test_bit(RFCOMM_TTY_RELEASED, &dev->flags))
@@ -488,7 +488,7 @@
 			break;
 	}
 
-	read_unlock_bh(&rfcomm_dev_lock);
+	spin_unlock(&rfcomm_dev_lock);
 
 	dl->dev_num = n;
 	size = sizeof(*dl) + n * sizeof(*di);
@@ -766,9 +766,9 @@
 		rfcomm_dlc_unlock(dev->dlc);
 
 		if (test_bit(RFCOMM_TTY_RELEASED, &dev->flags)) {
-			write_lock_bh(&rfcomm_dev_lock);
+			spin_lock(&rfcomm_dev_lock);
 			list_del_init(&dev->list);
-			write_unlock_bh(&rfcomm_dev_lock);
+			spin_unlock(&rfcomm_dev_lock);
 
 			rfcomm_dev_put(dev);
 		}

diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 5dc2f21..8bf26d1 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c

@@ -482,7 +482,7 @@
 		goto done;
 	}
 
-	write_lock_bh(&sco_sk_list.lock);
+	write_lock(&sco_sk_list.lock);
 
 	if (bacmp(src, BDADDR_ANY) && __sco_get_sock_by_addr(src)) {
 		err = -EADDRINUSE;
@@ -492,7 +492,7 @@
 		sk->sk_state = BT_BOUND;
 	}
 
-	write_unlock_bh(&sco_sk_list.lock);
+	write_unlock(&sco_sk_list.lock);
 
 done:
 	release_sock(sk);
@@ -965,14 +965,14 @@
 	struct sock *sk;
 	struct hlist_node *node;
 
-	read_lock_bh(&sco_sk_list.lock);
+	read_lock(&sco_sk_list.lock);
 
 	sk_for_each(sk, node, &sco_sk_list.head) {
 		seq_printf(f, "%s %s %d\n", batostr(&bt_sk(sk)->src),
 				batostr(&bt_sk(sk)->dst), sk->sk_state);
 	}
 
-	read_unlock_bh(&sco_sk_list.lock);
+	read_unlock(&sco_sk_list.lock);
 
 	return 0;
 }

diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
index 3a94eae..b79747c 100644
--- a/net/ceph/crush/mapper.c
+++ b/net/ceph/crush/mapper.c

@@ -510,10 +510,15 @@
 		switch (rule->steps[step].op) {
 		case CRUSH_RULE_TAKE:
 			w[0] = rule->steps[step].arg1;
-			if (force_pos >= 0) {
-				BUG_ON(force_context[force_pos] != w[0]);
+
+			/* find position in force_context/hierarchy */
+			while (force_pos >= 0 &&
+			       force_context[force_pos] != w[0])
 				force_pos--;
-			}
+			/* and move past it */
+			if (force_pos >= 0)
+				force_pos--;
+
 			wsize = 1;
 			break;
 

diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c
index 85f3bc0..b780cb7 100644
--- a/net/ceph/crypto.c
+++ b/net/ceph/crypto.c

@@ -15,10 +15,9 @@
 			  const struct ceph_crypto_key *src)
 {
 	memcpy(dst, src, sizeof(struct ceph_crypto_key));
-	dst->key = kmalloc(src->len, GFP_NOFS);
+	dst->key = kmemdup(src->key, src->len, GFP_NOFS);
 	if (!dst->key)
 		return -ENOMEM;
-	memcpy(dst->key, src->key, src->len);
 	return 0;
 }
 

diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index f4f3f58..5e25405 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c

@@ -29,8 +29,8 @@
 			       struct ceph_osd_request *req);
 static void __unregister_linger_request(struct ceph_osd_client *osdc,
 					struct ceph_osd_request *req);
-static int __send_request(struct ceph_osd_client *osdc,
-			  struct ceph_osd_request *req);
+static void __send_request(struct ceph_osd_client *osdc,
+			   struct ceph_osd_request *req);
 
 static int op_needs_trail(int op)
 {
@@ -1022,8 +1022,8 @@
 /*
  * caller should hold map_sem (for read) and request_mutex
  */
-static int __send_request(struct ceph_osd_client *osdc,
-			  struct ceph_osd_request *req)
+static void __send_request(struct ceph_osd_client *osdc,
+			   struct ceph_osd_request *req)
 {
 	struct ceph_osd_request_head *reqhead;
 
@@ -1041,7 +1041,6 @@
 	ceph_msg_get(req->r_request); /* send consumes a ref */
 	ceph_con_send(&req->r_osd->o_con, req->r_request);
 	req->r_sent = req->r_osd->o_incarnation;
-	return 0;
 }
 
 /*
@@ -1726,17 +1725,9 @@
 			dout("send_request %p no up osds in pg\n", req);
 			ceph_monc_request_next_osdmap(&osdc->client->monc);
 		} else {
-			rc = __send_request(osdc, req);
-			if (rc) {
-				if (nofail) {
-					dout("osdc_start_request failed send, "
-					     " will retry %lld\n", req->r_tid);
-					rc = 0;
-				} else {
-					__unregister_request(osdc, req);
-				}
-			}
+			__send_request(osdc, req);
 		}
+		rc = 0;
 	}
 
 out_unlock:

diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index abf4393..f3dbd4f 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c

@@ -1177,9 +1177,9 @@
 			nonempty = 1;
 	}
 
-	if (nonempty)
-		RCU_INIT_POINTER(dev->xps_maps, new_dev_maps);
-	else {
+	if (nonempty) {
+		rcu_assign_pointer(dev->xps_maps, new_dev_maps);
+	} else {
 		kfree(new_dev_maps);
 		RCU_INIT_POINTER(dev->xps_maps, NULL);
 	}

diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 0d38808..556b082 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c

@@ -765,7 +765,7 @@
 	}
 
 	/* last thing to do is link it to the net device structure */
-	RCU_INIT_POINTER(ndev->npinfo, npinfo);
+	rcu_assign_pointer(ndev->npinfo, npinfo);
 
 	return 0;
 

diff --git a/net/dccp/diag.c b/net/dccp/diag.c
index 8f16257..028fc43 100644
--- a/net/dccp/diag.c
+++ b/net/dccp/diag.c

@@ -49,13 +49,13 @@
 }
 
 static void dccp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
-		struct inet_diag_req *r, struct nlattr *bc)
+		struct inet_diag_req_v2 *r, struct nlattr *bc)
 {
 	inet_diag_dump_icsk(&dccp_hashinfo, skb, cb, r, bc);
 }
 
 static int dccp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh,
-		struct inet_diag_req *req)
+		struct inet_diag_req_v2 *req)
 {
 	return inet_diag_dump_one_icsk(&dccp_hashinfo, in_skb, nlh, req);
 }

diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 2ab16e1..74d321a 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c

@@ -388,7 +388,7 @@
 	}
 
 	ifa->ifa_next = dn_db->ifa_list;
-	RCU_INIT_POINTER(dn_db->ifa_list, ifa);
+	rcu_assign_pointer(dn_db->ifa_list, ifa);
 
 	dn_ifaddr_notify(RTM_NEWADDR, ifa);
 	blocking_notifier_call_chain(&dnaddr_chain, NETDEV_UP, ifa);
@@ -1093,7 +1093,7 @@
 
 	memcpy(&dn_db->parms, p, sizeof(struct dn_dev_parms));
 
-	RCU_INIT_POINTER(dev->dn_ptr, dn_db);
+	rcu_assign_pointer(dev->dn_ptr, dn_db);
 	dn_db->dev = dev;
 	init_timer(&dn_db->timer);
 

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 65f01dc..e41c40f 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c

@@ -258,7 +258,7 @@
 		ip_mc_up(in_dev);
 
 	/* we can receive as soon as ip_ptr is set -- do this last */
-	RCU_INIT_POINTER(dev->ip_ptr, in_dev);
+	rcu_assign_pointer(dev->ip_ptr, in_dev);
 out:
 	return in_dev;
 out_kfree:

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index d04b13a..2b555a5 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c

@@ -205,7 +205,7 @@
 	return (struct tnode *)(parent & ~NODE_TYPE_MASK);
 }
 
-/* Same as RCU_INIT_POINTER
+/* Same as rcu_assign_pointer
  * but that macro() assumes that value is a pointer.
  */
 static inline void node_set_parent(struct rt_trie_node *node, struct tnode *ptr)
@@ -529,7 +529,7 @@
 	if (n)
 		node_set_parent(n, tn);
 
-	RCU_INIT_POINTER(tn->child[i], n);
+	rcu_assign_pointer(tn->child[i], n);
 }
 
 #define MAX_WORK 10
@@ -1015,7 +1015,7 @@
 
 		tp = node_parent((struct rt_trie_node *) tn);
 		if (!tp)
-			RCU_INIT_POINTER(t->trie, (struct rt_trie_node *)tn);
+			rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn);
 
 		tnode_free_flush();
 		if (!tp)
@@ -1027,7 +1027,7 @@
 	if (IS_TNODE(tn))
 		tn = (struct tnode *)resize(t, (struct tnode *)tn);
 
-	RCU_INIT_POINTER(t->trie, (struct rt_trie_node *)tn);
+	rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn);
 	tnode_free_flush();
 }
 
@@ -1164,7 +1164,7 @@
 			put_child(t, (struct tnode *)tp, cindex,
 				  (struct rt_trie_node *)tn);
 		} else {
-			RCU_INIT_POINTER(t->trie, (struct rt_trie_node *)tn);
+			rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn);
 			tp = tn;
 		}
 	}

diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 5104bc0..450e5d2 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c

@@ -1249,7 +1249,7 @@
 
 	im->next_rcu = in_dev->mc_list;
 	in_dev->mc_count++;
-	RCU_INIT_POINTER(in_dev->mc_list, im);
+	rcu_assign_pointer(in_dev->mc_list, im);
 
 #ifdef CONFIG_IP_MULTICAST
 	igmpv3_del_delrec(in_dev, im->multiaddr);
@@ -1821,7 +1821,7 @@
 	iml->next_rcu = inet->mc_list;
 	iml->sflist = NULL;
 	iml->sfmode = MCAST_EXCLUDE;
-	RCU_INIT_POINTER(inet->mc_list, iml);
+	rcu_assign_pointer(inet->mc_list, iml);
 	ip_mc_inc_group(in_dev, addr);
 	err = 0;
 done:
@@ -2008,7 +2008,7 @@
 			atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
 			kfree_rcu(psl, rcu);
 		}
-		RCU_INIT_POINTER(pmc->sflist, newpsl);
+		rcu_assign_pointer(pmc->sflist, newpsl);
 		psl = newpsl;
 	}
 	rv = 1;	/* > 0 for insert logic below if sl_count is 0 */
@@ -2111,7 +2111,7 @@
 	} else
 		(void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode,
 			0, NULL, 0);
-	RCU_INIT_POINTER(pmc->sflist, newpsl);
+	rcu_assign_pointer(pmc->sflist, newpsl);
 	pmc->sfmode = msf->imsf_fmode;
 	err = 0;
 done:

diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 2240a8e..fcf2818 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c

@@ -71,7 +71,7 @@
 }
 
 int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
-			      struct sk_buff *skb, struct inet_diag_req *req,
+			      struct sk_buff *skb, struct inet_diag_req_v2 *req,
 			      u32 pid, u32 seq, u16 nlmsg_flags,
 			      const struct nlmsghdr *unlh)
 {
@@ -193,7 +193,7 @@
 EXPORT_SYMBOL_GPL(inet_sk_diag_fill);
 
 static int inet_csk_diag_fill(struct sock *sk,
-			      struct sk_buff *skb, struct inet_diag_req *req,
+			      struct sk_buff *skb, struct inet_diag_req_v2 *req,
 			      u32 pid, u32 seq, u16 nlmsg_flags,
 			      const struct nlmsghdr *unlh)
 {
@@ -202,7 +202,7 @@
 }
 
 static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
-			       struct sk_buff *skb, struct inet_diag_req *req,
+			       struct sk_buff *skb, struct inet_diag_req_v2 *req,
 			       u32 pid, u32 seq, u16 nlmsg_flags,
 			       const struct nlmsghdr *unlh)
 {
@@ -253,7 +253,7 @@
 }
 
 static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
-			struct inet_diag_req *r, u32 pid, u32 seq, u16 nlmsg_flags,
+			struct inet_diag_req_v2 *r, u32 pid, u32 seq, u16 nlmsg_flags,
 			const struct nlmsghdr *unlh)
 {
 	if (sk->sk_state == TCP_TIME_WAIT)
@@ -264,7 +264,7 @@
 }
 
 int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_skb,
-		const struct nlmsghdr *nlh, struct inet_diag_req *req)
+		const struct nlmsghdr *nlh, struct inet_diag_req_v2 *req)
 {
 	int err;
 	struct sock *sk;
@@ -333,7 +333,7 @@
 
 static int inet_diag_get_exact(struct sk_buff *in_skb,
 			       const struct nlmsghdr *nlh,
-			       struct inet_diag_req *req)
+			       struct inet_diag_req_v2 *req)
 {
 	const struct inet_diag_handler *handler;
 	int err;
@@ -540,7 +540,7 @@
 static int inet_csk_diag_dump(struct sock *sk,
 			      struct sk_buff *skb,
 			      struct netlink_callback *cb,
-			      struct inet_diag_req *r,
+			      struct inet_diag_req_v2 *r,
 			      const struct nlattr *bc)
 {
 	if (!inet_diag_bc_sk(bc, sk))
@@ -554,7 +554,7 @@
 static int inet_twsk_diag_dump(struct inet_timewait_sock *tw,
 			       struct sk_buff *skb,
 			       struct netlink_callback *cb,
-			       struct inet_diag_req *r,
+			       struct inet_diag_req_v2 *r,
 			       const struct nlattr *bc)
 {
 	if (bc != NULL) {
@@ -639,7 +639,7 @@
 
 static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
 			       struct netlink_callback *cb,
-			       struct inet_diag_req *r,
+			       struct inet_diag_req_v2 *r,
 			       const struct nlattr *bc)
 {
 	struct inet_diag_entry entry;
@@ -721,7 +721,7 @@
 }
 
 void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
-		struct netlink_callback *cb, struct inet_diag_req *r, struct nlattr *bc)
+		struct netlink_callback *cb, struct inet_diag_req_v2 *r, struct nlattr *bc)
 {
 	int i, num;
 	int s_i, s_num;
@@ -872,7 +872,7 @@
 EXPORT_SYMBOL_GPL(inet_diag_dump_icsk);
 
 static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
-		struct inet_diag_req *r, struct nlattr *bc)
+		struct inet_diag_req_v2 *r, struct nlattr *bc)
 {
 	const struct inet_diag_handler *handler;
 
@@ -887,12 +887,12 @@
 static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	struct nlattr *bc = NULL;
-	int hdrlen = sizeof(struct inet_diag_req);
+	int hdrlen = sizeof(struct inet_diag_req_v2);
 
 	if (nlmsg_attrlen(cb->nlh, hdrlen))
 		bc = nlmsg_find_attr(cb->nlh, hdrlen, INET_DIAG_REQ_BYTECODE);
 
-	return __inet_diag_dump(skb, cb, (struct inet_diag_req *)NLMSG_DATA(cb->nlh), bc);
+	return __inet_diag_dump(skb, cb, (struct inet_diag_req_v2 *)NLMSG_DATA(cb->nlh), bc);
 }
 
 static inline int inet_diag_type2proto(int type)
@@ -909,10 +909,10 @@
 
 static int inet_diag_dump_compat(struct sk_buff *skb, struct netlink_callback *cb)
 {
-	struct inet_diag_req_compat *rc = NLMSG_DATA(cb->nlh);
-	struct inet_diag_req req;
+	struct inet_diag_req *rc = NLMSG_DATA(cb->nlh);
+	struct inet_diag_req_v2 req;
 	struct nlattr *bc = NULL;
-	int hdrlen = sizeof(struct inet_diag_req_compat);
+	int hdrlen = sizeof(struct inet_diag_req);
 
 	req.sdiag_family = AF_UNSPEC; /* compatibility */
 	req.sdiag_protocol = inet_diag_type2proto(cb->nlh->nlmsg_type);
@@ -929,8 +929,8 @@
 static int inet_diag_get_exact_compat(struct sk_buff *in_skb,
 			       const struct nlmsghdr *nlh)
 {
-	struct inet_diag_req_compat *rc = NLMSG_DATA(nlh);
-	struct inet_diag_req req;
+	struct inet_diag_req *rc = NLMSG_DATA(nlh);
+	struct inet_diag_req_v2 req;
 
 	req.sdiag_family = rc->idiag_family;
 	req.sdiag_protocol = inet_diag_type2proto(nlh->nlmsg_type);
@@ -943,7 +943,7 @@
 
 static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
-	int hdrlen = sizeof(struct inet_diag_req_compat);
+	int hdrlen = sizeof(struct inet_diag_req);
 
 	if (nlh->nlmsg_type >= INET_DIAG_GETSOCK_MAX ||
 	    nlmsg_len(nlh) < hdrlen)
@@ -970,7 +970,7 @@
 
 static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
 {
-	int hdrlen = sizeof(struct inet_diag_req);
+	int hdrlen = sizeof(struct inet_diag_req_v2);
 
 	if (nlmsg_len(h) < hdrlen)
 		return -EINVAL;
@@ -990,7 +990,7 @@
 					  inet_diag_dump, NULL, 0);
 	}
 
-	return inet_diag_get_exact(skb, h, (struct inet_diag_req *)NLMSG_DATA(h));
+	return inet_diag_get_exact(skb, h, (struct inet_diag_req_v2 *)NLMSG_DATA(h));
 }
 
 static struct sock_diag_handler inet_diag_handler = {

diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 413ed1b..22a1993 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c

@@ -231,7 +231,7 @@
 	     (iter = rtnl_dereference(*tp)) != NULL;
 	     tp = &iter->next) {
 		if (t == iter) {
-			RCU_INIT_POINTER(*tp, t->next);
+			rcu_assign_pointer(*tp, t->next);
 			break;
 		}
 	}
@@ -241,8 +241,8 @@
 {
 	struct ip_tunnel __rcu **tp = ipip_bucket(ipn, t);
 
-	RCU_INIT_POINTER(t->next, rtnl_dereference(*tp));
-	RCU_INIT_POINTER(*tp, t);
+	rcu_assign_pointer(t->next, rtnl_dereference(*tp));
+	rcu_assign_pointer(*tp, t);
 }
 
 static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
@@ -792,7 +792,7 @@
 		return -ENOMEM;
 
 	dev_hold(dev);
-	RCU_INIT_POINTER(ipn->tunnels_wc[0], tunnel);
+	rcu_assign_pointer(ipn->tunnels_wc[0], tunnel);
 	return 0;
 }
 

diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 8e54490..7bc2db6 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c

@@ -1225,7 +1225,7 @@
 
 		ret = ip_ra_control(sk, 1, mrtsock_destruct);
 		if (ret == 0) {
-			RCU_INIT_POINTER(mrt->mroute_sk, sk);
+			rcu_assign_pointer(mrt->mroute_sk, sk);
 			IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
 		}
 		rtnl_unlock();

diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index 8cd357a..ed3f2ad 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c

@@ -35,13 +35,13 @@
 }
 
 static void tcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
-		struct inet_diag_req *r, struct nlattr *bc)
+		struct inet_diag_req_v2 *r, struct nlattr *bc)
 {
 	inet_diag_dump_icsk(&tcp_hashinfo, skb, cb, r, bc);
 }
 
 static int tcp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh,
-		struct inet_diag_req *req)
+		struct inet_diag_req_v2 *req)
 {
 	return inet_diag_dump_one_icsk(&tcp_hashinfo, in_skb, nlh, req);
 }

diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c
index 7fed04f..4997878 100644
--- a/net/ipv4/tcp_memcontrol.c
+++ b/net/ipv4/tcp_memcontrol.c

@@ -108,7 +108,7 @@
 	tcp = tcp_from_cgproto(cg_proto);
 	percpu_counter_destroy(&tcp->tcp_sockets_allocated);
 
-	val = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_USAGE);
+	val = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT);
 
 	if (val != RESOURCE_MAX)
 		jump_label_dec(&memcg_socket_limit_enabled);

diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
index 69f8a7c..e5e18cb 100644
--- a/net/ipv4/udp_diag.c
+++ b/net/ipv4/udp_diag.c

@@ -19,7 +19,7 @@
 #include <linux/sock_diag.h>
 
 static int sk_diag_dump(struct sock *sk, struct sk_buff *skb,
-		struct netlink_callback *cb, struct inet_diag_req *req,
+		struct netlink_callback *cb, struct inet_diag_req_v2 *req,
 		struct nlattr *bc)
 {
 	if (!inet_diag_bc_sk(bc, sk))
@@ -30,7 +30,7 @@
 }
 
 static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,
-		const struct nlmsghdr *nlh, struct inet_diag_req *req)
+		const struct nlmsghdr *nlh, struct inet_diag_req_v2 *req)
 {
 	int err = -EINVAL;
 	struct sock *sk;
@@ -88,7 +88,7 @@
 }
 
 static void udp_dump(struct udp_table *table, struct sk_buff *skb, struct netlink_callback *cb,
-		struct inet_diag_req *r, struct nlattr *bc)
+		struct inet_diag_req_v2 *r, struct nlattr *bc)
 {
 	int num, s_num, slot, s_slot;
 
@@ -136,13 +136,13 @@
 }
 
 static void udp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
-		struct inet_diag_req *r, struct nlattr *bc)
+		struct inet_diag_req_v2 *r, struct nlattr *bc)
 {
 	udp_dump(&udp_table, skb, cb, r, bc);
 }
 
 static int udp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh,
-		struct inet_diag_req *req)
+		struct inet_diag_req_v2 *req)
 {
 	return udp_dump_one(&udp_table, in_skb, nlh, req);
 }
@@ -154,13 +154,13 @@
 };
 
 static void udplite_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
-		struct inet_diag_req *r, struct nlattr *bc)
+		struct inet_diag_req_v2 *r, struct nlattr *bc)
 {
 	udp_dump(&udplite_table, skb, cb, r, bc);
 }
 
 static int udplite_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh,
-		struct inet_diag_req *req)
+		struct inet_diag_req_v2 *req)
 {
 	return udp_dump_one(&udplite_table, in_skb, nlh, req);
 }

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 0ba0866..a225d5e 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c

@@ -429,7 +429,7 @@
 	ndev->tstamp = jiffies;
 	addrconf_sysctl_register(ndev);
 	/* protected by rtnl_lock */
-	RCU_INIT_POINTER(dev->ip6_ptr, ndev);
+	rcu_assign_pointer(dev->ip6_ptr, ndev);
 
 	/* Join all-node multicast group */
 	ipv6_dev_mc_inc(dev, &in6addr_linklocal_allnodes);

diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index e1f7761..aa21da6 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c

@@ -218,8 +218,8 @@
 {
 	struct ip6_tnl __rcu **tp = ip6_tnl_bucket(ip6n, &t->parms);
 
-	RCU_INIT_POINTER(t->next , rtnl_dereference(*tp));
-	RCU_INIT_POINTER(*tp, t);
+	rcu_assign_pointer(t->next , rtnl_dereference(*tp));
+	rcu_assign_pointer(*tp, t);
 }
 
 /**
@@ -237,7 +237,7 @@
 	     (iter = rtnl_dereference(*tp)) != NULL;
 	     tp = &iter->next) {
 		if (t == iter) {
-			RCU_INIT_POINTER(*tp, t->next);
+			rcu_assign_pointer(*tp, t->next);
 			break;
 		}
 	}
@@ -1450,7 +1450,7 @@
 
 	t->parms.proto = IPPROTO_IPV6;
 	dev_hold(dev);
-	RCU_INIT_POINTER(ip6n->tnls_wc[0], t);
+	rcu_assign_pointer(ip6n->tnls_wc[0], t);
 	return 0;
 }
 

diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index a4894f4..d02f7e4 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c

@@ -131,7 +131,7 @@
 
 int rawv6_mh_filter_register(mh_filter_t filter)
 {
-	RCU_INIT_POINTER(mh_filter, filter);
+	rcu_assign_pointer(mh_filter, filter);
 	return 0;
 }
 EXPORT_SYMBOL(rawv6_mh_filter_register);

diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 3b6dac9..133768e 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c

@@ -182,7 +182,7 @@
 	     (iter = rtnl_dereference(*tp)) != NULL;
 	     tp = &iter->next) {
 		if (t == iter) {
-			RCU_INIT_POINTER(*tp, t->next);
+			rcu_assign_pointer(*tp, t->next);
 			break;
 		}
 	}
@@ -192,8 +192,8 @@
 {
 	struct ip_tunnel __rcu **tp = ipip6_bucket(sitn, t);
 
-	RCU_INIT_POINTER(t->next, rtnl_dereference(*tp));
-	RCU_INIT_POINTER(*tp, t);
+	rcu_assign_pointer(t->next, rtnl_dereference(*tp));
+	rcu_assign_pointer(*tp, t);
 }
 
 static void ipip6_tunnel_clone_6rd(struct net_device *dev, struct sit_net *sitn)
@@ -393,7 +393,7 @@
 	p->addr = a->addr;
 	p->flags = a->flags;
 	t->prl_count++;
-	RCU_INIT_POINTER(t->prl, p);
+	rcu_assign_pointer(t->prl, p);
 out:
 	return err;
 }
@@ -1177,7 +1177,7 @@
 	if (!dev->tstats)
 		return -ENOMEM;
 	dev_hold(dev);
-	RCU_INIT_POINTER(sitn->tunnels_wc[0], tunnel);
+	rcu_assign_pointer(sitn->tunnels_wc[0], tunnel);
 	return 0;
 }
 

diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 96debba..1068f66 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c

@@ -332,7 +332,7 @@
 	status = WLAN_STATUS_SUCCESS;
 
 	/* activate it for RX */
-	RCU_INIT_POINTER(sta->ampdu_mlme.tid_rx[tid], tid_agg_rx);
+	rcu_assign_pointer(sta->ampdu_mlme.tid_rx[tid], tid_agg_rx);
 
 	if (timeout)
 		mod_timer(&tid_agg_rx->session_timer, TU_TO_EXP_TIME(timeout));

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 850bb96..e60df48 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c

@@ -616,7 +616,7 @@
 
 	sdata->vif.bss_conf.dtim_period = new->dtim_period;
 
-	RCU_INIT_POINTER(sdata->u.ap.beacon, new);
+	rcu_assign_pointer(sdata->u.ap.beacon, new);
 
 	synchronize_rcu();
 
@@ -1033,7 +1033,7 @@
 				return -EBUSY;
 			}
 
-			RCU_INIT_POINTER(vlansdata->u.vlan.sta, sta);
+			rcu_assign_pointer(vlansdata->u.vlan.sta, sta);
 		}
 
 		sta->sdata = vlansdata;

diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index f8a32bf..b3d76b7 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c

@@ -207,7 +207,7 @@
 		*pos++ = 0; /* U-APSD no in use */
 	}
 
-	RCU_INIT_POINTER(ifibss->presp, skb);
+	rcu_assign_pointer(ifibss->presp, skb);
 
 	sdata->vif.bss_conf.beacon_int = beacon_int;
 	sdata->vif.bss_conf.basic_rates = basic_rates;

diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index b197136..3c428d4 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c

@@ -73,7 +73,7 @@
 	if (!s)
 		return -ENOENT;
 	if (s == sta) {
-		RCU_INIT_POINTER(local->sta_hash[STA_HASH(sta->sta.addr)],
+		rcu_assign_pointer(local->sta_hash[STA_HASH(sta->sta.addr)],
 				   s->hnext);
 		return 0;
 	}
@@ -83,7 +83,7 @@
 		s = rcu_dereference_protected(s->hnext,
 					lockdep_is_held(&local->sta_mtx));
 	if (rcu_access_pointer(s->hnext)) {
-		RCU_INIT_POINTER(s->hnext, sta->hnext);
+		rcu_assign_pointer(s->hnext, sta->hnext);
 		return 0;
 	}
 
@@ -226,7 +226,7 @@
 {
 	lockdep_assert_held(&local->sta_mtx);
 	sta->hnext = local->sta_hash[STA_HASH(sta->sta.addr)];
-	RCU_INIT_POINTER(local->sta_hash[STA_HASH(sta->sta.addr)], sta);
+	rcu_assign_pointer(local->sta_hash[STA_HASH(sta->sta.addr)], sta);
 }
 
 static void sta_unblock(struct work_struct *wk)

diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 93aab07..422b798 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c

@@ -106,7 +106,7 @@
 		if (status->flag & RX_FLAG_MMIC_ERROR)
 			goto mic_fail;
 
-		if (!(status->flag & RX_FLAG_IV_STRIPPED))
+		if (!(status->flag & RX_FLAG_IV_STRIPPED) && rx->key)
 			goto update_iv;
 
 		return RX_CONTINUE;

diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index e875f89..76613f5 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c

@@ -777,7 +777,7 @@
 		if (exp->helper) {
 			help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
 			if (help)
-				RCU_INIT_POINTER(help->helper, exp->helper);
+				rcu_assign_pointer(help->helper, exp->helper);
 		}
 
 #ifdef CONFIG_NF_CONNTRACK_MARK

diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index b62c414..14af632 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c

@@ -91,7 +91,7 @@
 		ret = -EBUSY;
 		goto out_unlock;
 	}
-	RCU_INIT_POINTER(net->ct.nf_conntrack_event_cb, new);
+	rcu_assign_pointer(net->ct.nf_conntrack_event_cb, new);
 	mutex_unlock(&nf_ct_ecache_mutex);
 	return ret;
 
@@ -128,7 +128,7 @@
 		ret = -EBUSY;
 		goto out_unlock;
 	}
-	RCU_INIT_POINTER(net->ct.nf_expect_event_cb, new);
+	rcu_assign_pointer(net->ct.nf_expect_event_cb, new);
 	mutex_unlock(&nf_ct_ecache_mutex);
 	return ret;
 

diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c
index 4605c94..641ff5f 100644
--- a/net/netfilter/nf_conntrack_extend.c
+++ b/net/netfilter/nf_conntrack_extend.c

@@ -169,7 +169,7 @@
 	   before updating alloc_size */
 	type->alloc_size = ALIGN(sizeof(struct nf_ct_ext), type->align)
 			   + type->len;
-	RCU_INIT_POINTER(nf_ct_ext_types[type->id], type);
+	rcu_assign_pointer(nf_ct_ext_types[type->id], type);
 	update_alloc_size(type);
 out:
 	mutex_unlock(&nf_ct_ext_type_mutex);

diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index c9e0de0..299fec9 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c

@@ -157,7 +157,7 @@
 		memset(&help->help, 0, sizeof(help->help));
 	}
 
-	RCU_INIT_POINTER(help->helper, helper);
+	rcu_assign_pointer(help->helper, helper);
 out:
 	return ret;
 }

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index e07dc3a..2a4834b 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c

@@ -1172,7 +1172,7 @@
 		return -EOPNOTSUPP;
 	}
 
-	RCU_INIT_POINTER(help->helper, helper);
+	rcu_assign_pointer(help->helper, helper);
 
 	return 0;
 }

diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index ce0c406..957374a 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c

@@ -55,7 +55,7 @@
 		llog = rcu_dereference_protected(nf_loggers[pf],
 						 lockdep_is_held(&nf_log_mutex));
 		if (llog == NULL)
-			RCU_INIT_POINTER(nf_loggers[pf], logger);
+			rcu_assign_pointer(nf_loggers[pf], logger);
 	}
 
 	mutex_unlock(&nf_log_mutex);
@@ -92,7 +92,7 @@
 		mutex_unlock(&nf_log_mutex);
 		return -ENOENT;
 	}
-	RCU_INIT_POINTER(nf_loggers[pf], logger);
+	rcu_assign_pointer(nf_loggers[pf], logger);
 	mutex_unlock(&nf_log_mutex);
 	return 0;
 }
@@ -250,7 +250,7 @@
 			mutex_unlock(&nf_log_mutex);
 			return -ENOENT;
 		}
-		RCU_INIT_POINTER(nf_loggers[tindex], logger);
+		rcu_assign_pointer(nf_loggers[tindex], logger);
 		mutex_unlock(&nf_log_mutex);
 	} else {
 		mutex_lock(&nf_log_mutex);

diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 99ffd28..b3a7db6 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c

@@ -40,7 +40,7 @@
 	else if (old)
 		ret = -EBUSY;
 	else {
-		RCU_INIT_POINTER(queue_handler[pf], qh);
+		rcu_assign_pointer(queue_handler[pf], qh);
 		ret = 0;
 	}
 	mutex_unlock(&queue_handler_mutex);

diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index c879c1a..b4f8d84 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c

@@ -59,7 +59,7 @@
 		nfnl_unlock();
 		return -EBUSY;
 	}
-	RCU_INIT_POINTER(subsys_table[n->subsys_id], n);
+	rcu_assign_pointer(subsys_table[n->subsys_id], n);
 	nfnl_unlock();
 
 	return 0;
@@ -210,7 +210,7 @@
 	if (!nfnl)
 		return -ENOMEM;
 	net->nfnl_stash = nfnl;
-	RCU_INIT_POINTER(net->nfnl, nfnl);
+	rcu_assign_pointer(net->nfnl, nfnl);
 	return 0;
 }
 

diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c
index 3820411..d8d4243 100644
--- a/net/netlabel/netlabel_domainhash.c
+++ b/net/netlabel/netlabel_domainhash.c

@@ -282,7 +282,7 @@
 		INIT_LIST_HEAD(&hsh_tbl->tbl[iter]);
 
 	spin_lock(&netlbl_domhsh_lock);
-	RCU_INIT_POINTER(netlbl_domhsh, hsh_tbl);
+	rcu_assign_pointer(netlbl_domhsh, hsh_tbl);
 	spin_unlock(&netlbl_domhsh_lock);
 
 	return 0;
@@ -330,7 +330,7 @@
 				    &rcu_dereference(netlbl_domhsh)->tbl[bkt]);
 		} else {
 			INIT_LIST_HEAD(&entry->list);
-			RCU_INIT_POINTER(netlbl_domhsh_def, entry);
+			rcu_assign_pointer(netlbl_domhsh_def, entry);
 		}
 
 		if (entry->type == NETLBL_NLTYPE_ADDRSELECT) {

diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index 4b5fa0f..e7ff694 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c

@@ -354,7 +354,7 @@
 		INIT_LIST_HEAD(&iface->list);
 		if (netlbl_unlhsh_rcu_deref(netlbl_unlhsh_def) != NULL)
 			goto add_iface_failure;
-		RCU_INIT_POINTER(netlbl_unlhsh_def, iface);
+		rcu_assign_pointer(netlbl_unlhsh_def, iface);
 	}
 	spin_unlock(&netlbl_unlhsh_lock);
 
@@ -1447,11 +1447,9 @@
 	for (iter = 0; iter < hsh_tbl->size; iter++)
 		INIT_LIST_HEAD(&hsh_tbl->tbl[iter]);
 
-	rcu_read_lock();
 	spin_lock(&netlbl_unlhsh_lock);
-	RCU_INIT_POINTER(netlbl_unlhsh, hsh_tbl);
+	rcu_assign_pointer(netlbl_unlhsh, hsh_tbl);
 	spin_unlock(&netlbl_unlhsh_lock);
-	rcu_read_unlock();
 
 	register_netdevice_notifier(&netlbl_unlhsh_netdev_notifier);
 

diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c
index bf10ea8..d65f699 100644
--- a/net/phonet/af_phonet.c
+++ b/net/phonet/af_phonet.c

@@ -480,7 +480,7 @@
 	if (proto_tab[protocol])
 		err = -EBUSY;
 	else
-		RCU_INIT_POINTER(proto_tab[protocol], pp);
+		rcu_assign_pointer(proto_tab[protocol], pp);
 	mutex_unlock(&proto_tab_lock);
 
 	return err;

diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c
index c582761..9b9a85e 100644
--- a/net/phonet/pn_dev.c
+++ b/net/phonet/pn_dev.c

@@ -390,7 +390,7 @@
 	daddr = daddr >> 2;
 	mutex_lock(&routes->lock);
 	if (routes->table[daddr] == NULL) {
-		RCU_INIT_POINTER(routes->table[daddr], dev);
+		rcu_assign_pointer(routes->table[daddr], dev);
 		dev_hold(dev);
 		err = 0;
 	}

diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index 3f8d0b1..4c7eff3 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c

@@ -680,7 +680,7 @@
 	mutex_lock(&resource_mutex);
 	if (pnres.sk[res] == NULL) {
 		sock_hold(sk);
-		RCU_INIT_POINTER(pnres.sk[res], sk);
+		rcu_assign_pointer(pnres.sk[res], sk);
 		ret = 0;
 	}
 	mutex_unlock(&resource_mutex);

diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c
index 4e1de17..a817705 100644
--- a/net/rds/iw_rdma.c
+++ b/net/rds/iw_rdma.c

@@ -477,17 +477,6 @@
 	}
 }
 
-static inline unsigned int rds_iw_flush_goal(struct rds_iw_mr_pool *pool, int free_all)
-{
-	unsigned int item_count;
-
-	item_count = atomic_read(&pool->item_count);
-	if (free_all)
-		return item_count;
-
-	return 0;
-}
-
 /*
  * Flush our pool of MRs.
  * At a minimum, all currently unused MRs are unmapped.
@@ -500,7 +489,7 @@
 	LIST_HEAD(unmap_list);
 	LIST_HEAD(kill_list);
 	unsigned long flags;
-	unsigned int nfreed = 0, ncleaned = 0, unpinned = 0, free_goal;
+	unsigned int nfreed = 0, ncleaned = 0, unpinned = 0;
 	int ret = 0;
 
 	rds_iw_stats_inc(s_iw_rdma_mr_pool_flush);
@@ -514,8 +503,6 @@
 		list_splice_init(&pool->clean_list, &kill_list);
 	spin_unlock_irqrestore(&pool->list_lock, flags);
 
-	free_goal = rds_iw_flush_goal(pool, free_all);
-
 	/* Batched invalidate of dirty MRs.
 	 * For FMR based MRs, the mappings on the unmap list are
 	 * actually members of an ibmr (ibmr->mapping). They either

diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 0a79640..67494ae 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c

@@ -24,6 +24,7 @@
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
 #include <net/flow_keys.h>
+#include <net/red.h>
 
 
 /*	Stochastic Fairness Queuing algorithm.
@@ -108,24 +109,30 @@
 	struct sfq_head dep; /* anchor in dep[] chains */
 	unsigned short	hash; /* hash value (index in ht[]) */
 	short		allot; /* credit for this slot */
+
+	unsigned int    backlog;
+	struct red_vars vars;
 };
 
 struct sfq_sched_data {
 /* frequently used fields */
 	int		limit;		/* limit of total number of packets in this qdisc */
 	unsigned int	divisor;	/* number of slots in hash table */
-	unsigned int	maxflows;	/* number of flows in flows array */
-	int		headdrop;
-	int		maxdepth;	/* limit of packets per flow */
+	u8		headdrop;
+	u8		maxdepth;	/* limit of packets per flow */
 
 	u32		perturbation;
-	struct tcf_proto *filter_list;
-	sfq_index	cur_depth;	/* depth of longest slot */
+	u8		cur_depth;	/* depth of longest slot */
+	u8		flags;
 	unsigned short  scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */
-	struct sfq_slot *tail;		/* current slot in round */
+	struct tcf_proto *filter_list;
 	sfq_index	*ht;		/* Hash table ('divisor' slots) */
 	struct sfq_slot	*slots;		/* Flows table ('maxflows' entries) */
 
+	struct red_parms *red_parms;
+	struct tc_sfqred_stats stats;
+	struct sfq_slot *tail;		/* current slot in round */
+
 	struct sfq_head	dep[SFQ_MAX_DEPTH + 1];
 					/* Linked lists of slots, indexed by depth
 					 * dep[0] : list of unused flows
@@ -133,6 +140,7 @@
 					 * dep[X] : list of flows with X packets
 					 */
 
+	unsigned int	maxflows;	/* number of flows in flows array */
 	int		perturb_period;
 	unsigned int	quantum;	/* Allotment per round: MUST BE >= MTU */
 	struct timer_list perturb_timer;
@@ -321,6 +329,7 @@
 drop:
 		skb = q->headdrop ? slot_dequeue_head(slot) : slot_dequeue_tail(slot);
 		len = qdisc_pkt_len(skb);
+		slot->backlog -= len;
 		sfq_dec(q, x);
 		kfree_skb(skb);
 		sch->q.qlen--;
@@ -341,6 +350,23 @@
 	return 0;
 }
 
+/* Is ECN parameter configured */
+static int sfq_prob_mark(const struct sfq_sched_data *q)
+{
+	return q->flags & TC_RED_ECN;
+}
+
+/* Should packets over max threshold just be marked */
+static int sfq_hard_mark(const struct sfq_sched_data *q)
+{
+	return (q->flags & (TC_RED_ECN | TC_RED_HARDDROP)) == TC_RED_ECN;
+}
+
+static int sfq_headdrop(const struct sfq_sched_data *q)
+{
+	return q->headdrop;
+}
+
 static int
 sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
@@ -349,6 +375,8 @@
 	sfq_index x, qlen;
 	struct sfq_slot *slot;
 	int uninitialized_var(ret);
+	struct sk_buff *head;
+	int delta;
 
 	hash = sfq_classify(skb, sch, &ret);
 	if (hash == 0) {
@@ -368,24 +396,75 @@
 		q->ht[hash] = x;
 		slot = &q->slots[x];
 		slot->hash = hash;
+		slot->backlog = 0; /* should already be 0 anyway... */
+		red_set_vars(&slot->vars);
+		goto enqueue;
+	}
+	if (q->red_parms) {
+		slot->vars.qavg = red_calc_qavg_no_idle_time(q->red_parms,
+							&slot->vars,
+							slot->backlog);
+		switch (red_action(q->red_parms,
+				   &slot->vars,
+				   slot->vars.qavg)) {
+		case RED_DONT_MARK:
+			break;
+
+		case RED_PROB_MARK:
+			sch->qstats.overlimits++;
+			if (sfq_prob_mark(q)) {
+				/* We know we have at least one packet in queue */
+				if (sfq_headdrop(q) &&
+				    INET_ECN_set_ce(slot->skblist_next)) {
+					q->stats.prob_mark_head++;
+					break;
+				}
+				if (INET_ECN_set_ce(skb)) {
+					q->stats.prob_mark++;
+					break;
+				}
+			}
+			q->stats.prob_drop++;
+			goto congestion_drop;
+
+		case RED_HARD_MARK:
+			sch->qstats.overlimits++;
+			if (sfq_hard_mark(q)) {
+				/* We know we have at least one packet in queue */
+				if (sfq_headdrop(q) &&
+				    INET_ECN_set_ce(slot->skblist_next)) {
+					q->stats.forced_mark_head++;
+					break;
+				}
+				if (INET_ECN_set_ce(skb)) {
+					q->stats.forced_mark++;
+					break;
+				}
+			}
+			q->stats.forced_drop++;
+			goto congestion_drop;
+		}
 	}
 
 	if (slot->qlen >= q->maxdepth) {
-		struct sk_buff *head;
-
-		if (!q->headdrop)
+congestion_drop:
+		if (!sfq_headdrop(q))
 			return qdisc_drop(skb, sch);
 
+		/* We know we have at least one packet in queue */
 		head = slot_dequeue_head(slot);
-		sch->qstats.backlog -= qdisc_pkt_len(head);
+		delta = qdisc_pkt_len(head) - qdisc_pkt_len(skb);
+		sch->qstats.backlog -= delta;
+		slot->backlog -= delta;
 		qdisc_drop(head, sch);
 
-		sch->qstats.backlog += qdisc_pkt_len(skb);
 		slot_queue_add(slot, skb);
 		return NET_XMIT_CN;
 	}
 
+enqueue:
 	sch->qstats.backlog += qdisc_pkt_len(skb);
+	slot->backlog += qdisc_pkt_len(skb);
 	slot_queue_add(slot, skb);
 	sfq_inc(q, x);
 	if (slot->qlen == 1) {		/* The flow is new */
@@ -396,6 +475,7 @@
 			slot->next = q->tail->next;
 			q->tail->next = x;
 		}
+		/* We could use a bigger initial quantum for new flows */
 		slot->allot = q->scaled_quantum;
 	}
 	if (++sch->q.qlen <= q->limit)
@@ -439,7 +519,7 @@
 	qdisc_bstats_update(sch, skb);
 	sch->q.qlen--;
 	sch->qstats.backlog -= qdisc_pkt_len(skb);
-
+	slot->backlog -= qdisc_pkt_len(skb);
 	/* Is the slot empty? */
 	if (slot->qlen == 0) {
 		q->ht[slot->hash] = SFQ_EMPTY_SLOT;
@@ -490,6 +570,8 @@
 			sfq_dec(q, i);
 			__skb_queue_tail(&list, skb);
 		}
+		slot->backlog = 0;
+		red_set_vars(&slot->vars);
 		q->ht[slot->hash] = SFQ_EMPTY_SLOT;
 	}
 	q->tail = NULL;
@@ -514,6 +596,11 @@
 		if (slot->qlen >= q->maxdepth)
 			goto drop;
 		slot_queue_add(slot, skb);
+		if (q->red_parms)
+			slot->vars.qavg = red_calc_qavg(q->red_parms,
+							&slot->vars,
+							slot->backlog);
+		slot->backlog += qdisc_pkt_len(skb);
 		sfq_inc(q, x);
 		if (slot->qlen == 1) {		/* The flow is new */
 			if (q->tail == NULL) {	/* It is the first flow */
@@ -552,6 +639,7 @@
 	struct tc_sfq_qopt *ctl = nla_data(opt);
 	struct tc_sfq_qopt_v1 *ctl_v1 = NULL;
 	unsigned int qlen;
+	struct red_parms *p = NULL;
 
 	if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
 		return -EINVAL;
@@ -560,7 +648,11 @@
 	if (ctl->divisor &&
 	    (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536))
 		return -EINVAL;
-
+	if (ctl_v1 && ctl_v1->qth_min) {
+		p = kmalloc(sizeof(*p), GFP_KERNEL);
+		if (!p)
+			return -ENOMEM;
+	}
 	sch_tree_lock(sch);
 	if (ctl->quantum) {
 		q->quantum = ctl->quantum;
@@ -576,6 +668,16 @@
 	if (ctl_v1) {
 		if (ctl_v1->depth)
 			q->maxdepth = min_t(u32, ctl_v1->depth, SFQ_MAX_DEPTH);
+		if (p) {
+			swap(q->red_parms, p);
+			red_set_parms(q->red_parms,
+				      ctl_v1->qth_min, ctl_v1->qth_max,
+				      ctl_v1->Wlog,
+				      ctl_v1->Plog, ctl_v1->Scell_log,
+				      NULL,
+				      ctl_v1->max_P);
+		}
+		q->flags = ctl_v1->flags;
 		q->headdrop = ctl_v1->headdrop;
 	}
 	if (ctl->limit) {
@@ -594,6 +696,7 @@
 		q->perturbation = net_random();
 	}
 	sch_tree_unlock(sch);
+	kfree(p);
 	return 0;
 }
 
@@ -625,6 +728,7 @@
 	del_timer_sync(&q->perturb_timer);
 	sfq_free(q->ht);
 	sfq_free(q->slots);
+	kfree(q->red_parms);
 }
 
 static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
@@ -683,6 +787,7 @@
 	struct sfq_sched_data *q = qdisc_priv(sch);
 	unsigned char *b = skb_tail_pointer(skb);
 	struct tc_sfq_qopt_v1 opt;
+	struct red_parms *p = q->red_parms;
 
 	memset(&opt, 0, sizeof(opt));
 	opt.v0.quantum	= q->quantum;
@@ -693,6 +798,17 @@
 	opt.depth	= q->maxdepth;
 	opt.headdrop	= q->headdrop;
 
+	if (p) {
+		opt.qth_min	= p->qth_min >> p->Wlog;
+		opt.qth_max	= p->qth_max >> p->Wlog;
+		opt.Wlog	= p->Wlog;
+		opt.Plog	= p->Plog;
+		opt.Scell_log	= p->Scell_log;
+		opt.max_P	= p->max_P;
+	}
+	memcpy(&opt.stats, &q->stats, sizeof(opt.stats));
+	opt.flags	= q->flags;
+
 	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
 
 	return skb->len;
@@ -747,15 +863,13 @@
 	sfq_index idx = q->ht[cl - 1];
 	struct gnet_stats_queue qs = { 0 };
 	struct tc_sfq_xstats xstats = { 0 };
-	struct sk_buff *skb;
 
 	if (idx != SFQ_EMPTY_SLOT) {
 		const struct sfq_slot *slot = &q->slots[idx];
 
 		xstats.allot = slot->allot << SFQ_ALLOT_SHIFT;
 		qs.qlen = slot->qlen;
-		slot_queue_walk(slot, skb)
-			qs.backlog += qdisc_pkt_len(skb);
+		qs.backlog = slot->backlog;
 	}
 	if (gnet_stats_copy_queue(d, &qs) < 0)
 		return -1;

diff --git a/net/socket.c b/net/socket.c
index e56162c..28a96af 100644
--- a/net/socket.c
+++ b/net/socket.c

@@ -2492,7 +2492,7 @@
 				      lockdep_is_held(&net_family_lock)))
 		err = -EEXIST;
 	else {
-		RCU_INIT_POINTER(net_families[ops->family], ops);
+		rcu_assign_pointer(net_families[ops->family], ops);
 		err = 0;
 	}
 	spin_unlock(&net_family_lock);

diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 28d72d2..affa631 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c

@@ -122,7 +122,7 @@
 	if (!test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags))
 		return;
 	gss_get_ctx(ctx);
-	RCU_INIT_POINTER(gss_cred->gc_ctx, ctx);
+	rcu_assign_pointer(gss_cred->gc_ctx, ctx);
 	set_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
 	smp_mb__before_clear_bit();
 	clear_bit(RPCAUTH_CRED_NEW, &cred->cr_flags);

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index b3d3cf8..afeea32 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c

@@ -2250,6 +2250,7 @@
 };
 
 static int parse_station_flags(struct genl_info *info,
+			       enum nl80211_iftype iftype,
 			       struct station_parameters *params)
 {
 	struct nlattr *flags[NL80211_STA_FLAG_MAX + 1];
@@ -2283,8 +2284,35 @@
 			     nla, sta_flags_policy))
 		return -EINVAL;
 
-	params->sta_flags_mask = (1 << __NL80211_STA_FLAG_AFTER_LAST) - 1;
-	params->sta_flags_mask &= ~1;
+	/*
+	 * Only allow certain flags for interface types so that
+	 * other attributes are silently ignored. Remember that
+	 * this is backward compatibility code with old userspace
+	 * and shouldn't be hit in other cases anyway.
+	 * Interface types not listed below are rejected with -EINVAL.
+	 */
+	switch (iftype) {
+	case NL80211_IFTYPE_AP:
+	case NL80211_IFTYPE_AP_VLAN:
+	case NL80211_IFTYPE_P2P_GO:
+		params->sta_flags_mask = BIT(NL80211_STA_FLAG_AUTHORIZED) |
+					 BIT(NL80211_STA_FLAG_SHORT_PREAMBLE) |
+					 BIT(NL80211_STA_FLAG_WME) |
+					 BIT(NL80211_STA_FLAG_MFP);
+		break;
+	case NL80211_IFTYPE_P2P_CLIENT:
+	case NL80211_IFTYPE_STATION:
+		params->sta_flags_mask = BIT(NL80211_STA_FLAG_AUTHORIZED) |
+					 BIT(NL80211_STA_FLAG_TDLS_PEER);
+		break;
+	case NL80211_IFTYPE_MESH_POINT:
+		params->sta_flags_mask = BIT(NL80211_STA_FLAG_AUTHENTICATED) |
+					 BIT(NL80211_STA_FLAG_MFP) |
+					 BIT(NL80211_STA_FLAG_AUTHORIZED);
+		break;
+	default:
+		return -EINVAL;
+	}
 
 	for (flag = 1; flag <= NL80211_STA_FLAG_MAX; flag++)
 		if (flags[flag])
@@ -2585,7 +2613,7 @@
 	if (!rdev->ops->change_station)
 		return -EOPNOTSUPP;
 
-	if (parse_station_flags(info, &params))
+	if (parse_station_flags(info, dev->ieee80211_ptr->iftype, &params))
 		return -EINVAL;
 
 	if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION])
@@ -2731,7 +2759,7 @@
 	if (!rdev->ops->add_station)
 		return -EOPNOTSUPP;
 
-	if (parse_station_flags(info, &params))
+	if (parse_station_flags(info, dev->ieee80211_ptr->iftype, &params))
 		return -EINVAL;
 
 	switch (dev->ieee80211_ptr->iftype) {

diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index e0d747a..637f11a 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c

@@ -2927,7 +2927,7 @@
 	if (nlsk == NULL)
 		return -ENOMEM;
 	net->xfrm.nlsk_stash = nlsk; /* Don't set to NULL */
-	RCU_INIT_POINTER(net->xfrm.nlsk, nlsk);
+	rcu_assign_pointer(net->xfrm.nlsk, nlsk);
 	return 0;
 }
 

diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c
index 363ab46..296f17f 100644
--- a/scripts/mod/file2alias.c
+++ b/scripts/mod/file2alias.c

@@ -774,6 +774,15 @@
 	return 1;
 }
 
+/*
+ * Looks like: mcp:S
+ *
+ * Formats MCP_MODULE_PREFIX followed by id->name into alias and
+ * returns 1. The filename argument is not used by this function.
+ */
+static int do_mcp_entry(const char *filename, struct mcp_device_id *id,
+			char *alias)
+{
+	sprintf(alias, MCP_MODULE_PREFIX "%s", id->name);
+
+	return 1;
+}
+
 static const struct dmifield {
 	const char *prefix;
 	int field;
@@ -1095,6 +1104,10 @@
 		do_table(symval, sym->st_size,
 			 sizeof(struct spi_device_id), "spi",
 			 do_spi_entry, mod);
+	else if (sym_is(symname, "__mod_mcp_device_table"))
+		do_table(symval, sym->st_size,
+			 sizeof(struct mcp_device_id), "mcp",
+			 do_mcp_entry, mod);
 	else if (sym_is(symname, "__mod_dmi_device_table"))
 		do_table(symval, sym->st_size,
 			 sizeof(struct dmi_system_id), "dmi",

diff --git a/Documentation/virtual/lguest/.gitignore b/tools/lguest/.gitignore
similarity index 100%
rename from Documentation/virtual/lguest/.gitignore
rename to tools/lguest/.gitignore


diff --git a/Documentation/virtual/lguest/Makefile b/tools/lguest/Makefile
similarity index 100%
rename from Documentation/virtual/lguest/Makefile
rename to tools/lguest/Makefile


diff --git a/Documentation/virtual/lguest/extract b/tools/lguest/extract
similarity index 100%
rename from Documentation/virtual/lguest/extract
rename to tools/lguest/extract


diff --git a/Documentation/virtual/lguest/lguest.c b/tools/lguest/lguest.c
similarity index 99%
rename from Documentation/virtual/lguest/lguest.c
rename to tools/lguest/lguest.c
index c095d79..f759f4f 100644
--- a/Documentation/virtual/lguest/lguest.c
+++ b/tools/lguest/lguest.c

@@ -49,7 +49,7 @@
 #include <linux/virtio_rng.h>
 #include <linux/virtio_ring.h>
 #include <asm/bootparam.h>
-#include "../../../include/linux/lguest_launcher.h"
+#include "../../include/linux/lguest_launcher.h"
 /*L:110
  * We can ignore the 43 include files we need for this program, but I do want
  * to draw attention to the use of kernel-style types.

diff --git a/Documentation/virtual/lguest/lguest.txt b/tools/lguest/lguest.txt
similarity index 100%
rename from Documentation/virtual/lguest/lguest.txt
rename to tools/lguest/lguest.txt


diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
new file mode 100644
index 0000000..4ec8401
--- /dev/null
+++ b/tools/testing/selftests/Makefile

@@ -0,0 +1,11 @@
+TARGETS = breakpoints
+
+# Recurse with $(MAKE) rather than a literal "make" so that command-line
+# flags and the jobserver are handed down to the per-target builds.
+all:
+	for TARGET in $(TARGETS); do \
+		$(MAKE) -C $$TARGET; \
+	done;
+
+clean:
+	for TARGET in $(TARGETS); do \
+		$(MAKE) -C $$TARGET clean; \
+	done;

diff --git a/tools/testing/selftests/breakpoints/Makefile b/tools/testing/selftests/breakpoints/Makefile
new file mode 100644
index 0000000..f362722
--- /dev/null
+++ b/tools/testing/selftests/breakpoints/Makefile

@@ -0,0 +1,20 @@
+# Taken from perf makefile
+# ARCH defaults to the output of "uname -m" ("not" if uname fails), with
+# i?86 rewritten to i386. Both i386 and x86_64 are then folded into x86.
+uname_M := $(shell uname -m 2>/dev/null || echo not)
+ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/)
+ifeq ($(ARCH),i386)
+        ARCH := x86
+endif
+ifeq ($(ARCH),x86_64)
+	ARCH := x86
+endif
+
+
+# Build the test binary as run_test on x86; on any other ARCH only print
+# a message.
+all:
+ifeq ($(ARCH),x86)
+	gcc breakpoint_test.c -o run_test
+else
+	echo "Not an x86 target, can't build breakpoints selftests"
+endif
+
+# Remove the built test binary.
+clean:
+	rm -fr run_test

diff --git a/tools/testing/selftests/breakpoints/breakpoint_test.c b/tools/testing/selftests/breakpoints/breakpoint_test.c
new file mode 100644
index 0000000..a0743f3
--- /dev/null
+++ b/tools/testing/selftests/breakpoints/breakpoint_test.c

@@ -0,0 +1,394 @@
+/*
+ * Copyright (C) 2011 Red Hat, Inc., Frederic Weisbecker <fweisbec@redhat.com>
+ *
+ * Licensed under the terms of the GNU GPL License version 2
+ *
+ * Selftests for breakpoints (and more generally the do_debug() path) in x86.
+ */
+
+
+#include <sys/ptrace.h>
+#include <unistd.h>
+#include <stddef.h>
+#include <sys/user.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+
+/* Breakpoint access modes */
+enum {
+	BP_X = 1,
+	BP_RW = 2,
+	BP_W = 4,
+};
+
+static pid_t child_pid;
+
+/*
+ * Ensures the child and parent are always "talking" about
+ * the same test sequence. (ie: that we haven't forgotten
+ * to call check_trapped() somewhere).
+ */
+static int nr_tests;
+
+/*
+ * Point debug register n of the traced child at addr.
+ *
+ * The address is written with PTRACE_POKEUSER into the u_debugreg[n]
+ * slot of the child's struct user. A failed write is fatal: the error
+ * is reported and the tester exits.
+ */
+static void set_breakpoint_addr(void *addr, int n)
+{
+	long ret;
+
+	ret = ptrace(PTRACE_POKEUSER, child_pid,
+		     offsetof(struct user, u_debugreg[n]), addr);
+	if (ret) {
+		/* perror() appends ": <reason>" and the newline itself */
+		perror("Can't set breakpoint addr");
+		exit(-1);
+	}
+}
+
+/*
+ * Set or clear the dr7 bits that describe breakpoint slot n in the child.
+ *
+ * type:   BP_X, BP_W or BP_RW
+ * len:    access length in bytes: 1, 2, 4 or 8
+ * local:  non-zero to include the slot's local enable bit (and bit 8)
+ * global: non-zero to include the slot's global enable bit (and bit 9)
+ * set:    non-zero to OR the computed bits into dr7, zero to mask them out
+ *
+ * dr7 is fetched with PTRACE_PEEKUSER, updated and written back with
+ * PTRACE_POKEUSER. An unknown type or len, or a failed write, is fatal.
+ */
+static void toggle_breakpoint(int n, int type, int len,
+			      int local, int global, int set)
+{
+	int ret;
+
+	int xtype, xlen;
+	unsigned long vdr7, dr7;
+
+	switch (type) {
+	case BP_X:
+		xtype = 0;
+		break;
+	case BP_W:
+		xtype = 1;
+		break;
+	case BP_RW:
+		xtype = 3;
+		break;
+	default:
+		/* xtype would otherwise be read uninitialized below */
+		fprintf(stderr, "Unknown breakpoint type %d\n", type);
+		exit(-1);
+	}
+
+	switch (len) {
+	case 1:
+		xlen = 0;
+		break;
+	case 2:
+		xlen = 4;
+		break;
+	case 4:
+		xlen = 0xc;
+		break;
+	case 8:
+		xlen = 8;
+		break;
+	default:
+		/* xlen would otherwise be read uninitialized below */
+		fprintf(stderr, "Unsupported breakpoint length %d\n", len);
+		exit(-1);
+	}
+
+	dr7 = ptrace(PTRACE_PEEKUSER, child_pid,
+		     offsetof(struct user, u_debugreg[7]), 0);
+
+	/* type/len field of slot n: 4 bits per slot, starting at bit 16 */
+	vdr7 = (xlen | xtype) << 16;
+	vdr7 <<= 4 * n;
+
+	if (local) {
+		vdr7 |= 1 << (2 * n);
+		vdr7 |= 1 << 8;
+	}
+	if (global) {
+		vdr7 |= 2 << (2 * n);
+		vdr7 |= 1 << 9;
+	}
+
+	if (set)
+		dr7 |= vdr7;
+	else
+		dr7 &= ~vdr7;
+
+	ret = ptrace(PTRACE_POKEUSER, child_pid,
+		     offsetof(struct user, u_debugreg[7]), dr7);
+	if (ret) {
+		perror("Can't set dr7");
+		exit(-1);
+	}
+}
+
+/* Dummy variables to test read/write accesses */
+static unsigned long long dummy_var[4];
+
+/*
+ * Dummy functions to test execution accesses. They are empty and are
+ * collected in dummy_funcs[] so that they can be selected by index.
+ */
+static void dummy_func(void) { }
+static void dummy_func1(void) { }
+static void dummy_func2(void) { }
+static void dummy_func3(void) { }
+
+static void (*dummy_funcs[])(void) = {
+	dummy_func,
+	dummy_func1,
+	dummy_func2,
+	dummy_func3,
+};
+
+static int trapped;
+
+static void check_trapped(void)
+{
+	/*
+	 * If we haven't trapped, wake up the parent
+	 * so that it notices the failure.
+	 *
+	 * "trapped" is written from the parent side (PTRACE_POKEDATA in
+	 * check_success()); here the child sends itself SIGUSR1 when the
+	 * flag is still clear. Only after that is the flag reset and
+	 * nr_tests incremented, so the order of these statements matters.
+	 */
+	if (!trapped)
+		kill(getpid(), SIGUSR1);
+	trapped = 0;
+
+	nr_tests++;
+}
+
+/*
+ * Store an all-ones value of the requested width (1, 2, 4 or 8 bytes)
+ * at the start of each dummy_var[] slot, calling check_trapped() after
+ * every slot. Any other width stores nothing but still runs the check.
+ */
+static void write_var(int len)
+{
+	int slot;
+
+	for (slot = 0; slot < 4; slot++) {
+		void *target = &dummy_var[slot];
+
+		if (len == 1)
+			*(char *)target = 0xff;
+		else if (len == 2)
+			*(short *)target = 0xffff;
+		else if (len == 4)
+			*(int *)target = 0xffffffff;
+		else if (len == 8)
+			*(long long *)target = 0xffffffffffffffffLL;
+
+		check_trapped();
+	}
+}
+
+/*
+ * Read len bytes (1, 2, 4 or 8) from the start of each dummy_var[] slot
+ * and call check_trapped() after every slot. Any other len performs no
+ * read but still runs the check.
+ *
+ * The value read is discarded; only the memory access itself matters.
+ * The accesses go through volatile-qualified pointers so that the
+ * compiler cannot drop the otherwise unused loads.
+ */
+static void read_var(int len)
+{
+	int i;
+
+	for (i = 0; i < 4; i++) {
+		const volatile void *src = &dummy_var[i];
+
+		switch (len) {
+		case 1:
+			(void)*(const volatile char *)src;
+			break;
+		case 2:
+			(void)*(const volatile short *)src;
+			break;
+		case 4:
+			(void)*(const volatile int *)src;
+			break;
+		case 8:
+			(void)*(const volatile long long *)src;
+			break;
+		}
+		check_trapped();
+	}
+}
+
+/*
+ * Do the r/w/x accesses to trigger the breakpoints. And run
+ * the usual traps.
+ *
+ * Runs in the child. It first asks to be traced, then raises SIGUSR1
+ * on itself before the first test and once more after the last one.
+ * The loops here mirror the ones in launch_tests() so both sides step
+ * through the same sequence of tests.
+ */
+static void trigger_tests(void)
+{
+	int len, local, global, i;
+	int ret;
+
+	ret = ptrace(PTRACE_TRACEME, 0, NULL, 0);
+	if (ret) {
+		/* perror() appends ": <reason>" and the newline itself */
+		perror("Can't be traced?");
+		return;
+	}
+
+	/* Wake up father so that it sets up the first test */
+	kill(getpid(), SIGUSR1);
+
+	/* Test instruction breakpoints */
+	for (local = 0; local < 2; local++) {
+		for (global = 0; global < 2; global++) {
+			if (!local && !global)
+				continue;
+
+			for (i = 0; i < 4; i++) {
+				dummy_funcs[i]();
+				check_trapped();
+			}
+		}
+	}
+
+	/* Test write watchpoints */
+	for (len = 1; len <= (int)sizeof(long); len <<= 1) {
+		for (local = 0; local < 2; local++) {
+			for (global = 0; global < 2; global++) {
+				if (!local && !global)
+					continue;
+				write_var(len);
+			}
+		}
+	}
+
+	/* Test read/write watchpoints (on read accesses) */
+	for (len = 1; len <= (int)sizeof(long); len <<= 1) {
+		for (local = 0; local < 2; local++) {
+			for (global = 0; global < 2; global++) {
+				if (!local && !global)
+					continue;
+				read_var(len);
+			}
+		}
+	}
+
+	/* Icebp trap */
+	asm(".byte 0xf1\n");
+	check_trapped();
+
+	/* Int 3 trap */
+	asm("int $3\n");
+	check_trapped();
+
+	kill(getpid(), SIGUSR1);
+}
+
+/*
+ * Wait for the child's next stop and print "<msg> [Ok]" or
+ * "<msg> [Failed]".
+ *
+ * The test counts as Ok only if the child stopped with SIGTRAP and its
+ * copy of nr_tests equals ours. On a SIGTRAP stop the child's "trapped"
+ * flag is set through PTRACE_POKEDATA; failure to do so is fatal. Our
+ * own nr_tests is incremented in every case.
+ */
+static void check_success(const char *msg)
+{
+	const char *msg2;
+	int child_nr_tests;
+	int status = 0;	/* stays 0 (not stopped) if wait() fails */
+
+	/* Wait for the child to SIGTRAP */
+	wait(&status);
+
+	msg2 = "Failed";
+
+	/* WSTOPSIG() is only meaningful if the child really stopped */
+	if (WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) {
+		/*
+		 * PTRACE_PEEKDATA hands back a long; it is narrowed to int
+		 * on purpose, as nr_tests itself is an int.
+		 */
+		child_nr_tests = (int)ptrace(PTRACE_PEEKDATA, child_pid,
+					     &nr_tests, 0);
+		if (child_nr_tests == nr_tests)
+			msg2 = "Ok";
+		/*
+		 * NOTE(review): POKEDATA stores a full word at &trapped,
+		 * which is wider than int on 64-bit - confirm that
+		 * nothing important is placed right after it.
+		 */
+		if (ptrace(PTRACE_POKEDATA, child_pid, &trapped, 1)) {
+			perror("Can't poke");
+			exit(-1);
+		}
+	}
+
+	nr_tests++;
+
+	printf("%s [%s]\n", msg, msg2);
+}
+
+/*
+ * For each of the four slots: aim the slot at the matching dummy
+ * function, arm it as an execute breakpoint, resume the child, report
+ * the outcome through check_success() and disarm the slot again.
+ * buf is scratch space for the report line.
+ */
+static void launch_instruction_breakpoints(char *buf, int local, int global)
+{
+	int slot = 0;
+
+	while (slot < 4) {
+		set_breakpoint_addr(dummy_funcs[slot], slot);
+		toggle_breakpoint(slot, BP_X, 1, local, global, 1);
+
+		ptrace(PTRACE_CONT, child_pid, NULL, 0);
+
+		sprintf(buf, "Test breakpoint %d with local: %d global: %d",
+			slot, local, global);
+		check_success(buf);
+
+		toggle_breakpoint(slot, BP_X, 1, local, global, 0);
+		slot++;
+	}
+}
+
+/*
+ * For each of the four slots: aim the slot at the matching dummy_var[]
+ * entry, arm it with the given mode and length, resume the child,
+ * report the outcome through check_success() and disarm the slot again.
+ * The report says "write" for BP_W and "read" for any other mode.
+ * buf is scratch space for the report line.
+ */
+static void launch_watchpoints(char *buf, int mode, int len,
+			       int local, int global)
+{
+	const char *mode_str = (mode == BP_W) ? "write" : "read";
+	int slot = 0;
+
+	while (slot < 4) {
+		set_breakpoint_addr(&dummy_var[slot], slot);
+		toggle_breakpoint(slot, mode, len, local, global, 1);
+
+		ptrace(PTRACE_CONT, child_pid, NULL, 0);
+
+		sprintf(buf, "Test %s watchpoint %d with len: %d local: "
+			"%d global: %d", mode_str, slot, len, local, global);
+		check_success(buf);
+
+		toggle_breakpoint(slot, mode, len, local, global, 0);
+		slot++;
+	}
+}
+
+/*
+ * Run launch_watchpoints() in the given mode for every access length
+ * from 1 up to sizeof(long), doubling each time, and for every
+ * local/global combination except "neither".
+ */
+static void launch_all_watchpoints(char *buf, int mode)
+{
+	int len, local, global;
+
+	for (len = 1; len <= (int)sizeof(long); len <<= 1) {
+		for (local = 0; local < 2; local++) {
+			for (global = 0; global < 2; global++) {
+				if (!local && !global)
+					continue;
+				launch_watchpoints(buf, mode, len,
+						   local, global);
+			}
+		}
+	}
+}
+
+/*
+ * Set the breakpoints and check the child successfully triggers them.
+ *
+ * Order: instruction breakpoints, write watchpoints, read-write
+ * watchpoints, then the icebp and int 3 traps. A final PTRACE_CONT
+ * resumes the child after the last check.
+ */
+static void launch_tests(void)
+{
+	char buf[1024];
+	int local, global;
+
+	/* Instruction breakpoints */
+	for (local = 0; local < 2; local++) {
+		for (global = 0; global < 2; global++) {
+			if (!local && !global)
+				continue;
+			launch_instruction_breakpoints(buf, local, global);
+		}
+	}
+
+	/* Write watchpoint */
+	launch_all_watchpoints(buf, BP_W);
+
+	/* Read-Write watchpoint */
+	launch_all_watchpoints(buf, BP_RW);
+
+	/* Icebp traps */
+	ptrace(PTRACE_CONT, child_pid, NULL, 0);
+	check_success("Test icebp");
+
+	/* Int 3 traps */
+	ptrace(PTRACE_CONT, child_pid, NULL, 0);
+	check_success("Test int 3 trap");
+
+	ptrace(PTRACE_CONT, child_pid, NULL, 0);
+}
+
+/*
+ * Fork a child that runs trigger_tests() while the parent drives it
+ * with launch_tests(). The parent waits once before the tests (for the
+ * child's first stop) and once more after them. argc and argv are not
+ * used.
+ */
+int main(int argc, char **argv)
+{
+	pid_t pid;
+
+	pid = fork();
+	if (pid < 0) {
+		/* no child to trace: carrying on would only fail later */
+		perror("Can't fork");
+		exit(-1);
+	}
+	if (!pid) {
+		trigger_tests();
+		return 0;
+	}
+
+	child_pid = pid;
+
+	wait(NULL);
+
+	launch_tests();
+
+	wait(NULL);
+
+	return 0;
+}

diff --git a/tools/testing/selftests/run_tests b/tools/testing/selftests/run_tests
new file mode 100644
index 0000000..320718a
--- /dev/null
+++ b/tools/testing/selftests/run_tests

@@ -0,0 +1,8 @@
+#!/bin/bash
+
+TARGETS=breakpoints
+
+for TARGET in $TARGETS
+do
+	$TARGET/run_test
+done

diff --git a/tools/virtio/linux/virtio.h b/tools/virtio/linux/virtio.h
index 669bcdd..b4fbc91 100644
--- a/tools/virtio/linux/virtio.h
+++ b/tools/virtio/linux/virtio.h

@@ -186,21 +186,12 @@
 #endif
 
 /* Interfaces exported by virtio_ring. */
-int virtqueue_add_buf_gfp(struct virtqueue *vq,
-			  struct scatterlist sg[],
-			  unsigned int out_num,
-			  unsigned int in_num,
-			  void *data,
-			  gfp_t gfp);
-
-static inline int virtqueue_add_buf(struct virtqueue *vq,
-				    struct scatterlist sg[],
-				    unsigned int out_num,
-				    unsigned int in_num,
-				    void *data)
-{
-	return virtqueue_add_buf_gfp(vq, sg, out_num, in_num, data, GFP_ATOMIC);
-}
+int virtqueue_add_buf(struct virtqueue *vq,
+		      struct scatterlist sg[],
+		      unsigned int out_num,
+		      unsigned int in_num,
+		      void *data,
+		      gfp_t gfp);
 
 void virtqueue_kick(struct virtqueue *vq);
 
@@ -214,6 +205,7 @@
 struct virtqueue *vring_new_virtqueue(unsigned int num,
 				      unsigned int vring_align,
 				      struct virtio_device *vdev,
+				      bool weak_barriers,
 				      void *pages,
 				      void (*notify)(struct virtqueue *vq),
 				      void (*callback)(struct virtqueue *vq),

diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c
index 74d3331..6bf95f9 100644
--- a/tools/virtio/virtio_test.c
+++ b/tools/virtio/virtio_test.c

@@ -92,7 +92,8 @@
 	assert(r >= 0);
 	memset(info->ring, 0, vring_size(num, 4096));
 	vring_init(&info->vring, num, info->ring, 4096);
-	info->vq = vring_new_virtqueue(info->vring.num, 4096, &dev->vdev, info->ring,
+	info->vq = vring_new_virtqueue(info->vring.num, 4096, &dev->vdev,
+				       true, info->ring,
 				       vq_notify, vq_callback, "test");
 	assert(info->vq);
 	info->vq->priv = info;
@@ -160,7 +161,8 @@
 			if (started < bufs) {
 				sg_init_one(&sl, dev->buf, dev->buf_size);
 				r = virtqueue_add_buf(vq->vq, &sl, 1, 0,
-						      dev->buf + started);
+						      dev->buf + started,
+						      GFP_ATOMIC);
 				if (likely(r >= 0)) {
 					++started;
 					virtqueue_kick(vq->vq);
commit	f88609a0e447fb73851c0f640ed47257838ff99f	[log] [tgz]
author	Linus Torvalds <torvalds@linux-foundation.org>	Sat Jan 14 12:25:40 2012 -0800
committer	Linus Torvalds <torvalds@linux-foundation.org>	Sat Jan 14 12:25:40 2012 -0800
tree	12d2d77f42f7721fec8d9f99780679f7ddd0875c
parent	2145199c4f0db7c517dd788abec301dc84b91bd0 [diff]
parent	046d0a37024a5b1e66ec9314e5e7feba42feb811 [diff]