Merge tag 'hwmon-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/groeck/linux-staging

Pull hwmon cleanups from Guenter Roeck:
 "Minor cleanup in ina2xx and hwmon-vid drivers; no functional changes"

* tag 'hwmon-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/groeck/linux-staging:
  hwmon: (ina2xx) Remove casting the return value which is a void pointer
  hwmon: (hwmon-vid) Add __maybe_unused attribute to dummy variable
diff --git a/Documentation/ABI/testing/sysfs-class-mtd b/Documentation/ABI/testing/sysfs-class-mtd
index 3105644..bfd119a 100644
--- a/Documentation/ABI/testing/sysfs-class-mtd
+++ b/Documentation/ABI/testing/sysfs-class-mtd
@@ -128,9 +128,8 @@
 Contact:	linux-mtd@lists.infradead.org
 Description:
 		Maximum number of bit errors that the device is capable of
-		correcting within each region covering an ecc step.  This will
-		always be a non-negative integer.  Note that some devices will
-		have multiple ecc steps within each writesize region.
+		correcting within each region covering an ECC step (see
+		ecc_step_size).  This will always be a non-negative integer.
 
 		In the case of devices lacking any ECC capability, it is 0.
 
@@ -173,3 +172,15 @@
 		This is generally applicable only to NAND flash devices with ECC
 		capability.  It is ignored on devices lacking ECC capability;
 		i.e., devices for which ecc_strength is zero.
+
+What:		/sys/class/mtd/mtdX/ecc_step_size
+Date:		May 2013
+KernelVersion:	3.10
+Contact:	linux-mtd@lists.infradead.org
+Description:
+		The size of a single region covered by ECC, known as the ECC
+		step.  Devices may have several equally sized ECC steps within
+		each writesize region.
+
+		It will always be a non-negative integer.  In the case of
+		devices lacking any ECC capability, it is 0.
diff --git a/Documentation/DocBook/mtdnand.tmpl b/Documentation/DocBook/mtdnand.tmpl
index fe122d6..a248f42 100644
--- a/Documentation/DocBook/mtdnand.tmpl
+++ b/Documentation/DocBook/mtdnand.tmpl
@@ -1224,8 +1224,6 @@
 #define NAND_BBT_CREATE		0x00000200
 /* Search good / bad pattern through all pages of a block */
 #define NAND_BBT_SCANALLPAGES	0x00000400
-/* Scan block empty during good / bad block scan */
-#define NAND_BBT_SCANEMPTY	0x00000800
 /* Write bbt if neccecary */
 #define NAND_BBT_WRITE		0x00001000
 /* Read and write back block contents when writing bbt */
diff --git a/Documentation/clk.txt b/Documentation/clk.txt
index 6f68ba0..3aeb5c4 100644
--- a/Documentation/clk.txt
+++ b/Documentation/clk.txt
@@ -70,6 +70,10 @@
 						unsigned long parent_rate);
 		long		(*round_rate)(struct clk_hw *hw, unsigned long,
 						unsigned long *);
+		long		(*determine_rate)(struct clk_hw *hw,
+						unsigned long rate,
+						unsigned long *best_parent_rate,
+						struct clk **best_parent_clk);
 		int		(*set_parent)(struct clk_hw *hw, u8 index);
 		u8		(*get_parent)(struct clk_hw *hw);
 		int		(*set_rate)(struct clk_hw *hw, unsigned long);
@@ -179,26 +183,28 @@
 callback is invalid or otherwise unnecessary.  Empty cells are either
 optional or must be evaluated on a case-by-case basis.
 
-                           clock hardware characteristics
-	     -----------------------------------------------------------
-             | gate | change rate | single parent | multiplexer | root |
-             |------|-------------|---------------|-------------|------|
-.prepare     |      |             |               |             |      |
-.unprepare   |      |             |               |             |      |
-             |      |             |               |             |      |
-.enable      | y    |             |               |             |      |
-.disable     | y    |             |               |             |      |
-.is_enabled  | y    |             |               |             |      |
-             |      |             |               |             |      |
-.recalc_rate |      | y           |               |             |      |
-.round_rate  |      | y           |               |             |      |
-.set_rate    |      | y           |               |             |      |
-             |      |             |               |             |      |
-.set_parent  |      |             | n             | y           | n    |
-.get_parent  |      |             | n             | y           | n    |
-             |      |             |               |             |      |
-.init        |      |             |               |             |      |
-	     -----------------------------------------------------------
+                              clock hardware characteristics
+                -----------------------------------------------------------
+                | gate | change rate | single parent | multiplexer | root |
+                |------|-------------|---------------|-------------|------|
+.prepare        |      |             |               |             |      |
+.unprepare      |      |             |               |             |      |
+                |      |             |               |             |      |
+.enable         | y    |             |               |             |      |
+.disable        | y    |             |               |             |      |
+.is_enabled     | y    |             |               |             |      |
+                |      |             |               |             |      |
+.recalc_rate    |      | y           |               |             |      |
+.round_rate     |      | y [1]       |               |             |      |
+.determine_rate |      | y [1]       |               |             |      |
+.set_rate       |      | y           |               |             |      |
+                |      |             |               |             |      |
+.set_parent     |      |             | n             | y           | n    |
+.get_parent     |      |             | n             | y           | n    |
+                |      |             |               |             |      |
+.init           |      |             |               |             |      |
+                -----------------------------------------------------------
+[1] either one of round_rate or determine_rate is required.
 
 Finally, register your clock at run-time with a hardware-specific
 registration function.  This function simply populates struct clk_foo's
diff --git a/Documentation/device-mapper/cache.txt b/Documentation/device-mapper/cache.txt
index e8cdf72..33d45ee 100644
--- a/Documentation/device-mapper/cache.txt
+++ b/Documentation/device-mapper/cache.txt
@@ -50,14 +50,16 @@
    which are dirty, and extra hints for use by the policy object.
    This information could be put on the cache device, but having it
    separate allows the volume manager to configure it differently,
-   e.g. as a mirror for extra robustness.
+   e.g. as a mirror for extra robustness.  This metadata device may only
+   be used by a single cache device.
 
 Fixed block size
 ----------------
 
 The origin is divided up into blocks of a fixed size.  This block size
 is configurable when you first create the cache.  Typically we've been
-using block sizes of 256k - 1024k.
+using block sizes of 256KB - 1024KB.  The block size must be between 64
+(32KB) and 2097152 (1GB) and a multiple of 64 (32KB).
 
 Having a fixed block size simplifies the target a lot.  But it is
 something of a compromise.  For instance, a small part of a block may be
diff --git a/Documentation/device-mapper/statistics.txt b/Documentation/device-mapper/statistics.txt
new file mode 100644
index 0000000..2a1673a
--- /dev/null
+++ b/Documentation/device-mapper/statistics.txt
@@ -0,0 +1,186 @@
+DM statistics
+=============
+
+Device Mapper supports the collection of I/O statistics on user-defined
+regions of a DM device.	 If no regions are defined no statistics are
+collected so there isn't any performance impact.  Only bio-based DM
+devices are currently supported.
+
+Each user-defined region specifies a starting sector, length and step.
+Individual statistics will be collected for each step-sized area within
+the range specified.
+
+The I/O statistics counters for each step-sized area of a region are
+in the same format as /sys/block/*/stat or /proc/diskstats (see:
+Documentation/iostats.txt).  But two extra counters (12 and 13) are
+provided: total time spent reading and writing in milliseconds.	 All
+these counters may be accessed by sending the @stats_print message to
+the appropriate DM device via dmsetup.
+
+Each region has a corresponding unique identifier, which we call a
+region_id, that is assigned when the region is created.	 The region_id
+must be supplied when querying statistics about the region, deleting the
+region, etc.  Unique region_ids enable multiple userspace programs to
+request and process statistics for the same DM device without stepping
+on each other's data.
+
+The creation of DM statistics will allocate memory via kmalloc or
+fallback to using vmalloc space.  At most, 1/4 of the overall system
+memory may be allocated by DM statistics.  The admin can see how much
+memory is used by reading
+/sys/module/dm_mod/parameters/stats_current_allocated_bytes
+
+Messages
+========
+
+    @stats_create <range> <step> [<program_id> [<aux_data>]]
+
+	Create a new region and return the region_id.
+
+	<range>
+	  "-" - whole device
+	  "<start_sector>+<length>" - a range of <length> 512-byte sectors
+				      starting with <start_sector>.
+
+	<step>
+	  "<area_size>" - the range is subdivided into areas each containing
+			  <area_size> sectors.
+	  "/<number_of_areas>" - the range is subdivided into the specified
+				 number of areas.
+
+	<program_id>
+	  An optional parameter.  A name that uniquely identifies
+	  the userspace owner of the range.  This groups ranges together
+	  so that userspace programs can identify the ranges they
+	  created and ignore those created by others.
+	  The kernel returns this string back in the output of
+	  @stats_list message, but it doesn't use it for anything else.
+
+	<aux_data>
+	  An optional parameter.  A word that provides auxiliary data
+	  that is useful to the client program that created the range.
+	  The kernel returns this string back in the output of
+	  @stats_list message, but it doesn't use this value for anything.
+
+    @stats_delete <region_id>
+
+	Delete the region with the specified id.
+
+	<region_id>
+	  region_id returned from @stats_create
+
+    @stats_clear <region_id>
+
+	Clear all the counters except the in-flight i/o counters.
+
+	<region_id>
+	  region_id returned from @stats_create
+
+    @stats_list [<program_id>]
+
+	List all regions registered with @stats_create.
+
+	<program_id>
+	  An optional parameter.
+	  If this parameter is specified, only matching regions
+	  are returned.
+	  If it is not specified, all regions are returned.
+
+	Output format:
+	  <region_id>: <start_sector>+<length> <step> <program_id> <aux_data>
+
+    @stats_print <region_id> [<starting_line> <number_of_lines>]
+
+	Print counters for each step-sized area of a region.
+
+	<region_id>
+	  region_id returned from @stats_create
+
+	<starting_line>
+	  The index of the starting line in the output.
+	  If omitted, all lines are returned.
+
+	<number_of_lines>
+	  The number of lines to include in the output.
+	  If omitted, all lines are returned.
+
+	Output format for each step-sized area of a region:
+
+	  <start_sector>+<length> counters
+
+	  The first 11 counters have the same meaning as
+	  /sys/block/*/stat or /proc/diskstats.
+
+	  Please refer to Documentation/iostats.txt for details.
+
+	  1. the number of reads completed
+	  2. the number of reads merged
+	  3. the number of sectors read
+	  4. the number of milliseconds spent reading
+	  5. the number of writes completed
+	  6. the number of writes merged
+	  7. the number of sectors written
+	  8. the number of milliseconds spent writing
+	  9. the number of I/Os currently in progress
+	  10. the number of milliseconds spent doing I/Os
+	  11. the weighted number of milliseconds spent doing I/Os
+
+	  Additional counters:
+	  12. the total time spent reading in milliseconds
+	  13. the total time spent writing in milliseconds
+
+    @stats_print_clear <region_id> [<starting_line> <number_of_lines>]
+
+	Atomically print and then clear all the counters except the
+	in-flight i/o counters.	 Useful when the client consuming the
+	statistics does not want to lose any statistics (those updated
+	between printing and clearing).
+
+	<region_id>
+	  region_id returned from @stats_create
+
+	<starting_line>
+	  The index of the starting line in the output.
+	  If omitted, all lines are printed and then cleared.
+
+	<number_of_lines>
+	  The number of lines to process.
+	  If omitted, all lines are printed and then cleared.
+
+    @stats_set_aux <region_id> <aux_data>
+
+	Store auxiliary data aux_data for the specified region.
+
+	<region_id>
+	  region_id returned from @stats_create
+
+	<aux_data>
+	  The string that identifies data which is useful to the client
+	  program that created the range.  The kernel returns this
+	  string back in the output of @stats_list message, but it
+	  doesn't use this value for anything.
+
+Examples
+========
+
+Subdivide the DM device 'vol' into 100 pieces and start collecting
+statistics on them:
+
+  dmsetup message vol 0 @stats_create - /100
+
+Set the auxillary data string to "foo bar baz" (the escape for each
+space must also be escaped, otherwise the shell will consume them):
+
+  dmsetup message vol 0 @stats_set_aux 0 foo\\ bar\\ baz
+
+List the statistics:
+
+  dmsetup message vol 0 @stats_list
+
+Print the statistics:
+
+  dmsetup message vol 0 @stats_print 0
+
+Delete the statistics:
+
+  dmsetup message vol 0 @stats_delete 0
diff --git a/Documentation/device-mapper/thin-provisioning.txt b/Documentation/device-mapper/thin-provisioning.txt
index 30b8b83..50c44cf 100644
--- a/Documentation/device-mapper/thin-provisioning.txt
+++ b/Documentation/device-mapper/thin-provisioning.txt
@@ -99,13 +99,14 @@
 		 $data_block_size $low_water_mark"
 
 $data_block_size gives the smallest unit of disk space that can be
-allocated at a time expressed in units of 512-byte sectors.  People
-primarily interested in thin provisioning may want to use a value such
-as 1024 (512KB).  People doing lots of snapshotting may want a smaller value
-such as 128 (64KB).  If you are not zeroing newly-allocated data,
-a larger $data_block_size in the region of 256000 (128MB) is suggested.
-$data_block_size must be the same for the lifetime of the
-metadata device.
+allocated at a time expressed in units of 512-byte sectors.
+$data_block_size must be between 128 (64KB) and 2097152 (1GB) and a
+multiple of 128 (64KB).  $data_block_size cannot be changed after the
+thin-pool is created.  People primarily interested in thin provisioning
+may want to use a value such as 1024 (512KB).  People doing lots of
+snapshotting may want a smaller value such as 128 (64KB).  If you are
+not zeroing newly-allocated data, a larger $data_block_size in the
+region of 256000 (128MB) is suggested.
 
 $low_water_mark is expressed in blocks of size $data_block_size.  If
 free space on the data device drops below this level then a dm event
diff --git a/Documentation/devicetree/bindings/clock/exynos4-clock.txt b/Documentation/devicetree/bindings/clock/exynos4-clock.txt
index 14d5c2a..c6bf8a6 100644
--- a/Documentation/devicetree/bindings/clock/exynos4-clock.txt
+++ b/Documentation/devicetree/bindings/clock/exynos4-clock.txt
@@ -236,6 +236,7 @@
   spi0_isp_sclk       380     Exynos4x12
   spi1_isp_sclk       381     Exynos4x12
   uart_isp_sclk       382     Exynos4x12
+  tmu_apbif           383
 
 		[Mux Clocks]
 
diff --git a/Documentation/devicetree/bindings/clock/exynos5250-clock.txt b/Documentation/devicetree/bindings/clock/exynos5250-clock.txt
index 781a627..24765c1 100644
--- a/Documentation/devicetree/bindings/clock/exynos5250-clock.txt
+++ b/Documentation/devicetree/bindings/clock/exynos5250-clock.txt
@@ -59,6 +59,9 @@
   sclk_spi0		154
   sclk_spi1		155
   sclk_spi2		156
+  div_i2s1		157
+  div_i2s2		158
+  sclk_hdmiphy		159
 
 
    [Peripheral Clock Gates]
@@ -154,7 +157,16 @@
   dsim0			341
   dp			342
   mixer			343
-  hdmi			345
+  hdmi			344
+  g2d			345
+
+
+   [Clock Muxes]
+
+  Clock			ID
+  ----------------------------
+  mout_hdmi		1024
+
 
 Example 1: An example of a clock controller node is listed below.
 
diff --git a/Documentation/devicetree/bindings/clock/exynos5420-clock.txt b/Documentation/devicetree/bindings/clock/exynos5420-clock.txt
index 9bcc4b1..32aa34e 100644
--- a/Documentation/devicetree/bindings/clock/exynos5420-clock.txt
+++ b/Documentation/devicetree/bindings/clock/exynos5420-clock.txt
@@ -59,6 +59,7 @@
   sclk_pwm		155
   sclk_gscl_wa		156
   sclk_gscl_wb		157
+  sclk_hdmiphy		158
 
    [Peripheral Clock Gates]
 
@@ -179,6 +180,17 @@
   fimc_lite3		495
   aclk_g3d		500
   g3d			501
+  smmu_mixer		502
+
+  Mux			ID
+  ----------------------------
+
+  mout_hdmi		640
+
+  Divider		ID
+  ----------------------------
+
+  dout_pixel		768
 
 Example 1: An example of a clock controller node is listed below.
 
diff --git a/Documentation/devicetree/bindings/clock/samsung,s3c64xx-clock.txt b/Documentation/devicetree/bindings/clock/samsung,s3c64xx-clock.txt
new file mode 100644
index 0000000..fa171dc
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/samsung,s3c64xx-clock.txt
@@ -0,0 +1,77 @@
+* Samsung S3C64xx Clock Controller
+
+The S3C64xx clock controller generates and supplies clock to various controllers
+within the SoC. The clock binding described here is applicable to all SoCs in
+the S3C64xx family.
+
+Required Properties:
+
+- compatible: should be one of the following.
+  - "samsung,s3c6400-clock" - controller compatible with S3C6400 SoC.
+  - "samsung,s3c6410-clock" - controller compatible with S3C6410 SoC.
+
+- reg: physical base address of the controller and length of memory mapped
+  region.
+
+- #clock-cells: should be 1.
+
+Each clock is assigned an identifier and client nodes can use this identifier
+to specify the clock which they consume. Some of the clocks are available only
+on a particular S3C64xx SoC and this is specified where applicable.
+
+All available clocks are defined as preprocessor macros in
+dt-bindings/clock/samsung,s3c64xx-clock.h header and can be used in device
+tree sources.
+
+External clocks:
+
+There are several clocks that are generated outside the SoC. It is expected
+that they are defined using standard clock bindings with following
+clock-output-names:
+ - "fin_pll" - PLL input clock (xtal/extclk) - required,
+ - "xusbxti" - USB xtal - required,
+ - "iiscdclk0" - I2S0 codec clock - optional,
+ - "iiscdclk1" - I2S1 codec clock - optional,
+ - "iiscdclk2" - I2S2 codec clock - optional,
+ - "pcmcdclk0" - PCM0 codec clock - optional,
+ - "pcmcdclk1" - PCM1 codec clock - optional, only S3C6410.
+
+Example: Clock controller node:
+
+	clock: clock-controller@7e00f000 {
+		compatible = "samsung,s3c6410-clock";
+		reg = <0x7e00f000 0x1000>;
+		#clock-cells = <1>;
+	};
+
+Example: Required external clocks:
+
+	fin_pll: clock-fin-pll {
+		compatible = "fixed-clock";
+		clock-output-names = "fin_pll";
+		clock-frequency = <12000000>;
+		#clock-cells = <0>;
+	};
+
+	xusbxti: clock-xusbxti {
+		compatible = "fixed-clock";
+		clock-output-names = "xusbxti";
+		clock-frequency = <48000000>;
+		#clock-cells = <0>;
+	};
+
+Example: UART controller node that consumes the clock generated by the clock
+  controller (refer to the standard clock bindings for information about
+  "clocks" and "clock-names" properties):
+
+		uart0: serial@7f005000 {
+			compatible = "samsung,s3c6400-uart";
+			reg = <0x7f005000 0x100>;
+			interrupt-parent = <&vic1>;
+			interrupts = <5>;
+			clock-names = "uart", "clk_uart_baud2",
+					"clk_uart_baud3";
+			clocks = <&clock PCLK_UART0>, <&clocks PCLK_UART0>,
+					<&clock SCLK_UART>;
+			status = "disabled";
+		};
diff --git a/Documentation/devicetree/bindings/clock/sunxi.txt b/Documentation/devicetree/bindings/clock/sunxi.txt
index d495521..00a5c264 100644
--- a/Documentation/devicetree/bindings/clock/sunxi.txt
+++ b/Documentation/devicetree/bindings/clock/sunxi.txt
@@ -8,19 +8,31 @@
 - compatible : shall be one of the following:
 	"allwinner,sun4i-osc-clk" - for a gatable oscillator
 	"allwinner,sun4i-pll1-clk" - for the main PLL clock
+	"allwinner,sun6i-a31-pll1-clk" - for the main PLL clock on A31
 	"allwinner,sun4i-cpu-clk" - for the CPU multiplexer clock
 	"allwinner,sun4i-axi-clk" - for the AXI clock
 	"allwinner,sun4i-axi-gates-clk" - for the AXI gates
 	"allwinner,sun4i-ahb-clk" - for the AHB clock
 	"allwinner,sun4i-ahb-gates-clk" - for the AHB gates on A10
 	"allwinner,sun5i-a13-ahb-gates-clk" - for the AHB gates on A13
+	"allwinner,sun5i-a10s-ahb-gates-clk" - for the AHB gates on A10s
+	"allwinner,sun7i-a20-ahb-gates-clk" - for the AHB gates on A20
+	"allwinner,sun6i-a31-ahb1-mux-clk" - for the AHB1 multiplexer on A31
+	"allwinner,sun6i-a31-ahb1-gates-clk" - for the AHB1 gates on A31
 	"allwinner,sun4i-apb0-clk" - for the APB0 clock
 	"allwinner,sun4i-apb0-gates-clk" - for the APB0 gates on A10
 	"allwinner,sun5i-a13-apb0-gates-clk" - for the APB0 gates on A13
+	"allwinner,sun5i-a10s-apb0-gates-clk" - for the APB0 gates on A10s
+	"allwinner,sun7i-a20-apb0-gates-clk" - for the APB0 gates on A20
 	"allwinner,sun4i-apb1-clk" - for the APB1 clock
 	"allwinner,sun4i-apb1-mux-clk" - for the APB1 clock muxing
 	"allwinner,sun4i-apb1-gates-clk" - for the APB1 gates on A10
 	"allwinner,sun5i-a13-apb1-gates-clk" - for the APB1 gates on A13
+	"allwinner,sun5i-a10s-apb1-gates-clk" - for the APB1 gates on A10s
+	"allwinner,sun6i-a31-apb1-gates-clk" - for the APB1 gates on A31
+	"allwinner,sun7i-a20-apb1-gates-clk" - for the APB1 gates on A20
+	"allwinner,sun6i-a31-apb2-div-clk" - for the APB2 gates on A31
+	"allwinner,sun6i-a31-apb2-gates-clk" - for the APB2 gates on A31
 
 Required properties for all clocks:
 - reg : shall be the control register address for the clock.
diff --git a/Documentation/devicetree/bindings/clock/sunxi/sun5i-a10s-gates.txt b/Documentation/devicetree/bindings/clock/sunxi/sun5i-a10s-gates.txt
new file mode 100644
index 0000000..d24279f
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/sunxi/sun5i-a10s-gates.txt
@@ -0,0 +1,75 @@
+Gate clock outputs
+------------------
+
+  * AXI gates ("allwinner,sun4i-axi-gates-clk")
+
+    DRAM					0
+
+  * AHB gates ("allwinner,sun5i-a10s-ahb-gates-clk")
+
+    USB0					0
+    EHCI0					1
+    OHCI0					2
+
+    SS						5
+    DMA						6
+    BIST					7
+    MMC0					8
+    MMC1					9
+    MMC2					10
+
+    NAND					13
+    SDRAM					14
+
+    EMAC					17
+    TS						18
+
+    SPI0					20
+    SPI1					21
+    SPI2					22
+
+    GPS						26
+
+    HSTIMER					28
+
+    VE						32
+
+    TVE						34
+
+    LCD						36
+
+    CSI						40
+
+    HDMI					43
+    DE_BE					44
+
+    DE_FE					46
+
+    IEP						51
+    MALI400					52
+
+  * APB0 gates ("allwinner,sun5i-a10s-apb0-gates-clk")
+
+    CODEC					0
+
+    IIS						3
+
+    PIO						5
+    IR						6
+
+    KEYPAD					10
+
+  * APB1 gates ("allwinner,sun5i-a10s-apb1-gates-clk")
+
+    I2C0					0
+    I2C1					1
+    I2C2					2
+
+    UART0					16
+    UART1					17
+    UART2					18
+    UART3					19
+
+Notation:
+ [*]:  The datasheet didn't mention these, but they are present on AW code
+ [**]: The datasheet had this marked as "NC" but they are used on AW code
diff --git a/Documentation/devicetree/bindings/clock/sunxi/sun6i-a31-gates.txt b/Documentation/devicetree/bindings/clock/sunxi/sun6i-a31-gates.txt
new file mode 100644
index 0000000..fe44932
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/sunxi/sun6i-a31-gates.txt
@@ -0,0 +1,83 @@
+Gate clock outputs
+------------------
+
+  * AHB1 gates ("allwinner,sun6i-a31-ahb1-gates-clk")
+
+    MIPI DSI					1
+
+    SS						5
+    DMA						6
+
+    MMC0					8
+    MMC1					9
+    MMC2					10
+    MMC3					11
+
+    NAND1					12
+    NAND0					13
+    SDRAM					14
+
+    GMAC					17
+    TS						18
+    HSTIMER					19
+    SPI0					20
+    SPI1					21
+    SPI2					22
+    SPI3					23
+    USB_OTG					24
+
+    EHCI0					26
+    EHCI1					27
+
+    OHCI0					29
+    OHCI1					30
+    OHCI2					31
+    VE						32
+
+    LCD0					36
+    LCD1					37
+
+    CSI						40
+
+    HDMI					43
+    DE_BE0					44
+    DE_BE1					45
+    DE_FE1					46
+    DE_FE1					47
+
+    MP						50
+
+    GPU						52
+
+    DEU0					55
+    DEU1					56
+    DRC0					57
+    DRC1					58
+
+  * APB1 gates ("allwinner,sun6i-a31-apb1-gates-clk")
+
+    CODEC					0
+
+    DIGITAL MIC					4
+    PIO						5
+
+    DAUDIO0					12
+    DAUDIO1					13
+
+  * APB2 gates ("allwinner,sun6i-a31-apb2-gates-clk")
+
+    I2C0					0
+    I2C1					1
+    I2C2					2
+    I2C3					3
+
+    UART0					16
+    UART1					17
+    UART2					18
+    UART3					19
+    UART4					20
+    UART5					21
+
+Notation:
+ [*]:  The datasheet didn't mention these, but they are present on AW code
+ [**]: The datasheet had this marked as "NC" but they are used on AW code
diff --git a/Documentation/devicetree/bindings/clock/sunxi/sun7i-a20-gates.txt b/Documentation/devicetree/bindings/clock/sunxi/sun7i-a20-gates.txt
new file mode 100644
index 0000000..357f4fd
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/sunxi/sun7i-a20-gates.txt
@@ -0,0 +1,98 @@
+Gate clock outputs
+------------------
+
+  * AXI gates ("allwinner,sun4i-axi-gates-clk")
+
+    DRAM					0
+
+  * AHB gates ("allwinner,sun7i-a20-ahb-gates-clk")
+
+    USB0					0
+    EHCI0					1
+    OHCI0					2
+    EHCI1					3
+    OHCI1					4
+    SS						5
+    DMA						6
+    BIST					7
+    MMC0					8
+    MMC1					9
+    MMC2					10
+    MMC3					11
+    MS						12
+    NAND					13
+    SDRAM					14
+
+    ACE						16
+    EMAC					17
+    TS						18
+
+    SPI0					20
+    SPI1					21
+    SPI2					22
+    SPI3					23
+
+    SATA					25
+
+    HSTIMER					28
+
+    VE						32
+    TVD						33
+    TVE0					34
+    TVE1					35
+    LCD0					36
+    LCD1					37
+
+    CSI0					40
+    CSI1					41
+
+    HDMI1					42
+    HDMI0					43
+    DE_BE0					44
+    DE_BE1					45
+    DE_FE1					46
+    DE_FE1					47
+
+    GMAC					49
+    MP						50
+
+    MALI400					52
+
+  * APB0 gates ("allwinner,sun7i-a20-apb0-gates-clk")
+
+    CODEC					0
+    SPDIF					1
+    AC97					2
+    IIS0					3
+    IIS1					4
+    PIO						5
+    IR0						6
+    IR1						7
+    IIS2					8
+
+    KEYPAD					10
+
+  * APB1 gates ("allwinner,sun7i-a20-apb1-gates-clk")
+
+    I2C0					0
+    I2C1					1
+    I2C2					2
+    I2C3					3
+    CAN						4
+    SCR						5
+    PS20					6
+    PS21					7
+
+    I2C4					15
+    UART0					16
+    UART1					17
+    UART2					18
+    UART3					19
+    UART4					20
+    UART5					21
+    UART6					22
+    UART7					23
+
+Notation:
+ [*]:  The datasheet didn't mention these, but they are present on AW code
+ [**]: The datasheet had this marked as "NC" but they are used on AW code
diff --git a/Documentation/devicetree/bindings/dma/fsl-imx-sdma.txt b/Documentation/devicetree/bindings/dma/fsl-imx-sdma.txt
index 68cee4f5..4fa814d 100644
--- a/Documentation/devicetree/bindings/dma/fsl-imx-sdma.txt
+++ b/Documentation/devicetree/bindings/dma/fsl-imx-sdma.txt
@@ -1,7 +1,12 @@
 * Freescale Smart Direct Memory Access (SDMA) Controller for i.MX
 
 Required properties:
-- compatible : Should be "fsl,<chip>-sdma"
+- compatible : Should be "fsl,imx31-sdma", "fsl,imx31-to1-sdma",
+  "fsl,imx31-to2-sdma", "fsl,imx35-sdma", "fsl,imx35-to1-sdma",
+  "fsl,imx35-to2-sdma", "fsl,imx51-sdma", "fsl,imx53-sdma" or
+  "fsl,imx6q-sdma". The -to variants should be preferred since they
+  allow to determnine the correct ROM script addresses needed for
+  the driver to work without additional firmware.
 - reg : Should contain SDMA registers location and length
 - interrupts : Should contain SDMA interrupt
 - #dma-cells : Must be <3>.
diff --git a/Documentation/devicetree/bindings/dma/k3dma.txt b/Documentation/devicetree/bindings/dma/k3dma.txt
new file mode 100644
index 0000000..23f8d71
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/k3dma.txt
@@ -0,0 +1,46 @@
+* Hisilicon K3 DMA controller
+
+See dma.txt first
+
+Required properties:
+- compatible: Should be "hisilicon,k3-dma-1.0"
+- reg: Should contain DMA registers location and length.
+- interrupts: Should contain one interrupt shared by all channel
+- #dma-cells: see dma.txt, should be 1, para number
+- dma-channels: physical channels supported
+- dma-requests: virtual channels supported, each virtual channel
+		have specific request line
+- clocks: clock required
+
+Example:
+
+Controller:
+		dma0: dma@fcd02000 {
+			compatible = "hisilicon,k3-dma-1.0";
+			reg = <0xfcd02000 0x1000>;
+			#dma-cells = <1>;
+			dma-channels = <16>;
+			dma-requests = <27>;
+			interrupts = <0 12 4>;
+			clocks = <&pclk>;
+			status = "disable";
+		};
+
+Client:
+Use specific request line passing from dmax
+For example, i2c0 read channel request line is 18, while write channel use 19
+
+		i2c0: i2c@fcb08000 {
+			compatible = "snps,designware-i2c";
+			dmas =	<&dma0 18          /* read channel */
+				 &dma0 19>;        /* write channel */
+			dma-names = "rx", "tx";
+		};
+
+		i2c1: i2c@fcb09000 {
+			compatible = "snps,designware-i2c";
+			dmas =	<&dma0 20          /* read channel */
+				 &dma0 21>;        /* write channel */
+			dma-names = "rx", "tx";
+		};
+
diff --git a/Documentation/devicetree/bindings/dma/shdma.txt b/Documentation/devicetree/bindings/dma/shdma.txt
index c15994a..2a3f3b8 100644
--- a/Documentation/devicetree/bindings/dma/shdma.txt
+++ b/Documentation/devicetree/bindings/dma/shdma.txt
@@ -22,42 +22,51 @@
 * DMA controller
 
 Required properties:
-- compatible:	should be "renesas,shdma"
+- compatible:	should be of the form "renesas,shdma-<soc>", where <soc> should
+		be replaced with the desired SoC model, e.g.
+		"renesas,shdma-r8a73a4" for the system DMAC on r8a73a4 SoC
 
 Example:
-	dmac: dma-mux0 {
+	dmac: dma-multiplexer@0 {
 		compatible = "renesas,shdma-mux";
 		#dma-cells = <1>;
-		dma-channels = <6>;
+		dma-channels = <20>;
 		dma-requests = <256>;
-		reg = <0 0>;	/* Needed for AUXDATA */
-		#address-cells = <1>;
-		#size-cells = <1>;
+		#address-cells = <2>;
+		#size-cells = <2>;
 		ranges;
 
-		dma0: shdma@fe008020 {
-			compatible = "renesas,shdma";
-			reg = <0xfe008020 0x270>,
-				<0xfe009000 0xc>;
+		dma0: dma-controller@e6700020 {
+			compatible = "renesas,shdma-r8a73a4";
+			reg = <0 0xe6700020 0 0x89e0>;
 			interrupt-parent = <&gic>;
-			interrupts = <0 34 4
-					0 28 4
-					0 29 4
-					0 30 4
-					0 31 4
-					0 32 4
-					0 33 4>;
+			interrupts = <0 220 4
+					0 200 4
+					0 201 4
+					0 202 4
+					0 203 4
+					0 204 4
+					0 205 4
+					0 206 4
+					0 207 4
+					0 208 4
+					0 209 4
+					0 210 4
+					0 211 4
+					0 212 4
+					0 213 4
+					0 214 4
+					0 215 4
+					0 216 4
+					0 217 4
+					0 218 4
+					0 219 4>;
 			interrupt-names = "error",
 					"ch0", "ch1", "ch2", "ch3",
-					"ch4", "ch5";
-		};
-
-		dma1: shdma@fe018020 {
-			...
-		};
-
-		dma2: shdma@fe028020 {
-			...
+					"ch4", "ch5", "ch6", "ch7",
+					"ch8", "ch9", "ch10", "ch11",
+					"ch12", "ch13", "ch14", "ch15",
+					"ch16", "ch17", "ch18", "ch19";
 		};
 	};
 
diff --git a/Documentation/devicetree/bindings/gpu/samsung-g2d.txt b/Documentation/devicetree/bindings/gpu/samsung-g2d.txt
index 3f454ff..c4f358d 100644
--- a/Documentation/devicetree/bindings/gpu/samsung-g2d.txt
+++ b/Documentation/devicetree/bindings/gpu/samsung-g2d.txt
@@ -11,8 +11,11 @@
 
   - interrupts : G2D interrupt number to the CPU.
   - clocks : from common clock binding: handle to G2D clocks.
-  - clock-names : from common clock binding: must contain "sclk_fimg2d" and
-		  "fimg2d", corresponding to entries in the clocks property.
+  - clock-names : names of clocks listed in clocks property, in the same
+		  order, depending on SoC type:
+		  - for S5PV210 and Exynos4 based SoCs: "fimg2d" and
+		    "sclk_fimg2d"
+		  - for Exynos5250 SoC: "fimg2d".
 
 Example:
 	g2d@12800000 {
diff --git a/Documentation/devicetree/bindings/memory.txt b/Documentation/devicetree/bindings/memory.txt
new file mode 100644
index 0000000..eb24693
--- /dev/null
+++ b/Documentation/devicetree/bindings/memory.txt
@@ -0,0 +1,168 @@
+*** Memory binding ***
+
+The /memory node provides basic information about the address and size
+of the physical memory. This node is usually filled or updated by the
+bootloader, depending on the actual memory configuration of the given
+hardware.
+
+The memory layout is described by the following node:
+
+/ {
+	#address-cells = <(n)>;
+	#size-cells = <(m)>;
+	memory {
+		device_type = "memory";
+		reg =  <(baseaddr1) (size1)
+			(baseaddr2) (size2)
+			...
+			(baseaddrN) (sizeN)>;
+	};
+	...
+};
+
+A memory node follows the typical device tree rules for "reg" property:
+n:		number of cells used to store base address value
+m:		number of cells used to store size value
+baseaddrX:	defines a base address of the defined memory bank
+sizeX:		the size of the defined memory bank
+
+
+More than one memory bank can be defined.
+
+
+*** Reserved memory regions ***
+
+In /memory/reserved-memory node one can create child nodes describing
+particular reserved (excluded from normal use) memory regions. Such
+memory regions are usually designed for the special usage by various
+device drivers. A good example are contiguous memory allocations or
+memory sharing with other operating system on the same hardware board.
+Those special memory regions might depend on the board configuration and
+devices used on the target system.
+
+Parameters for each memory region can be encoded into the device tree
+with the following convention:
+
+[(label):] (name) {
+	compatible = "linux,contiguous-memory-region", "reserved-memory-region";
+	reg = <(address) (size)>;
+	(linux,default-contiguous-region);
+};
+
+compatible:	one or more of:
+	- "linux,contiguous-memory-region" - enables binding of this
+	  region to Contiguous Memory Allocator (special region for
+	  contiguous memory allocations, shared with movable system
+	  memory, Linux kernel-specific).
+	- "reserved-memory-region" - compatibility is defined, given
+	  region is assigned for exclusive usage for by the respective
+	  devices.
+
+reg:	standard property defining the base address and size of
+	the memory region
+
+linux,default-contiguous-region: property indicating that the region
+	is the default region for all contiguous memory
+	allocations, Linux specific (optional)
+
+It is optional to specify the base address, so if one wants to use
+autoconfiguration of the base address, '0' can be specified as a base
+address in the 'reg' property.
+
+The /memory/reserved-memory node must contain the same #address-cells
+and #size-cells value as the root node.
+
+
+*** Device node's properties ***
+
+Once regions in the /memory/reserved-memory node have been defined, they
+may be referenced by other device nodes. Bindings that wish to reference
+memory regions should explicitly document their use of the following
+property:
+
+memory-region = <&phandle_to_defined_region>;
+
+This property indicates that the device driver should use the memory
+region pointed by the given phandle.
+
+
+*** Example ***
+
+This example defines a memory consisting of 4 memory banks. 3 contiguous
+regions are defined for Linux kernel, one default of all device drivers
+(named contig_mem, placed at 0x72000000, 64MiB), one dedicated to the
+framebuffer device (labelled display_mem, placed at 0x78000000, 8MiB)
+and one for multimedia processing (labelled multimedia_mem, placed at
+0x77000000, 64MiB). 'display_mem' region is then assigned to fb@12300000
+device for DMA memory allocations (Linux kernel drivers will use CMA is
+available or dma-exclusive usage otherwise). 'multimedia_mem' is
+assigned to scaler@12500000 and codec@12600000 devices for contiguous
+memory allocations when CMA driver is enabled.
+
+The reason for creating a separate region for framebuffer device is to
+match the framebuffer base address to the one configured by bootloader,
+so once Linux kernel drivers starts no glitches on the displayed boot
+logo appears. Scaller and codec drivers should share the memory
+allocations.
+
+/ {
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	/* ... */
+
+	memory {
+		reg =  <0x40000000 0x10000000
+			0x50000000 0x10000000
+			0x60000000 0x10000000
+			0x70000000 0x10000000>;
+
+		reserved-memory {
+			#address-cells = <1>;
+			#size-cells = <1>;
+
+			/*
+			 * global autoconfigured region for contiguous allocations
+			 * (used only with Contiguous Memory Allocator)
+			 */
+			contig_region@0 {
+				compatible = "linux,contiguous-memory-region";
+				reg = <0x0 0x4000000>;
+				linux,default-contiguous-region;
+			};
+
+			/*
+			 * special region for framebuffer
+			 */
+			display_region: region@78000000 {
+				compatible = "linux,contiguous-memory-region", "reserved-memory-region";
+				reg = <0x78000000 0x800000>;
+			};
+
+			/*
+			 * special region for multimedia processing devices
+			 */
+			multimedia_region: region@77000000 {
+				compatible = "linux,contiguous-memory-region";
+				reg = <0x77000000 0x4000000>;
+			};
+		};
+	};
+
+	/* ... */
+
+	fb0: fb@12300000 {
+		status = "okay";
+		memory-region = <&display_region>;
+	};
+
+	scaler: scaler@12500000 {
+		status = "okay";
+		memory-region = <&multimedia_region>;
+	};
+
+	codec: codec@12600000 {
+		status = "okay";
+		memory-region = <&multimedia_region>;
+	};
+};
diff --git a/Documentation/devicetree/bindings/metag/pdc-intc.txt b/Documentation/devicetree/bindings/metag/pdc-intc.txt
new file mode 100644
index 0000000..a691185
--- /dev/null
+++ b/Documentation/devicetree/bindings/metag/pdc-intc.txt
@@ -0,0 +1,105 @@
+* ImgTec Powerdown Controller (PDC) Interrupt Controller Binding
+
+This binding specifies what properties must be available in the device tree
+representation of a PDC IRQ controller. This has a number of input interrupt
+lines which can wake the system, and are passed on through output interrupt
+lines.
+
+Required properties:
+
+    - compatible: Specifies the compatibility list for the interrupt controller.
+      The type shall be <string> and the value shall include "img,pdc-intc".
+
+    - reg: Specifies the base PDC physical address(s) and size(s) of the
+      addressable register space. The type shall be <prop-encoded-array>.
+
+    - interrupt-controller: The presence of this property identifies the node
+      as an interrupt controller. No property value shall be defined.
+
+    - #interrupt-cells: Specifies the number of cells needed to encode an
+      interrupt source. The type shall be a <u32> and the value shall be 2.
+
+    - num-perips: Number of waking peripherals.
+
+    - num-syswakes: Number of SysWake inputs.
+
+    - interrupts: List of interrupt specifiers. The first specifier shall be the
+      shared SysWake interrupt, and remaining specifies shall be PDC peripheral
+      interrupts in order.
+
+* Interrupt Specifier Definition
+
+  Interrupt specifiers consists of 2 cells encoded as follows:
+
+    - <1st-cell>: The interrupt-number that identifies the interrupt source.
+                    0-7:  Peripheral interrupts
+                    8-15: SysWake interrupts
+
+    - <2nd-cell>: The level-sense information, encoded using the Linux interrupt
+                  flags as follows (only 4 valid for peripheral interrupts):
+                    0 = none (decided by software)
+                    1 = low-to-high edge triggered
+                    2 = high-to-low edge triggered
+                    3 = both edge triggered
+                    4 = active-high level-sensitive (required for perip irqs)
+                    8 = active-low level-sensitive
+
+* Examples
+
+Example 1:
+
+	/*
+	 * TZ1090 PDC block
+	 */
+	pdc: pdc@0x02006000 {
+		// This is an interrupt controller node.
+		interrupt-controller;
+
+		// Three cells to encode interrupt sources.
+		#interrupt-cells = <2>;
+
+		// Offset address of 0x02006000 and size of 0x1000.
+		reg = <0x02006000 0x1000>;
+
+		// Compatible with Meta hardware trigger block.
+		compatible = "img,pdc-intc";
+
+		// Three peripherals are connected.
+		num-perips = <3>;
+
+		// Four SysWakes are connected.
+		num-syswakes = <4>;
+
+		interrupts = <18 4 /* level */>, /* Syswakes */
+			     <30 4 /* level */>, /* Peripheral 0 (RTC) */
+			     <29 4 /* level */>, /* Peripheral 1 (IR) */
+			     <31 4 /* level */>; /* Peripheral 2 (WDT) */
+	};
+
+Example 2:
+
+	/*
+	 * An SoC peripheral that is wired through the PDC.
+	 */
+	rtc0 {
+		// The interrupt controller that this device is wired to.
+		interrupt-parent = <&pdc>;
+
+		// Interrupt source Peripheral 0
+		interrupts = <0   /* Peripheral 0 (RTC) */
+		              4>  /* IRQ_TYPE_LEVEL_HIGH */
+	};
+
+Example 3:
+
+	/*
+	 * An interrupt generating device that is wired to a SysWake pin.
+	 */
+	touchscreen0 {
+		// The interrupt controller that this device is wired to.
+		interrupt-parent = <&pdc>;
+
+		// Interrupt source SysWake 0 that is active-low level-sensitive
+		interrupts = <8 /* SysWake0 */
+			      8 /* IRQ_TYPE_LEVEL_LOW */>;
+	};
diff --git a/Documentation/devicetree/bindings/mmc/fsl-esdhc.txt b/Documentation/devicetree/bindings/mmc/fsl-esdhc.txt
index bd9be0b..b7943f3 100644
--- a/Documentation/devicetree/bindings/mmc/fsl-esdhc.txt
+++ b/Documentation/devicetree/bindings/mmc/fsl-esdhc.txt
@@ -19,6 +19,9 @@
     "bus-width = <1>" property.
   - sdhci,auto-cmd12: specifies that a controller can only handle auto
     CMD12.
+  - voltage-ranges : two cells are required, first cell specifies minimum
+    slot voltage (mV), second cell specifies maximum slot voltage (mV).
+    Several ranges could be specified.
 
 Example:
 
@@ -29,4 +32,5 @@
 	interrupt-parent = <&ipic>;
 	/* Filled in by U-Boot */
 	clock-frequency = <0>;
+	voltage-ranges = <3300 3300>;
 };
diff --git a/Documentation/devicetree/bindings/mtd/atmel-nand.txt b/Documentation/devicetree/bindings/mtd/atmel-nand.txt
index d555421..c472883 100644
--- a/Documentation/devicetree/bindings/mtd/atmel-nand.txt
+++ b/Documentation/devicetree/bindings/mtd/atmel-nand.txt
@@ -15,6 +15,7 @@
   optional gpio and may be set to 0 if not present.
 
 Optional properties:
+- atmel,nand-has-dma : boolean to support dma transfer for nand read/write.
 - nand-ecc-mode : String, operation mode of the NAND ecc mode, soft by default.
   Supported values are: "none", "soft", "hw", "hw_syndrome", "hw_oob_first",
   "soft_bch".
@@ -29,6 +30,14 @@
   sector size 1024.
 - nand-bus-width : 8 or 16 bus width if not present 8
 - nand-on-flash-bbt: boolean to enable on flash bbt option if not present false
+- Nand Flash Controller(NFC) is a slave driver under Atmel nand flash
+  - Required properties:
+    - compatible : "atmel,sama5d3-nfc".
+    - reg : should specify the address and size used for NFC command registers,
+            NFC registers and NFC Sram. NFC Sram address and size can be absent
+            if don't want to use it.
+  - Optional properties:
+    - atmel,write-by-sram: boolean to enable NFC write by sram.
 
 Examples:
 nand0: nand@40000000,0 {
@@ -77,3 +86,22 @@
 		...
 	};
 };
+
+/* for NFC supported chips */
+nand0: nand@40000000 {
+	compatible = "atmel,at91rm9200-nand";
+	#address-cells = <1>;
+	#size-cells = <1>;
+	ranges;
+        ...
+        nfc@70000000 {
+		compatible = "atmel,sama5d3-nfc";
+		#address-cells = <1>;
+		#size-cells = <1>;
+		reg = <
+			0x70000000 0x10000000	/* NFC Command Registers */
+			0xffffc000 0x00000070	/* NFC HSMC regs */
+			0x00200000 0x00100000	/* NFC SRAM banks */
+		>;
+	};
+};
diff --git a/Documentation/devicetree/bindings/mtd/fsmc-nand.txt b/Documentation/devicetree/bindings/mtd/fsmc-nand.txt
index 2240ac0..ec42935 100644
--- a/Documentation/devicetree/bindings/mtd/fsmc-nand.txt
+++ b/Documentation/devicetree/bindings/mtd/fsmc-nand.txt
@@ -1,4 +1,5 @@
-* FSMC NAND
+ST Microelectronics Flexible Static Memory Controller (FSMC)
+NAND Interface
 
 Required properties:
 - compatible : "st,spear600-fsmc-nand", "stericsson,fsmc-nand"
@@ -9,6 +10,26 @@
 - bank-width : Width (in bytes) of the device.  If not present, the width
   defaults to 1 byte
 - nand-skip-bbtscan: Indicates the the BBT scanning should be skipped
+- timings: array of 6 bytes for NAND timings. The meanings of these bytes
+  are:
+  byte 0 TCLR  : CLE to RE delay in number of AHB clock cycles, only 4 bits
+                 are valid. Zero means one clockcycle, 15 means 16 clock
+                 cycles.
+  byte 1 TAR   : ALE to RE delay, 4 bits are valid. Same format as TCLR.
+  byte 2 THIZ  : number of HCLK clock cycles during which the data bus is
+                 kept in Hi-Z (tristate) after the start of a write access.
+                 Only valid for write transactions. Zero means zero cycles,
+                 255 means 255 cycles.
+  byte 3 THOLD : number of HCLK clock cycles to hold the address (and data
+                 when writing) after the command deassertation. Zero means
+                 one cycle, 255 means 256 cycles.
+  byte 4 TWAIT : number of HCLK clock cycles to assert the command to the
+                 NAND flash in response to SMWAITn. Zero means 1 cycle,
+                 255 means 256 cycles.
+  byte 5 TSET  : number of HCLK clock cycles to assert the address before the
+                 command is asserted. Zero means one cycle, 255 means 256
+                 cycles.
+- bank: default NAND bank to use (0-3 are valid, 0 is the default).
 
 Example:
 
@@ -24,6 +45,8 @@
 
 		bank-width = <1>;
 		nand-skip-bbtscan;
+		timings = /bits/ 8 <0 0 0 2 3 0>;
+		bank = <1>;
 
 		partition@0 {
 			...
diff --git a/Documentation/devicetree/bindings/mtd/partition.txt b/Documentation/devicetree/bindings/mtd/partition.txt
index 9315ac9..8e5557d 100644
--- a/Documentation/devicetree/bindings/mtd/partition.txt
+++ b/Documentation/devicetree/bindings/mtd/partition.txt
@@ -4,6 +4,7 @@
 on platforms which have strong conventions about which portions of a flash are
 used for what purposes, but which don't use an on-flash partition table such
 as RedBoot.
+NOTE: if the sub-node has a compatible string, then it is not a partition.
 
 #address-cells & #size-cells must both be present in the mtd device. There are
 two valid values for both:
diff --git a/Documentation/devicetree/bindings/net/can/sja1000.txt b/Documentation/devicetree/bindings/net/can/sja1000.txt
index c2dbcec..f2105a4 100644
--- a/Documentation/devicetree/bindings/net/can/sja1000.txt
+++ b/Documentation/devicetree/bindings/net/can/sja1000.txt
@@ -37,7 +37,7 @@
 	If not specified or if the specified value is 0, the CLKOUT pin
 	will be disabled.
 
-- nxp,no-comparator-bypass : Allows to disable the CAN input comperator.
+- nxp,no-comparator-bypass : Allows to disable the CAN input comparator.
 
 For further information, please have a look to the SJA1000 data sheet.
 
diff --git a/Documentation/devicetree/bindings/pwm/pwm-samsung.txt b/Documentation/devicetree/bindings/pwm/pwm-samsung.txt
index 4caa1a7..d61fccd 100644
--- a/Documentation/devicetree/bindings/pwm/pwm-samsung.txt
+++ b/Documentation/devicetree/bindings/pwm/pwm-samsung.txt
@@ -19,6 +19,16 @@
 - reg: base address and size of register area
 - interrupts: list of timer interrupts (one interrupt per timer, starting at
   timer 0)
+- clock-names: should contain all following required clock names:
+    - "timers" - PWM base clock used to generate PWM signals,
+  and any subset of following optional clock names:
+    - "pwm-tclk0" - first external PWM clock source,
+    - "pwm-tclk1" - second external PWM clock source.
+  Note that not all IP variants allow using all external clock sources.
+  Refer to SoC documentation to learn which clock source configurations
+  are available.
+- clocks: should contain clock specifiers of all clocks, which input names
+  have been specified in clock-names property, in same order.
 - #pwm-cells: should be 3. See pwm.txt in this directory for a description of
   the cells format. The only third cell flag supported by this binding is
   PWM_POLARITY_INVERTED.
@@ -34,6 +44,8 @@
 		reg = <0x7f006000 0x1000>;
 		interrupt-parent = <&vic0>;
 		interrupts = <23>, <24>, <25>, <27>, <28>;
+		clocks = <&clock 67>;
+		clock-names = "timers";
 		samsung,pwm-outputs = <0>, <1>;
 		#pwm-cells = <3>;
 	}
diff --git a/Documentation/dmatest.txt b/Documentation/dmatest.txt
index 132a094..a2b5663 100644
--- a/Documentation/dmatest.txt
+++ b/Documentation/dmatest.txt
@@ -16,15 +16,16 @@
 	Part 2 - When dmatest is built as a module...
 
 After mounting debugfs and loading the module, the /sys/kernel/debug/dmatest
-folder with nodes will be created. They are the same as module parameters with
-addition of the 'run' node that controls run and stop phases of the test.
+folder with nodes will be created. There are two important files located. First
+is the 'run' node that controls run and stop phases of the test, and the second
+one, 'results', is used to get the test case results.
 
 Note that in this case test will not run on load automatically.
 
 Example of usage:
-	% echo dma0chan0 > /sys/kernel/debug/dmatest/channel
-	% echo 2000 > /sys/kernel/debug/dmatest/timeout
-	% echo 1 > /sys/kernel/debug/dmatest/iterations
+	% echo dma0chan0 > /sys/module/dmatest/parameters/channel
+	% echo 2000 > /sys/module/dmatest/parameters/timeout
+	% echo 1 > /sys/module/dmatest/parameters/iterations
 	% echo 1 > /sys/kernel/debug/dmatest/run
 
 Hint: available channel list could be extracted by running the following
@@ -55,8 +56,8 @@
 re-run with the same or different parameters. For the details see the above
 section "Part 2 - When dmatest is built as a module..."
 
-In both cases the module parameters are used as initial values for the test case.
-You always could check them at run-time by running
+In both cases the module parameters are used as the actual values for the test
+case. You always could check them at run-time by running
 	% grep -H . /sys/module/dmatest/parameters/*
 
 	Part 4 - Gathering the test results
diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt
index fb57d85..fcb34a5 100644
--- a/Documentation/driver-model/devres.txt
+++ b/Documentation/driver-model/devres.txt
@@ -299,3 +299,6 @@
 PHY
   devm_usb_get_phy()
   devm_usb_put_phy()
+
+SLAVE DMA ENGINE
+  devm_acpi_dma_controller_register()
diff --git a/Documentation/filesystems/caching/backend-api.txt b/Documentation/filesystems/caching/backend-api.txt
index d78bab9..277d1e8 100644
--- a/Documentation/filesystems/caching/backend-api.txt
+++ b/Documentation/filesystems/caching/backend-api.txt
@@ -299,6 +299,15 @@
      enough space in the cache to permit this.
 
 
+ (*) Check coherency state of an object [mandatory]:
+
+	int (*check_consistency)(struct fscache_object *object)
+
+     This method is called to have the cache check the saved auxiliary data of
+     the object against the netfs's idea of the state.  0 should be returned
+     if they're consistent and -ESTALE otherwise.  -ENOMEM and -ERESTARTSYS
+     may also be returned.
+
  (*) Update object [mandatory]:
 
 	int (*update_object)(struct fscache_object *object)
diff --git a/Documentation/filesystems/caching/netfs-api.txt b/Documentation/filesystems/caching/netfs-api.txt
index 97e6c0ec..11a0a40 100644
--- a/Documentation/filesystems/caching/netfs-api.txt
+++ b/Documentation/filesystems/caching/netfs-api.txt
@@ -32,7 +32,7 @@
 	 (9) Setting the data file size
 	(10) Page alloc/read/write
 	(11) Page uncaching
-	(12) Index and data file update
+	(12) Index and data file consistency
 	(13) Miscellaneous cookie operations
 	(14) Cookie unregistration
 	(15) Index invalidation
@@ -433,7 +433,7 @@
 
 
 =====================
-PAGE READ/ALLOC/WRITE
+PAGE ALLOC/READ/WRITE
 =====================
 
 And the sixth step is to store and retrieve pages in the cache.  There are
@@ -499,7 +499,7 @@
      (*) An argument that's 0 on success or negative for an error code.
 
      If an error occurs, it should be assumed that the page contains no usable
-     data.
+     data.  fscache_readpages_cancel() may need to be called.
 
      end_io_func() will be called in process context if the read is results in
      an error, but it might be called in interrupt context if the read is
@@ -623,6 +623,22 @@
 been marked appropriately and will need uncaching.
 
 
+CANCELLATION OF UNREAD PAGES
+----------------------------
+
+If one or more pages are passed to fscache_read_or_alloc_pages() but not then
+read from the cache and also not read from the underlying filesystem then
+those pages will need to have any marks and reservations removed.  This can be
+done by calling:
+
+	void fscache_readpages_cancel(struct fscache_cookie *cookie,
+				      struct list_head *pages);
+
+prior to returning to the caller.  The cookie argument should be as passed to
+fscache_read_or_alloc_pages().  Every page in the pages list will be examined
+and any that have PG_fscache set will be uncached.
+
+
 ==============
 PAGE UNCACHING
 ==============
@@ -690,9 +706,18 @@
 error is returned.
 
 
-==========================
-INDEX AND DATA FILE UPDATE
-==========================
+===============================
+INDEX AND DATA FILE CONSISTENCY
+===============================
+
+To find out whether auxiliary data for an object is up to data within the
+cache, the following function can be called:
+
+	int fscache_check_consistency(struct fscache_cookie *cookie)
+
+This will call back to the netfs to check whether the auxiliary data associated
+with a cookie is correct.  It returns 0 if it is and -ESTALE if it isn't; it
+may also return -ENOMEM and -ERESTARTSYS.
 
 To request an update of the index data for an index or other object, the
 following function should be called:
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 479eeaf..1a036cd9 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1898,6 +1898,18 @@
 			will be sent.
 			The default is to send the implementation identification
 			information.
+	
+	nfs.recover_lost_locks =
+			[NFSv4] Attempt to recover locks that were lost due
+			to a lease timeout on the server. Please note that
+			doing this risks data corruption, since there are
+			no guarantees that the file will remain unchanged
+			after the locks are lost.
+			If you want to enable the kernel legacy behaviour of
+			attempting to recover these locks, then set this
+			parameter to '1'.
+			The default parameter value of '0' causes the kernel
+			not to attempt recovery of lost locks.
 
 	nfsd.nfs4_disable_idmapping=
 			[NFSv4] When set to the default of '1', the NFSv4
diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt
index d7993dc..b9ca023 100644
--- a/Documentation/vfio.txt
+++ b/Documentation/vfio.txt
@@ -167,8 +167,8 @@
 	int container, group, device, i;
 	struct vfio_group_status group_status =
 					{ .argsz = sizeof(group_status) };
-	struct vfio_iommu_x86_info iommu_info = { .argsz = sizeof(iommu_info) };
-	struct vfio_iommu_x86_dma_map dma_map = { .argsz = sizeof(dma_map) };
+	struct vfio_iommu_type1_info iommu_info = { .argsz = sizeof(iommu_info) };
+	struct vfio_iommu_type1_dma_map dma_map = { .argsz = sizeof(dma_map) };
 	struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
 
 	/* Create a new container */
@@ -193,7 +193,7 @@
 	ioctl(group, VFIO_GROUP_SET_CONTAINER, &container);
 
 	/* Enable the IOMMU model we want */
-	ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU)
+	ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU);
 
 	/* Get addition IOMMU info */
 	ioctl(container, VFIO_IOMMU_GET_INFO, &iommu_info);
@@ -229,7 +229,7 @@
 
 		irq.index = i;
 
-		ioctl(device, VFIO_DEVICE_GET_IRQ_INFO, &reg);
+		ioctl(device, VFIO_DEVICE_GET_IRQ_INFO, &irq);
 
 		/* Setup IRQs... eventfds, VFIO_DEVICE_SET_IRQS */
 	}
diff --git a/MAINTAINERS b/MAINTAINERS
index 99c6f35..38a37f7 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -933,24 +933,24 @@
 
 ARM/INTEL IOP32X ARM ARCHITECTURE
 M:	Lennert Buytenhek <kernel@wantstofly.org>
-M:	Dan Williams <djbw@fb.com>
+M:	Dan Williams <dan.j.williams@intel.com>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 
 ARM/INTEL IOP33X ARM ARCHITECTURE
-M:	Dan Williams <djbw@fb.com>
+M:	Dan Williams <dan.j.williams@intel.com>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 
 ARM/INTEL IOP13XX ARM ARCHITECTURE
 M:	Lennert Buytenhek <kernel@wantstofly.org>
-M:	Dan Williams <djbw@fb.com>
+M:	Dan Williams <dan.j.williams@intel.com>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 
 ARM/INTEL IQ81342EX MACHINE SUPPORT
 M:	Lennert Buytenhek <kernel@wantstofly.org>
-M:	Dan Williams <djbw@fb.com>
+M:	Dan Williams <dan.j.williams@intel.com>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 
@@ -975,7 +975,7 @@
 
 ARM/INTEL XSC3 (MANZANO) ARM CORE
 M:	Lennert Buytenhek <kernel@wantstofly.org>
-M:	Dan Williams <djbw@fb.com>
+M:	Dan Williams <dan.j.williams@intel.com>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 
@@ -1386,7 +1386,7 @@
 F:	drivers/platform/x86/eeepc*.c
 
 ASYNCHRONOUS TRANSFERS/TRANSFORMS (IOAT) API
-M:	Dan Williams <djbw@fb.com>
+M:	Dan Williams <dan.j.williams@intel.com>
 W:	http://sourceforge.net/projects/xscaleiop
 S:	Maintained
 F:	Documentation/crypto/async-tx-api.txt
@@ -2307,6 +2307,15 @@
 F:	drivers/cpufreq/arm_big_little.c
 F:	drivers/cpufreq/arm_big_little_dt.c
 
+CPUIDLE DRIVER - ARM BIG LITTLE
+M:      Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
+M:      Daniel Lezcano <daniel.lezcano@linaro.org>
+L:      linux-pm@vger.kernel.org
+L:      linux-arm-kernel@lists.infradead.org
+T:      git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git
+S:      Maintained
+F:      drivers/cpuidle/cpuidle-big_little.c
+
 CPUIDLE DRIVERS
 M:	Rafael J. Wysocki <rjw@sisk.pl>
 M:	Daniel Lezcano <daniel.lezcano@linaro.org>
@@ -2682,7 +2691,7 @@
 
 DMA GENERIC OFFLOAD ENGINE SUBSYSTEM
 M:	Vinod Koul <vinod.koul@intel.com>
-M:	Dan Williams <djbw@fb.com>
+M:	Dan Williams <dan.j.williams@intel.com>
 S:	Supported
 F:	drivers/dma/
 F:	include/linux/dma*
@@ -4314,7 +4323,7 @@
 F:	arch/x86/kernel/microcode_intel.c
 
 INTEL I/OAT DMA DRIVER
-M:	Dan Williams <djbw@fb.com>
+M:	Dan Williams <dan.j.williams@intel.com>
 S:	Maintained
 F:	drivers/dma/ioat*
 
@@ -4327,7 +4336,7 @@
 F:	include/linux/intel-iommu.h
 
 INTEL IOP-ADMA DMA DRIVER
-M:	Dan Williams <djbw@fb.com>
+M:	Dan Williams <dan.j.williams@intel.com>
 S:	Odd fixes
 F:	drivers/dma/iop-adma.c
 
@@ -5442,6 +5451,7 @@
 
 METAG ARCHITECTURE
 M:	James Hogan <james.hogan@imgtec.com>
+L:	linux-metag@vger.kernel.org
 S:	Supported
 F:	arch/metag/
 F:	Documentation/metag/
@@ -7194,6 +7204,7 @@
 
 SYNOPSYS DESIGNWARE DMAC DRIVER
 M:	Viresh Kumar <viresh.linux@gmail.com>
+M:	Andy Shevchenko <andriy.shevchenko@linux.intel.com>
 S:	Maintained
 F:	include/linux/dw_dmac.h
 F:	drivers/dma/dw/
diff --git a/arch/arc/boot/.gitignore b/arch/arc/boot/.gitignore
new file mode 100644
index 0000000..5d65b54
--- /dev/null
+++ b/arch/arc/boot/.gitignore
@@ -0,0 +1 @@
+*.dtb*
diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h
index 5802849..e4abdaa 100644
--- a/arch/arc/include/asm/cache.h
+++ b/arch/arc/include/asm/cache.h
@@ -57,7 +57,7 @@
 
 extern void arc_cache_init(void);
 extern char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len);
-extern void __init read_decode_cache_bcr(void);
+extern void read_decode_cache_bcr(void);
 
 #endif	/* !__ASSEMBLY__ */
 
diff --git a/arch/arc/include/asm/delay.h b/arch/arc/include/asm/delay.h
index 442ce5d..43de302 100644
--- a/arch/arc/include/asm/delay.h
+++ b/arch/arc/include/asm/delay.h
@@ -53,11 +53,10 @@
 {
 	unsigned long loops;
 
-	/* (long long) cast ensures 64 bit MPY - real or emulated
+	/* (u64) cast ensures 64 bit MPY - real or emulated
 	 * HZ * 4295 is pre-evaluated by gcc - hence only 2 mpy ops
 	 */
-	loops = ((long long)(usecs * 4295 * HZ) *
-		 (long long)(loops_per_jiffy)) >> 32;
+	loops = ((u64) usecs * 4295 * HZ * loops_per_jiffy) >> 32;
 
 	__delay(loops);
 }
diff --git a/arch/arc/include/asm/entry.h b/arch/arc/include/asm/entry.h
index df57611..8840810 100644
--- a/arch/arc/include/asm/entry.h
+++ b/arch/arc/include/asm/entry.h
@@ -365,7 +365,7 @@
  * it to memory (non-SMP case) or SCRATCH0 Aux Reg (SMP).
  *
  * Before saving the full regfile - this reg is restored back, only
- * to be saved again on kernel mode stack, as part of ptregs.
+ * to be saved again on kernel mode stack, as part of pt_regs.
  *-------------------------------------------------------------*/
 .macro EXCPN_PROLOG_FREEUP_REG	reg
 #ifdef CONFIG_SMP
@@ -384,6 +384,28 @@
 .endm
 
 /*--------------------------------------------------------------
+ * Exception Entry prologue
+ * -Switches stack to K mode (if not already)
+ * -Saves the register file
+ *
+ * After this it is safe to call the "C" handlers
+ *-------------------------------------------------------------*/
+.macro EXCEPTION_PROLOGUE
+
+	/* Need at least 1 reg to code the early exception prologue */
+	EXCPN_PROLOG_FREEUP_REG r9
+
+	/* U/K mode at time of exception (stack not switched if already K) */
+	lr  r9, [erstatus]
+
+	/* ARC700 doesn't provide auto-stack switching */
+	SWITCH_TO_KERNEL_STK
+
+	/* save the regfile */
+	SAVE_ALL_SYS
+.endm
+
+/*--------------------------------------------------------------
  * Save all registers used by Exceptions (TLB Miss, Prot-V, Mem err etc)
  * Requires SP to be already switched to kernel mode Stack
  * sp points to the next free element on the stack at exit of this macro.
diff --git a/arch/arc/include/asm/io.h b/arch/arc/include/asm/io.h
index 473424d..334ce70 100644
--- a/arch/arc/include/asm/io.h
+++ b/arch/arc/include/asm/io.h
@@ -100,6 +100,10 @@
 
 }
 
+#define readb_relaxed readb
+#define readw_relaxed readw
+#define readl_relaxed readl
+
 #include <asm-generic/io.h>
 
 #endif /* _ASM_ARC_IO_H */
diff --git a/arch/arc/include/asm/irqflags.h b/arch/arc/include/asm/irqflags.h
index d99f79b..b68b53f 100644
--- a/arch/arc/include/asm/irqflags.h
+++ b/arch/arc/include/asm/irqflags.h
@@ -157,13 +157,6 @@
 	flag	\scratch
 .endm
 
-.macro IRQ_DISABLE_SAVE  scratch, save
-	lr	\scratch, [status32]
-	mov	\save, \scratch		/* Make a copy */
-	bic	\scratch, \scratch, (STATUS_E1_MASK | STATUS_E2_MASK)
-	flag	\scratch
-.endm
-
 .macro IRQ_ENABLE  scratch
 	lr	\scratch, [status32]
 	or	\scratch, \scratch, (STATUS_E1_MASK | STATUS_E2_MASK)
diff --git a/arch/arc/include/asm/mmu.h b/arch/arc/include/asm/mmu.h
index 7c03fe6..c2663b3 100644
--- a/arch/arc/include/asm/mmu.h
+++ b/arch/arc/include/asm/mmu.h
@@ -32,6 +32,8 @@
 /* Error code if probe fails */
 #define TLB_LKUP_ERR		0x80000000
 
+#define TLB_DUP_ERR	(TLB_LKUP_ERR | 0x00000001)
+
 /* TLB Commands */
 #define TLBWrite    0x1
 #define TLBRead     0x2
@@ -46,21 +48,18 @@
 #ifndef __ASSEMBLY__
 
 typedef struct {
-	unsigned long asid;	/* Pvt Addr-Space ID for mm */
-#ifdef CONFIG_ARC_TLB_DBG
-	struct task_struct *tsk;
-#endif
+	unsigned long asid;	/* 8 bit MMU PID + Generation cycle */
 } mm_context_t;
 
 #ifdef CONFIG_ARC_DBG_TLB_PARANOIA
-void tlb_paranoid_check(unsigned int pid_sw, unsigned long address);
+void tlb_paranoid_check(unsigned int mm_asid, unsigned long address);
 #else
 #define tlb_paranoid_check(a, b)
 #endif
 
 void arc_mmu_init(void);
 extern char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len);
-void __init read_decode_mmu_bcr(void);
+void read_decode_mmu_bcr(void);
 
 #endif	/* !__ASSEMBLY__ */
 
diff --git a/arch/arc/include/asm/mmu_context.h b/arch/arc/include/asm/mmu_context.h
index 0d71fb1..43a1b51 100644
--- a/arch/arc/include/asm/mmu_context.h
+++ b/arch/arc/include/asm/mmu_context.h
@@ -34,95 +34,65 @@
  * When it reaches max 255, the allocation cycle starts afresh by flushing
  * the entire TLB and wrapping ASID back to zero.
  *
- * For book-keeping, Linux uses a couple of data-structures:
- *  -mm_struct has an @asid field to keep a note of task's ASID (needed at the
- *   time of say switch_mm( )
- *  -An array of mm structs @asid_mm_map[] for asid->mm the reverse mapping,
- *  given an ASID, finding the mm struct associated.
- *
- * The round-robin allocation algorithm allows for ASID stealing.
- * If asid tracker is at "x-1", a new req will allocate "x", even if "x" was
- * already assigned to another (switched-out) task. Obviously the prev owner
- * is marked with an invalid ASID to make it request for a new ASID when it
- * gets scheduled next time. However its TLB entries (with ASID "x") could
- * exist, which must be cleared before the same ASID is used by the new owner.
- * Flushing them would be plausible but costly solution. Instead we force a
- * allocation policy quirk, which ensures that a stolen ASID won't have any
- * TLB entries associates, alleviating the need to flush.
- * The quirk essentially is not allowing ASID allocated in prev cycle
- * to be used past a roll-over in the next cycle.
- * When this happens (i.e. task ASID > asid tracker), task needs to refresh
- * its ASID, aligning it to current value of tracker. If the task doesn't get
- * scheduled past a roll-over, hence its ASID is not yet realigned with
- * tracker, such ASID is anyways safely reusable because it is
- * gauranteed that TLB entries with that ASID wont exist.
+ * A new allocation cycle, post rollover, could potentially reassign an ASID
+ * to a different task. Thus the rule is to refresh the ASID in a new cycle.
+ * The 32 bit @asid_cache (and mm->asid) have 8 bits MMU PID and rest 24 bits
+ * serve as cycle/generation indicator and natural 32 bit unsigned math
+ * automagically increments the generation when lower 8 bits rollover.
  */
 
-#define FIRST_ASID  0
-#define MAX_ASID    255			/* 8 bit PID field in PID Aux reg */
-#define NO_ASID     (MAX_ASID + 1)	/* ASID Not alloc to mmu ctxt */
-#define NUM_ASID    ((MAX_ASID - FIRST_ASID) + 1)
+#define MM_CTXT_ASID_MASK	0x000000ff /* MMU PID reg :8 bit PID */
+#define MM_CTXT_CYCLE_MASK	(~MM_CTXT_ASID_MASK)
 
-/* ASID to mm struct mapping */
-extern struct mm_struct *asid_mm_map[NUM_ASID + 1];
+#define MM_CTXT_FIRST_CYCLE	(MM_CTXT_ASID_MASK + 1)
+#define MM_CTXT_NO_ASID		0UL
 
-extern int asid_cache;
+#define hw_pid(mm)		(mm->context.asid & MM_CTXT_ASID_MASK)
+
+extern unsigned int asid_cache;
 
 /*
- * Assign a new ASID to task. If the task already has an ASID, it is
- * relinquished.
+ * Get a new ASID if task doesn't have a valid one (unalloc or from prev cycle)
+ * Also set the MMU PID register to existing/updated ASID
  */
 static inline void get_new_mmu_context(struct mm_struct *mm)
 {
-	struct mm_struct *prev_owner;
 	unsigned long flags;
 
 	local_irq_save(flags);
 
 	/*
-	 * Relinquish the currently owned ASID (if any).
-	 * Doing unconditionally saves a cmp-n-branch; for already unused
-	 * ASID slot, the value was/remains NULL
+	 * Move to new ASID if it was not from current alloc-cycle/generation.
+	 * This is done by ensuring that the generation bits in both mm->ASID
+	 * and cpu's ASID counter are exactly same.
+	 *
+	 * Note: Callers needing new ASID unconditionally, independent of
+	 * 	 generation, e.g. local_flush_tlb_mm() for forking  parent,
+	 * 	 first need to destroy the context, setting it to invalid
+	 * 	 value.
 	 */
-	asid_mm_map[mm->context.asid] = (struct mm_struct *)NULL;
+	if (!((mm->context.asid ^ asid_cache) & MM_CTXT_CYCLE_MASK))
+		goto set_hw;
 
-	/* move to new ASID */
-	if (++asid_cache > MAX_ASID) {	/* ASID roll-over */
-		asid_cache = FIRST_ASID;
+	/* move to new ASID and handle rollover */
+	if (unlikely(!(++asid_cache & MM_CTXT_ASID_MASK))) {
+
 		flush_tlb_all();
+
+		/*
+		 * Above checke for rollover of 8 bit ASID in 32 bit container.
+		 * If the container itself wrapped around, set it to a non zero
+		 * "generation" to distinguish from no context
+		 */
+		if (!asid_cache)
+			asid_cache = MM_CTXT_FIRST_CYCLE;
 	}
 
-	/*
-	 * Is next ASID already owned by some-one else (we are stealing it).
-	 * If so, let the orig owner be aware of this, so when it runs, it
-	 * asks for a brand new ASID. This would only happen for a long-lived
-	 * task with ASID from prev allocation cycle (before ASID roll-over).
-	 *
-	 * This might look wrong - if we are re-using some other task's ASID,
-	 * won't we use it's stale TLB entries too. Actually switch_mm( ) takes
-	 * care of such a case: it ensures that task with ASID from prev alloc
-	 * cycle, when scheduled will refresh it's ASID: see switch_mm( ) below
-	 * The stealing scenario described here will only happen if that task
-	 * didn't get a chance to refresh it's ASID - implying stale entries
-	 * won't exist.
-	 */
-	prev_owner = asid_mm_map[asid_cache];
-	if (prev_owner)
-		prev_owner->context.asid = NO_ASID;
-
 	/* Assign new ASID to tsk */
-	asid_mm_map[asid_cache] = mm;
 	mm->context.asid = asid_cache;
 
-#ifdef CONFIG_ARC_TLB_DBG
-	pr_info("ARC_TLB_DBG: NewMM=0x%x OldMM=0x%x task_struct=0x%x Task: %s,"
-	       " pid:%u, assigned asid:%lu\n",
-	       (unsigned int)mm, (unsigned int)prev_owner,
-	       (unsigned int)(mm->context.tsk), (mm->context.tsk)->comm,
-	       (mm->context.tsk)->pid, mm->context.asid);
-#endif
-
-	write_aux_reg(ARC_REG_PID, asid_cache | MMU_ENABLE);
+set_hw:
+	write_aux_reg(ARC_REG_PID, hw_pid(mm) | MMU_ENABLE);
 
 	local_irq_restore(flags);
 }
@@ -134,10 +104,7 @@
 static inline int
 init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 {
-	mm->context.asid = NO_ASID;
-#ifdef CONFIG_ARC_TLB_DBG
-	mm->context.tsk = tsk;
-#endif
+	mm->context.asid = MM_CTXT_NO_ASID;
 	return 0;
 }
 
@@ -152,40 +119,21 @@
 	write_aux_reg(ARC_REG_SCRATCH_DATA0, next->pgd);
 #endif
 
-	/*
-	 * Get a new ASID if task doesn't have a valid one. Possible when
-	 *  -task never had an ASID (fresh after fork)
-	 *  -it's ASID was stolen - past an ASID roll-over.
-	 *  -There's a third obscure scenario (if this task is running for the
-	 *   first time afer an ASID rollover), where despite having a valid
-	 *   ASID, we force a get for new ASID - see comments at top.
-	 *
-	 * Both the non-alloc scenario and first-use-after-rollover can be
-	 * detected using the single condition below:  NO_ASID = 256
-	 * while asid_cache is always a valid ASID value (0-255).
-	 */
-	if (next->context.asid > asid_cache) {
-		get_new_mmu_context(next);
-	} else {
-		/*
-		 * XXX: This will never happen given the chks above
-		 * BUG_ON(next->context.asid > MAX_ASID);
-		 */
-		write_aux_reg(ARC_REG_PID, next->context.asid | MMU_ENABLE);
-	}
-
+	get_new_mmu_context(next);
 }
 
+/*
+ * Called at the time of execve() to get a new ASID
+ * Note the subtlety here: get_new_mmu_context() behaves differently here
+ * vs. in switch_mm(). Here it always returns a new ASID, because mm has
+ * an unallocated "initial" value, while in latter, it moves to a new ASID,
+ * only if it was unallocated
+ */
+#define activate_mm(prev, next)		switch_mm(prev, next, NULL)
+
 static inline void destroy_context(struct mm_struct *mm)
 {
-	unsigned long flags;
-
-	local_irq_save(flags);
-
-	asid_mm_map[mm->context.asid] = NULL;
-	mm->context.asid = NO_ASID;
-
-	local_irq_restore(flags);
+	mm->context.asid = MM_CTXT_NO_ASID;
 }
 
 /* it seemed that deactivate_mm( ) is a reasonable place to do book-keeping
@@ -197,17 +145,6 @@
  */
 #define deactivate_mm(tsk, mm)   do { } while (0)
 
-static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next)
-{
-#ifndef CONFIG_SMP
-	write_aux_reg(ARC_REG_SCRATCH_DATA0, next->pgd);
-#endif
-
-	/* Unconditionally get a new ASID */
-	get_new_mmu_context(next);
-
-}
-
 #define enter_lazy_tlb(mm, tsk)
 
 #endif /* __ASM_ARC_MMU_CONTEXT_H */
diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h
index 4749a0e..6b0b7f7e 100644
--- a/arch/arc/include/asm/pgtable.h
+++ b/arch/arc/include/asm/pgtable.h
@@ -57,43 +57,31 @@
 
 #define _PAGE_ACCESSED      (1<<1)	/* Page is accessed (S) */
 #define _PAGE_CACHEABLE     (1<<2)	/* Page is cached (H) */
-#define _PAGE_U_EXECUTE     (1<<3)	/* Page has user execute perm (H) */
-#define _PAGE_U_WRITE       (1<<4)	/* Page has user write perm (H) */
-#define _PAGE_U_READ        (1<<5)	/* Page has user read perm (H) */
-#define _PAGE_K_EXECUTE     (1<<6)	/* Page has kernel execute perm (H) */
-#define _PAGE_K_WRITE       (1<<7)	/* Page has kernel write perm (H) */
-#define _PAGE_K_READ        (1<<8)	/* Page has kernel perm (H) */
-#define _PAGE_GLOBAL        (1<<9)	/* Page is global (H) */
-#define _PAGE_MODIFIED      (1<<10)	/* Page modified (dirty) (S) */
-#define _PAGE_FILE          (1<<10)	/* page cache/ swap (S) */
-#define _PAGE_PRESENT       (1<<11)	/* TLB entry is valid (H) */
+#define _PAGE_EXECUTE       (1<<3)	/* Page has user execute perm (H) */
+#define _PAGE_WRITE         (1<<4)	/* Page has user write perm (H) */
+#define _PAGE_READ          (1<<5)	/* Page has user read perm (H) */
+#define _PAGE_MODIFIED      (1<<6)	/* Page modified (dirty) (S) */
+#define _PAGE_FILE          (1<<7)	/* page cache/ swap (S) */
+#define _PAGE_GLOBAL        (1<<8)	/* Page is global (H) */
+#define _PAGE_PRESENT       (1<<10)	/* TLB entry is valid (H) */
 
-#else
+#else	/* MMU v3 onwards */
 
-/* PD1 */
 #define _PAGE_CACHEABLE     (1<<0)	/* Page is cached (H) */
-#define _PAGE_U_EXECUTE     (1<<1)	/* Page has user execute perm (H) */
-#define _PAGE_U_WRITE       (1<<2)	/* Page has user write perm (H) */
-#define _PAGE_U_READ        (1<<3)	/* Page has user read perm (H) */
-#define _PAGE_K_EXECUTE     (1<<4)	/* Page has kernel execute perm (H) */
-#define _PAGE_K_WRITE       (1<<5)	/* Page has kernel write perm (H) */
-#define _PAGE_K_READ        (1<<6)	/* Page has kernel perm (H) */
-#define _PAGE_ACCESSED      (1<<7)	/* Page is accessed (S) */
-
-/* PD0 */
+#define _PAGE_EXECUTE       (1<<1)	/* Page has user execute perm (H) */
+#define _PAGE_WRITE         (1<<2)	/* Page has user write perm (H) */
+#define _PAGE_READ          (1<<3)	/* Page has user read perm (H) */
+#define _PAGE_ACCESSED      (1<<4)	/* Page is accessed (S) */
+#define _PAGE_MODIFIED      (1<<5)	/* Page modified (dirty) (S) */
+#define _PAGE_FILE          (1<<6)	/* page cache/ swap (S) */
 #define _PAGE_GLOBAL        (1<<8)	/* Page is global (H) */
 #define _PAGE_PRESENT       (1<<9)	/* TLB entry is valid (H) */
-#define _PAGE_SHARED_CODE   (1<<10)	/* Shared Code page with cmn vaddr
+#define _PAGE_SHARED_CODE   (1<<11)	/* Shared Code page with cmn vaddr
 					   usable for shared TLB entries (H) */
-
-#define _PAGE_MODIFIED      (1<<11)	/* Page modified (dirty) (S) */
-#define _PAGE_FILE          (1<<12)	/* page cache/ swap (S) */
-
-#define _PAGE_SHARED_CODE_H (1<<31)	/* Hardware counterpart of above */
 #endif
 
-/* Kernel allowed all permissions for all pages */
-#define _K_PAGE_PERMS  (_PAGE_K_EXECUTE | _PAGE_K_WRITE | _PAGE_K_READ | \
+/* vmalloc permissions */
+#define _K_PAGE_PERMS  (_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ | \
 			_PAGE_GLOBAL | _PAGE_PRESENT)
 
 #ifdef CONFIG_ARC_CACHE_PAGES
@@ -109,10 +97,6 @@
  */
 #define ___DEF (_PAGE_PRESENT | _PAGE_DEF_CACHEABLE)
 
-#define _PAGE_READ	(_PAGE_U_READ    | _PAGE_K_READ)
-#define _PAGE_WRITE	(_PAGE_U_WRITE   | _PAGE_K_WRITE)
-#define _PAGE_EXECUTE	(_PAGE_U_EXECUTE | _PAGE_K_EXECUTE)
-
 /* Set of bits not changed in pte_modify */
 #define _PAGE_CHG_MASK	(PAGE_MASK | _PAGE_ACCESSED | _PAGE_MODIFIED)
 
@@ -126,8 +110,8 @@
 
 #define PAGE_SHARED	PAGE_U_W_R
 
-/* While kernel runs out of unstrslated space, vmalloc/modules use a chunk of
- * kernel vaddr space - visible in all addr spaces, but kernel mode only
+/* While kernel runs out of unstranslated space, vmalloc/modules use a chunk of
+ * user vaddr space - visible in all addr spaces, but kernel mode only
  * Thus Global, all-kernel-access, no-user-access, cached
  */
 #define PAGE_KERNEL          __pgprot(_K_PAGE_PERMS | _PAGE_DEF_CACHEABLE)
@@ -136,10 +120,9 @@
 #define PAGE_KERNEL_NO_CACHE __pgprot(_K_PAGE_PERMS)
 
 /* Masks for actual TLB "PD"s */
-#define PTE_BITS_IN_PD0	(_PAGE_GLOBAL | _PAGE_PRESENT)
-#define PTE_BITS_IN_PD1	(PAGE_MASK | _PAGE_CACHEABLE | \
-			 _PAGE_U_EXECUTE | _PAGE_U_WRITE | _PAGE_U_READ | \
-			 _PAGE_K_EXECUTE | _PAGE_K_WRITE | _PAGE_K_READ)
+#define PTE_BITS_IN_PD0		(_PAGE_GLOBAL | _PAGE_PRESENT)
+#define PTE_BITS_RWX		(_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ)
+#define PTE_BITS_NON_RWX_IN_PD1	(PAGE_MASK | _PAGE_CACHEABLE)
 
 /**************************************************************************
  * Mapping of vm_flags (Generic VM) to PTE flags (arch specific)
diff --git a/arch/arc/include/asm/ptrace.h b/arch/arc/include/asm/ptrace.h
index c9938e7..1bfeec2 100644
--- a/arch/arc/include/asm/ptrace.h
+++ b/arch/arc/include/asm/ptrace.h
@@ -20,27 +20,17 @@
 
 	/* Real registers */
 	long bta;	/* bta_l1, bta_l2, erbta */
-	long lp_start;
-	long lp_end;
-	long lp_count;
+
+	long lp_start, lp_end, lp_count;
+
 	long status32;	/* status32_l1, status32_l2, erstatus */
 	long ret;	/* ilink1, ilink2 or eret */
 	long blink;
 	long fp;
 	long r26;	/* gp */
-	long r12;
-	long r11;
-	long r10;
-	long r9;
-	long r8;
-	long r7;
-	long r6;
-	long r5;
-	long r4;
-	long r3;
-	long r2;
-	long r1;
-	long r0;
+
+	long r12, r11, r10, r9, r8, r7, r6, r5, r4, r3, r2, r1, r0;
+
 	long sp;	/* user/kernel sp depending on where we came from  */
 	long orig_r0;
 
@@ -70,19 +60,7 @@
 /* Callee saved registers - need to be saved only when you are scheduled out */
 
 struct callee_regs {
-	long r25;
-	long r24;
-	long r23;
-	long r22;
-	long r21;
-	long r20;
-	long r19;
-	long r18;
-	long r17;
-	long r16;
-	long r15;
-	long r14;
-	long r13;
+	long r25, r24, r23, r22, r21, r20, r19, r18, r17, r16, r15, r14, r13;
 };
 
 #define instruction_pointer(regs)	((regs)->ret)
diff --git a/arch/arc/include/asm/spinlock_types.h b/arch/arc/include/asm/spinlock_types.h
index 8276bfd..662627c 100644
--- a/arch/arc/include/asm/spinlock_types.h
+++ b/arch/arc/include/asm/spinlock_types.h
@@ -20,9 +20,9 @@
 #define __ARCH_SPIN_LOCK_LOCKED		{ __ARCH_SPIN_LOCK_LOCKED__ }
 
 /*
- * Unlocked:     0x01_00_00_00
- * Read lock(s): 0x00_FF_00_00 to say 0x01
- * Write lock:   0x0, but only possible if prior value "unlocked" 0x0100_0000
+ * Unlocked     : 0x0100_0000
+ * Read lock(s) : 0x00FF_FFFF to 0x01  (Multiple Readers decrement it)
+ * Write lock   : 0x0, but only if prior value is "unlocked" 0x0100_0000
  */
 typedef struct {
 	volatile unsigned int	counter;
diff --git a/arch/arc/kernel/.gitignore b/arch/arc/kernel/.gitignore
new file mode 100644
index 0000000..c5f676c
--- /dev/null
+++ b/arch/arc/kernel/.gitignore
@@ -0,0 +1 @@
+vmlinux.lds
diff --git a/arch/arc/kernel/devtree.c b/arch/arc/kernel/devtree.c
index bdee3a8..2340af0 100644
--- a/arch/arc/kernel/devtree.c
+++ b/arch/arc/kernel/devtree.c
@@ -18,12 +18,6 @@
 #include <asm/clk.h>
 #include <asm/mach_desc.h>
 
-/* called from unflatten_device_tree() to bootstrap devicetree itself */
-void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align)
-{
-	return __va(memblock_alloc(size, align));
-}
-
 /**
  * setup_machine_fdt - Machine setup when an dtb was passed to the kernel
  * @dt:		virtual address pointer to dt blob
diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S
index 1d71651..b908dde 100644
--- a/arch/arc/kernel/entry.S
+++ b/arch/arc/kernel/entry.S
@@ -267,12 +267,7 @@
 
 ARC_ENTRY instr_service
 
-	EXCPN_PROLOG_FREEUP_REG r9
-
-	lr  r9, [erstatus]
-
-	SWITCH_TO_KERNEL_STK
-	SAVE_ALL_SYS
+	EXCEPTION_PROLOGUE
 
 	lr  r0, [efa]
 	mov r1, sp
@@ -289,15 +284,13 @@
 
 ARC_ENTRY mem_service
 
-	EXCPN_PROLOG_FREEUP_REG r9
-
-	lr  r9, [erstatus]
-
-	SWITCH_TO_KERNEL_STK
-	SAVE_ALL_SYS
+	EXCEPTION_PROLOGUE
 
 	lr  r0, [efa]
 	mov r1, sp
+
+	FAKE_RET_FROM_EXCPN r9
+
 	bl  do_memory_error
 	b   ret_from_exception
 ARC_EXIT mem_service
@@ -308,11 +301,7 @@
 
 ARC_ENTRY EV_MachineCheck
 
-	EXCPN_PROLOG_FREEUP_REG r9
-	lr  r9, [erstatus]
-
-	SWITCH_TO_KERNEL_STK
-	SAVE_ALL_SYS
+	EXCEPTION_PROLOGUE
 
 	lr  r2, [ecr]
 	lr  r0, [efa]
@@ -342,13 +331,7 @@
 
 ARC_ENTRY EV_TLBProtV
 
-	EXCPN_PROLOG_FREEUP_REG r9
-
-	;Which mode (user/kernel) was the system in when Exception occured
-	lr  r9, [erstatus]
-
-	SWITCH_TO_KERNEL_STK
-	SAVE_ALL_SYS
+	EXCEPTION_PROLOGUE
 
 	;---------(3) Save some more regs-----------------
 	;  vineetg: Mar 6th: Random Seg Fault issue #1
@@ -406,12 +389,7 @@
 ; ---------------------------------------------
 ARC_ENTRY EV_PrivilegeV
 
-	EXCPN_PROLOG_FREEUP_REG r9
-
-	lr  r9, [erstatus]
-
-	SWITCH_TO_KERNEL_STK
-	SAVE_ALL_SYS
+	EXCEPTION_PROLOGUE
 
 	lr  r0, [efa]
 	mov r1, sp
@@ -427,14 +405,13 @@
 ; ---------------------------------------------
 ARC_ENTRY EV_Extension
 
-	EXCPN_PROLOG_FREEUP_REG r9
-	lr  r9, [erstatus]
-
-	SWITCH_TO_KERNEL_STK
-	SAVE_ALL_SYS
+	EXCEPTION_PROLOGUE
 
 	lr  r0, [efa]
 	mov r1, sp
+
+	FAKE_RET_FROM_EXCPN r9
+
 	bl  do_extension_fault
 	b   ret_from_exception
 ARC_EXIT EV_Extension
@@ -526,14 +503,7 @@
 
 ARC_ENTRY EV_Trap
 
-	; Need at least 1 reg to code the early exception prolog
-	EXCPN_PROLOG_FREEUP_REG r9
-
-	;Which mode (user/kernel) was the system in when intr occured
-	lr  r9, [erstatus]
-
-	SWITCH_TO_KERNEL_STK
-	SAVE_ALL_SYS
+	EXCEPTION_PROLOGUE
 
 	;------- (4) What caused the Trap --------------
 	lr     r12, [ecr]
@@ -642,6 +612,9 @@
 
 #ifdef CONFIG_PREEMPT
 
+	; This is a must for preempt_schedule_irq()
+	IRQ_DISABLE	r9
+
 	; Can't preempt if preemption disabled
 	GET_CURR_THR_INFO_FROM_SP   r10
 	ld  r8, [r10, THREAD_INFO_PREEMPT_COUNT]
@@ -651,8 +624,6 @@
 	ld  r9, [r10, THREAD_INFO_FLAGS]
 	bbit0  r9, TIF_NEED_RESCHED, restore_regs
 
-	IRQ_DISABLE	r9
-
 	; Invoke PREEMPTION
 	bl      preempt_schedule_irq
 
@@ -665,12 +636,11 @@
 ;
 ; Restore the saved sys context (common exit-path for EXCPN/IRQ/Trap)
 ; IRQ shd definitely not happen between now and rtie
+; All 2 entry points to here already disable interrupts
 
 restore_regs :
 
-	; Disable Interrupts while restoring reg-file back
-	; XXX can this be optimised out
-	IRQ_DISABLE_SAVE    r9, r10	;@r10 has prisitine (pre-disable) copy
+	lr	r10, [status32]
 
 	; Restore REG File. In case multiple Events outstanding,
 	; use the same priorty as rtie: EXCPN, L2 IRQ, L1 IRQ, None
diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
index 6b08345..b011f8c 100644
--- a/arch/arc/kernel/setup.c
+++ b/arch/arc/kernel/setup.c
@@ -357,8 +357,6 @@
 	 */
 	root_mountflags &= ~MS_RDONLY;
 
-	console_verbose();
-
 #if defined(CONFIG_VT) && defined(CONFIG_DUMMY_CONSOLE)
 	conswitchp = &dummy_con;
 #endif
diff --git a/arch/arc/kernel/unaligned.c b/arch/arc/kernel/unaligned.c
index c0f832f..28d1700 100644
--- a/arch/arc/kernel/unaligned.c
+++ b/arch/arc/kernel/unaligned.c
@@ -16,6 +16,16 @@
 #include <linux/uaccess.h>
 #include <asm/disasm.h>
 
+#ifdef CONFIG_CPU_BIG_ENDIAN
+#define BE		1
+#define FIRST_BYTE_16	"swap %1, %1\n swape %1, %1\n"
+#define FIRST_BYTE_32	"swape %1, %1\n"
+#else
+#define BE		0
+#define FIRST_BYTE_16
+#define FIRST_BYTE_32
+#endif
+
 #define __get8_unaligned_check(val, addr, err)		\
 	__asm__(					\
 	"1:	ldb.ab	%1, [%2, 1]\n"			\
@@ -36,9 +46,9 @@
 	do {						\
 		unsigned int err = 0, v, a = addr;	\
 		__get8_unaligned_check(v, a, err);	\
-		val =  v ;				\
+		val =  v << ((BE) ? 8 : 0);		\
 		__get8_unaligned_check(v, a, err);	\
-		val |= v << 8;				\
+		val |= v << ((BE) ? 0 : 8);		\
 		if (err)				\
 			goto fault;			\
 	} while (0)
@@ -47,13 +57,13 @@
 	do {						\
 		unsigned int err = 0, v, a = addr;	\
 		__get8_unaligned_check(v, a, err);	\
-		val =  v << 0;				\
+		val =  v << ((BE) ? 24 : 0);		\
 		__get8_unaligned_check(v, a, err);	\
-		val |= v << 8;				\
+		val |= v << ((BE) ? 16 : 8);		\
 		__get8_unaligned_check(v, a, err);	\
-		val |= v << 16;				\
+		val |= v << ((BE) ? 8 : 16);		\
 		__get8_unaligned_check(v, a, err);	\
-		val |= v << 24;				\
+		val |= v << ((BE) ? 0 : 24);		\
 		if (err)				\
 			goto fault;			\
 	} while (0)
@@ -63,6 +73,7 @@
 		unsigned int err = 0, v = val, a = addr;\
 							\
 		__asm__(				\
+		FIRST_BYTE_16				\
 		"1:	stb.ab	%1, [%2, 1]\n"		\
 		"	lsr %1, %1, 8\n"		\
 		"2:	stb	%1, [%2]\n"		\
@@ -87,8 +98,9 @@
 #define put32_unaligned_check(val, addr)		\
 	do {						\
 		unsigned int err = 0, v = val, a = addr;\
-		__asm__(				\
 							\
+		__asm__(				\
+		FIRST_BYTE_32				\
 		"1:	stb.ab	%1, [%2, 1]\n"		\
 		"	lsr %1, %1, 8\n"		\
 		"2:	stb.ab	%1, [%2, 1]\n"		\
diff --git a/arch/arc/mm/cache_arc700.c b/arch/arc/mm/cache_arc700.c
index f415d85..5a1259c 100644
--- a/arch/arc/mm/cache_arc700.c
+++ b/arch/arc/mm/cache_arc700.c
@@ -622,12 +622,12 @@
 /*
  * General purpose helper to make I and D cache lines consistent.
  * @paddr is phy addr of region
- * @vaddr is typically user or kernel vaddr (vmalloc)
- *    Howver in one instance, flush_icache_range() by kprobe (for a breakpt in
+ * @vaddr is typically user vaddr (breakpoint) or kernel vaddr (vmalloc)
+ *    However in one instance, when called by kprobe (for a breakpt in
  *    builtin kernel code) @vaddr will be paddr only, meaning CDU operation will
  *    use a paddr to index the cache (despite VIPT). This is fine since since a
- *    built-in kernel page will not have any virtual mappings (not even kernel)
- *    kprobe on loadable module is different as it will have kvaddr.
+ *    builtin kernel page will not have any virtual mappings.
+ *    kprobe on loadable module will be kernel vaddr.
  */
 void __sync_icache_dcache(unsigned long paddr, unsigned long vaddr, int len)
 {
diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c
index a08ce71..81279ec 100644
--- a/arch/arc/mm/init.c
+++ b/arch/arc/mm/init.c
@@ -127,9 +127,8 @@
 #endif
 
 #ifdef CONFIG_OF_FLATTREE
-void __init early_init_dt_setup_initrd_arch(unsigned long start,
-					    unsigned long end)
+void __init early_init_dt_setup_initrd_arch(u64 start, u64 end)
 {
-	pr_err("%s(%lx, %lx)\n", __func__, start, end);
+	pr_err("%s(%llx, %llx)\n", __func__, start, end);
 }
 #endif /* CONFIG_OF_FLATTREE */
diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index 7957dc4..71cb26d 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -52,6 +52,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/bug.h>
 #include <asm/arcregs.h>
 #include <asm/setup.h>
 #include <asm/mmu_context.h>
@@ -99,48 +100,45 @@
 
 
 /* A copy of the ASID from the PID reg is kept in asid_cache */
-int asid_cache = FIRST_ASID;
-
-/* ASID to mm struct mapping. We have one extra entry corresponding to
- * NO_ASID to save us a compare when clearing the mm entry for old asid
- * see get_new_mmu_context (asm-arc/mmu_context.h)
- */
-struct mm_struct *asid_mm_map[NUM_ASID + 1];
+unsigned int asid_cache = MM_CTXT_FIRST_CYCLE;
 
 /*
  * Utility Routine to erase a J-TLB entry
- * The procedure is to look it up in the MMU. If found, ERASE it by
- *  issuing a TlbWrite CMD with PD0 = PD1 = 0
+ * Caller needs to setup Index Reg (manually or via getIndex)
  */
-
-static void __tlb_entry_erase(void)
+static inline void __tlb_entry_erase(void)
 {
 	write_aux_reg(ARC_REG_TLBPD1, 0);
 	write_aux_reg(ARC_REG_TLBPD0, 0);
 	write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite);
 }
 
+static inline unsigned int tlb_entry_lkup(unsigned long vaddr_n_asid)
+{
+	unsigned int idx;
+
+	write_aux_reg(ARC_REG_TLBPD0, vaddr_n_asid);
+
+	write_aux_reg(ARC_REG_TLBCOMMAND, TLBProbe);
+	idx = read_aux_reg(ARC_REG_TLBINDEX);
+
+	return idx;
+}
+
 static void tlb_entry_erase(unsigned int vaddr_n_asid)
 {
 	unsigned int idx;
 
 	/* Locate the TLB entry for this vaddr + ASID */
-	write_aux_reg(ARC_REG_TLBPD0, vaddr_n_asid);
-	write_aux_reg(ARC_REG_TLBCOMMAND, TLBProbe);
-	idx = read_aux_reg(ARC_REG_TLBINDEX);
+	idx = tlb_entry_lkup(vaddr_n_asid);
 
 	/* No error means entry found, zero it out */
 	if (likely(!(idx & TLB_LKUP_ERR))) {
 		__tlb_entry_erase();
-	} else {		/* Some sort of Error */
-
+	} else {
 		/* Duplicate entry error */
-		if (idx & 0x1) {
-			/* TODO we need to handle this case too */
-			pr_emerg("unhandled Duplicate flush for %x\n",
-			       vaddr_n_asid);
-		}
-		/* else entry not found so nothing to do */
+		WARN(idx == TLB_DUP_ERR, "Probe returned Dup PD for %x\n",
+					   vaddr_n_asid);
 	}
 }
 
@@ -159,7 +157,7 @@
 {
 #if (CONFIG_ARC_MMU_VER >= 2)
 
-#if (CONFIG_ARC_MMU_VER < 3)
+#if (CONFIG_ARC_MMU_VER == 2)
 	/* MMU v2 introduced the uTLB Flush command.
 	 * There was however an obscure hardware bug, where uTLB flush would
 	 * fail when a prior probe for J-TLB (both totally unrelated) would
@@ -182,6 +180,36 @@
 
 }
 
+static void tlb_entry_insert(unsigned int pd0, unsigned int pd1)
+{
+	unsigned int idx;
+
+	/*
+	 * First verify if entry for this vaddr+ASID already exists
+	 * This also sets up PD0 (vaddr, ASID..) for final commit
+	 */
+	idx = tlb_entry_lkup(pd0);
+
+	/*
+	 * If Not already present get a free slot from MMU.
+	 * Otherwise, Probe would have located the entry and set INDEX Reg
+	 * with existing location. This will cause Write CMD to over-write
+	 * existing entry with new PD0 and PD1
+	 */
+	if (likely(idx & TLB_LKUP_ERR))
+		write_aux_reg(ARC_REG_TLBCOMMAND, TLBGetIndex);
+
+	/* setup the other half of TLB entry (pfn, rwx..) */
+	write_aux_reg(ARC_REG_TLBPD1, pd1);
+
+	/*
+	 * Commit the Entry to MMU
+	 * It doesnt sound safe to use the TLBWriteNI cmd here
+	 * which doesn't flush uTLBs. I'd rather be safe than sorry.
+	 */
+	write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite);
+}
+
 /*
  * Un-conditionally (without lookup) erase the entire MMU contents
  */
@@ -224,13 +252,14 @@
 		return;
 
 	/*
-	 * Workaround for Android weirdism:
-	 * A binder VMA could end up in a task such that vma->mm != tsk->mm
-	 * old code would cause h/w - s/w ASID to get out of sync
+	 * - Move to a new ASID, but only if the mm is still wired in
+	 *   (Android Binder ended up calling this for vma->mm != tsk->mm,
+	 *    causing h/w - s/w ASID to get out of sync)
+	 * - Also get_new_mmu_context() new implementation allocates a new
+	 *   ASID only if it is not allocated already - so unallocate first
 	 */
-	if (current->mm != mm)
-		destroy_context(mm);
-	else
+	destroy_context(mm);
+	if (current->mm == mm)
 		get_new_mmu_context(mm);
 }
 
@@ -246,7 +275,6 @@
 			   unsigned long end)
 {
 	unsigned long flags;
-	unsigned int asid;
 
 	/* If range @start to @end is more than 32 TLB entries deep,
 	 * its better to move to a new ASID rather than searching for
@@ -268,11 +296,10 @@
 	start &= PAGE_MASK;
 
 	local_irq_save(flags);
-	asid = vma->vm_mm->context.asid;
 
-	if (asid != NO_ASID) {
+	if (vma->vm_mm->context.asid != MM_CTXT_NO_ASID) {
 		while (start < end) {
-			tlb_entry_erase(start | (asid & 0xff));
+			tlb_entry_erase(start | hw_pid(vma->vm_mm));
 			start += PAGE_SIZE;
 		}
 	}
@@ -326,9 +353,8 @@
 	 */
 	local_irq_save(flags);
 
-	if (vma->vm_mm->context.asid != NO_ASID) {
-		tlb_entry_erase((page & PAGE_MASK) |
-				(vma->vm_mm->context.asid & 0xff));
+	if (vma->vm_mm->context.asid != MM_CTXT_NO_ASID) {
+		tlb_entry_erase((page & PAGE_MASK) | hw_pid(vma->vm_mm));
 		utlb_invalidate();
 	}
 
@@ -341,8 +367,8 @@
 void create_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
 {
 	unsigned long flags;
-	unsigned int idx, asid_or_sasid;
-	unsigned long pd0_flags;
+	unsigned int asid_or_sasid, rwx;
+	unsigned long pd0, pd1;
 
 	/*
 	 * create_tlb() assumes that current->mm == vma->mm, since
@@ -381,40 +407,30 @@
 	/* update this PTE credentials */
 	pte_val(*ptep) |= (_PAGE_PRESENT | _PAGE_ACCESSED);
 
-	/* Create HW TLB entry Flags (in PD0) from PTE Flags */
-#if (CONFIG_ARC_MMU_VER <= 2)
-	pd0_flags = ((pte_val(*ptep) & PTE_BITS_IN_PD0) >> 1);
-#else
-	pd0_flags = ((pte_val(*ptep) & PTE_BITS_IN_PD0));
-#endif
+	/* Create HW TLB(PD0,PD1) from PTE  */
 
 	/* ASID for this task */
 	asid_or_sasid = read_aux_reg(ARC_REG_PID) & 0xff;
 
-	write_aux_reg(ARC_REG_TLBPD0, address | pd0_flags | asid_or_sasid);
-
-	/* Load remaining info in PD1 (Page Frame Addr and Kx/Kw/Kr Flags) */
-	write_aux_reg(ARC_REG_TLBPD1, (pte_val(*ptep) & PTE_BITS_IN_PD1));
-
-	/* First verify if entry for this vaddr+ASID already exists */
-	write_aux_reg(ARC_REG_TLBCOMMAND, TLBProbe);
-	idx = read_aux_reg(ARC_REG_TLBINDEX);
+	pd0 = address | asid_or_sasid | (pte_val(*ptep) & PTE_BITS_IN_PD0);
 
 	/*
-	 * If Not already present get a free slot from MMU.
-	 * Otherwise, Probe would have located the entry and set INDEX Reg
-	 * with existing location. This will cause Write CMD to over-write
-	 * existing entry with new PD0 and PD1
+	 * ARC MMU provides fully orthogonal access bits for K/U mode,
+	 * however Linux only saves 1 set to save PTE real-estate
+	 * Here we convert 3 PTE bits into 6 MMU bits:
+	 * -Kernel only entries have Kr Kw Kx 0 0 0
+	 * -User entries have mirrored K and U bits
 	 */
-	if (likely(idx & TLB_LKUP_ERR))
-		write_aux_reg(ARC_REG_TLBCOMMAND, TLBGetIndex);
+	rwx = pte_val(*ptep) & PTE_BITS_RWX;
 
-	/*
-	 * Commit the Entry to MMU
-	 * It doesnt sound safe to use the TLBWriteNI cmd here
-	 * which doesn't flush uTLBs. I'd rather be safe than sorry.
-	 */
-	write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite);
+	if (pte_val(*ptep) & _PAGE_GLOBAL)
+		rwx <<= 3;		/* r w x => Kr Kw Kx 0 0 0 */
+	else
+		rwx |= (rwx << 3);	/* r w x => Kr Kw Kx Ur Uw Ux */
+
+	pd1 = rwx | (pte_val(*ptep) & PTE_BITS_NON_RWX_IN_PD1);
+
+	tlb_entry_insert(pd0, pd1);
 
 	local_irq_restore(flags);
 }
@@ -553,13 +569,6 @@
 	if (mmu->pg_sz != PAGE_SIZE)
 		panic("MMU pg size != PAGE_SIZE (%luk)\n", TO_KB(PAGE_SIZE));
 
-	/*
-	 * ASID mgmt data structures are compile time init
-	 *  asid_cache = FIRST_ASID and asid_mm_map[] all zeroes
-	 */
-
-	local_flush_tlb_all();
-
 	/* Enable the MMU */
 	write_aux_reg(ARC_REG_PID, MMU_ENABLE);
 
@@ -671,25 +680,28 @@
  * Low Level ASM TLB handler calls this if it finds that HW and SW ASIDS
  * don't match
  */
-void print_asid_mismatch(int is_fast_path)
+void print_asid_mismatch(int mm_asid, int mmu_asid, int is_fast_path)
 {
-	int pid_sw, pid_hw;
-	pid_sw = current->active_mm->context.asid;
-	pid_hw = read_aux_reg(ARC_REG_PID) & 0xff;
-
 	pr_emerg("ASID Mismatch in %s Path Handler: sw-pid=0x%x hw-pid=0x%x\n",
-	       is_fast_path ? "Fast" : "Slow", pid_sw, pid_hw);
+	       is_fast_path ? "Fast" : "Slow", mm_asid, mmu_asid);
 
 	__asm__ __volatile__("flag 1");
 }
 
-void tlb_paranoid_check(unsigned int pid_sw, unsigned long addr)
+void tlb_paranoid_check(unsigned int mm_asid, unsigned long addr)
 {
-	unsigned int pid_hw;
+	unsigned int mmu_asid;
 
-	pid_hw = read_aux_reg(ARC_REG_PID) & 0xff;
+	mmu_asid = read_aux_reg(ARC_REG_PID) & 0xff;
 
-	if (addr < 0x70000000 && ((pid_hw != pid_sw) || (pid_sw == NO_ASID)))
-		print_asid_mismatch(0);
+	/*
+	 * At the time of a TLB miss/installation
+	 *   - HW version needs to match SW version
+	 *   - SW needs to have a valid ASID
+	 */
+	if (addr < 0x70000000 &&
+	    ((mm_asid == MM_CTXT_NO_ASID) ||
+	      (mmu_asid != (mm_asid & MM_CTXT_ASID_MASK))))
+		print_asid_mismatch(mm_asid, mmu_asid, 0);
 }
 #endif
diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S
index 5c5bb23..cf7d7d9 100644
--- a/arch/arc/mm/tlbex.S
+++ b/arch/arc/mm/tlbex.S
@@ -44,17 +44,36 @@
 #include <asm/arcregs.h>
 #include <asm/cache.h>
 #include <asm/processor.h>
-#if (CONFIG_ARC_MMU_VER == 1)
 #include <asm/tlb-mmu1.h>
-#endif
 
-;--------------------------------------------------------------------------
-; scratch memory to save the registers (r0-r3) used to code TLB refill Handler
-; For details refer to comments before TLBMISS_FREEUP_REGS below
+;-----------------------------------------------------------------
+; ARC700 Exception Handling doesn't auto-switch stack and it only provides
+; ONE scratch AUX reg "ARC_REG_SCRATCH_DATA0"
+;
+; For Non-SMP, the scratch AUX reg is repurposed to cache task PGD, so a
+; "global" is used to free-up FIRST core reg to be able to code the rest of
+; exception prologue (IRQ auto-disabled on Exceptions, so it's IRQ-safe).
+; Since the Fast Path TLB Miss handler is coded with 4 regs, the remaining 3
+; need to be saved as well by extending the "global" to be 4 words. Hence
+;	".size   ex_saved_reg1, 16"
+; [All of this dance is to avoid stack switching for each TLB Miss, since we
+; only need to save only a handful of regs, as opposed to complete reg file]
+;
+; For ARC700 SMP, the "global" obviously can't be used for free up the FIRST
+; core reg as it will not be SMP safe.
+; Thus scratch AUX reg is used (and no longer used to cache task PGD).
+; To save the rest of 3 regs - per cpu, the global is made "per-cpu".
+; Epilogue thus has to locate the "per-cpu" storage for regs.
+; To avoid cache line bouncing the per-cpu global is aligned/sized per
+; L1_CACHE_SHIFT, despite fundamentally needing to be 12 bytes only. Hence
+;	".size   ex_saved_reg1, (CONFIG_NR_CPUS << L1_CACHE_SHIFT)"
+
+; As simple as that....
 ;--------------------------------------------------------------------------
 
+; scratch memory to save [r0-r3] used to code TLB refill Handler
 ARCFP_DATA ex_saved_reg1
-	.align 1 << L1_CACHE_SHIFT	; IMP: Must be Cache Line aligned
+	.align 1 << L1_CACHE_SHIFT
 	.type   ex_saved_reg1, @object
 #ifdef CONFIG_SMP
 	.size   ex_saved_reg1, (CONFIG_NR_CPUS << L1_CACHE_SHIFT)
@@ -66,6 +85,44 @@
 	.zero 16
 #endif
 
+.macro TLBMISS_FREEUP_REGS
+#ifdef CONFIG_SMP
+	sr  r0, [ARC_REG_SCRATCH_DATA0]	; freeup r0 to code with
+	GET_CPU_ID  r0			; get to per cpu scratch mem,
+	lsl r0, r0, L1_CACHE_SHIFT	; cache line wide per cpu
+	add r0, @ex_saved_reg1, r0
+#else
+	st    r0, [@ex_saved_reg1]
+	mov_s r0, @ex_saved_reg1
+#endif
+	st_s  r1, [r0, 4]
+	st_s  r2, [r0, 8]
+	st_s  r3, [r0, 12]
+
+	; VERIFY if the ASID in MMU-PID Reg is same as
+	; one in Linux data structures
+
+	tlb_paranoid_check_asm
+.endm
+
+.macro TLBMISS_RESTORE_REGS
+#ifdef CONFIG_SMP
+	GET_CPU_ID  r0			; get to per cpu scratch mem
+	lsl r0, r0, L1_CACHE_SHIFT	; each is cache line wide
+	add r0, @ex_saved_reg1, r0
+	ld_s  r3, [r0,12]
+	ld_s  r2, [r0, 8]
+	ld_s  r1, [r0, 4]
+	lr    r0, [ARC_REG_SCRATCH_DATA0]
+#else
+	mov_s r0, @ex_saved_reg1
+	ld_s  r3, [r0,12]
+	ld_s  r2, [r0, 8]
+	ld_s  r1, [r0, 4]
+	ld_s  r0, [r0]
+#endif
+.endm
+
 ;============================================================================
 ;  Troubleshooting Stuff
 ;============================================================================
@@ -76,34 +133,35 @@
 ; In bizzare scenrios SW and HW ASID can get out-of-sync which is trouble.
 ; So we try to detect this in TLB Mis shandler
 
-
-.macro DBG_ASID_MISMATCH
+.macro tlb_paranoid_check_asm
 
 #ifdef CONFIG_ARC_DBG_TLB_PARANOIA
 
-	; make sure h/w ASID is same as s/w ASID
-
 	GET_CURR_TASK_ON_CPU  r3
 	ld r0, [r3, TASK_ACT_MM]
 	ld r0, [r0, MM_CTXT+MM_CTXT_ASID]
+	breq r0, 0, 55f	; Error if no ASID allocated
 
 	lr r1, [ARC_REG_PID]
 	and r1, r1, 0xFF
-	breq r1, r0, 5f
 
+	and r2, r0, 0xFF	; MMU PID bits only for comparison
+	breq r1, r2, 5f
+
+55:
 	; Error if H/w and S/w ASID don't match, but NOT if in kernel mode
-	lr  r0, [erstatus]
-	bbit0 r0, STATUS_U_BIT, 5f
+	lr  r2, [erstatus]
+	bbit0 r2, STATUS_U_BIT, 5f
 
 	; We sure are in troubled waters, Flag the error, but to do so
 	; need to switch to kernel mode stack to call error routine
 	GET_TSK_STACK_BASE   r3, sp
 
 	; Call printk to shoutout aloud
-	mov r0, 1
+	mov r2, 1
 	j print_asid_mismatch
 
-5:   ; ASIDs match so proceed normally
+5:	; ASIDs match so proceed normally
 	nop
 
 #endif
@@ -161,13 +219,17 @@
 ; IN: r0 = PTE, r1 = ptr to PTE
 
 .macro CONV_PTE_TO_TLB
-	and r3, r0, PTE_BITS_IN_PD1 ; Extract permission flags+PFN from PTE
-	sr  r3, [ARC_REG_TLBPD1]    ; these go in PD1
+	and    r3, r0, PTE_BITS_RWX	;       r w x
+	lsl    r2, r3, 3		; r w x 0 0 0
+	and.f  0,  r0, _PAGE_GLOBAL
+	or.z   r2, r2, r3		; r w x r w x
+
+	and r3, r0, PTE_BITS_NON_RWX_IN_PD1 ; Extract PFN+cache bits from PTE
+	or  r3, r3, r2
+
+	sr  r3, [ARC_REG_TLBPD1]    	; these go in PD1
 
 	and r2, r0, PTE_BITS_IN_PD0 ; Extract other PTE flags: (V)alid, (G)lb
-#if (CONFIG_ARC_MMU_VER <= 2)   /* Neednot be done with v3 onwards */
-	lsr r2, r2                  ; shift PTE flags to match layout in PD0
-#endif
 
 	lr  r3,[ARC_REG_TLBPD0]     ; MMU prepares PD0 with vaddr and asid
 
@@ -191,68 +253,6 @@
 #endif
 .endm
 
-;-----------------------------------------------------------------
-; ARC700 Exception Handling doesn't auto-switch stack and it only provides
-; ONE scratch AUX reg "ARC_REG_SCRATCH_DATA0"
-;
-; For Non-SMP, the scratch AUX reg is repurposed to cache task PGD, so a
-; "global" is used to free-up FIRST core reg to be able to code the rest of
-; exception prologue (IRQ auto-disabled on Exceptions, so it's IRQ-safe).
-; Since the Fast Path TLB Miss handler is coded with 4 regs, the remaining 3
-; need to be saved as well by extending the "global" to be 4 words. Hence
-;	".size   ex_saved_reg1, 16"
-; [All of this dance is to avoid stack switching for each TLB Miss, since we
-; only need to save only a handful of regs, as opposed to complete reg file]
-;
-; For ARC700 SMP, the "global" obviously can't be used for free up the FIRST
-; core reg as it will not be SMP safe.
-; Thus scratch AUX reg is used (and no longer used to cache task PGD).
-; To save the rest of 3 regs - per cpu, the global is made "per-cpu".
-; Epilogue thus has to locate the "per-cpu" storage for regs.
-; To avoid cache line bouncing the per-cpu global is aligned/sized per
-; L1_CACHE_SHIFT, despite fundamentally needing to be 12 bytes only. Hence
-;	".size   ex_saved_reg1, (CONFIG_NR_CPUS << L1_CACHE_SHIFT)"
-
-; As simple as that....
-
-.macro TLBMISS_FREEUP_REGS
-#ifdef CONFIG_SMP
-	sr  r0, [ARC_REG_SCRATCH_DATA0]	; freeup r0 to code with
-	GET_CPU_ID  r0			; get to per cpu scratch mem,
-	lsl r0, r0, L1_CACHE_SHIFT	; cache line wide per cpu
-	add r0, @ex_saved_reg1, r0
-#else
-	st    r0, [@ex_saved_reg1]
-	mov_s r0, @ex_saved_reg1
-#endif
-	st_s  r1, [r0, 4]
-	st_s  r2, [r0, 8]
-	st_s  r3, [r0, 12]
-
-	; VERIFY if the ASID in MMU-PID Reg is same as
-	; one in Linux data structures
-
-	DBG_ASID_MISMATCH
-.endm
-
-;-----------------------------------------------------------------
-.macro TLBMISS_RESTORE_REGS
-#ifdef CONFIG_SMP
-	GET_CPU_ID  r0			; get to per cpu scratch mem
-	lsl r0, r0, L1_CACHE_SHIFT	; each is cache line wide
-	add r0, @ex_saved_reg1, r0
-	ld_s  r3, [r0,12]
-	ld_s  r2, [r0, 8]
-	ld_s  r1, [r0, 4]
-	lr    r0, [ARC_REG_SCRATCH_DATA0]
-#else
-	mov_s r0, @ex_saved_reg1
-	ld_s  r3, [r0,12]
-	ld_s  r2, [r0, 8]
-	ld_s  r1, [r0, 4]
-	ld_s  r0, [r0]
-#endif
-.endm
 
 ARCFP_CODE	;Fast Path Code, candidate for ICCM
 
@@ -277,8 +277,8 @@
 	;----------------------------------------------------------------
 	; VERIFY_PTE: Check if PTE permissions approp for executing code
 	cmp_s   r2, VMALLOC_START
-	mov.lo  r2, (_PAGE_PRESENT | _PAGE_U_EXECUTE)
-	mov.hs  r2, (_PAGE_PRESENT | _PAGE_K_EXECUTE)
+	mov_s   r2, (_PAGE_PRESENT | _PAGE_EXECUTE)
+	or.hs   r2, r2, _PAGE_GLOBAL
 
 	and     r3, r0, r2  ; Mask out NON Flag bits from PTE
 	xor.f   r3, r3, r2  ; check ( ( pte & flags_test ) == flags_test )
@@ -317,26 +317,21 @@
 	;----------------------------------------------------------------
 	; VERIFY_PTE: Chk if PTE permissions approp for data access (R/W/R+W)
 
-	mov_s   r2, 0
+	cmp_s	r2, VMALLOC_START
+	mov_s   r2, _PAGE_PRESENT	; common bit for K/U PTE
+	or.hs	r2, r2, _PAGE_GLOBAL	; kernel PTE only
+
+	; Linux PTE [RWX] bits are semantically overloaded:
+	; -If PAGE_GLOBAL set, they refer to kernel-only flags (vmalloc)
+	; -Otherwise they are user-mode permissions, and those are exactly
+	;  same for kernel mode as well (e.g. copy_(to|from)_user)
+
 	lr      r3, [ecr]
 	btst_s  r3, ECR_C_BIT_DTLB_LD_MISS	; Read Access
-	or.nz   r2, r2, _PAGE_U_READ      	; chk for Read flag in PTE
+	or.nz   r2, r2, _PAGE_READ      	; chk for Read flag in PTE
 	btst_s  r3, ECR_C_BIT_DTLB_ST_MISS	; Write Access
-	or.nz   r2, r2, _PAGE_U_WRITE     	; chk for Write flag in PTE
-	; Above laddering takes care of XCHG access
-	;   which is both Read and Write
-
-	; If kernel mode access, ; make _PAGE_xx flags as _PAGE_K_xx
-	; For copy_(to|from)_user, despite exception taken in kernel mode,
-	; this code is not hit, because EFA would still be the user mode
-	; address (EFA < 0x6000_0000).
-	; This code is for legit kernel mode faults, vmalloc specifically
-	; (EFA: 0x7000_0000 to 0x7FFF_FFFF)
-
-	lr      r3, [efa]
-	cmp     r3, VMALLOC_START - 1   ; If kernel mode access
-	asl.hi  r2, r2, 3               ; make _PAGE_xx flags as _PAGE_K_xx
-	or      r2, r2, _PAGE_PRESENT   ; Common flag for K/U mode
+	or.nz   r2, r2, _PAGE_WRITE     	; chk for Write flag in PTE
+	; Above laddering takes care of XCHG access (both R and W)
 
 	; By now, r2 setup with all the Flags we need to check in PTE
 	and     r3, r0, r2              ; Mask out NON Flag bits from PTE
@@ -371,13 +366,7 @@
 
 	; Slow path TLB Miss handled as a regular ARC Exception
 	; (stack switching / save the complete reg-file).
-	; That requires freeing up r9
-	EXCPN_PROLOG_FREEUP_REG r9
-
-	lr  r9, [erstatus]
-
-	SWITCH_TO_KERNEL_STK
-	SAVE_ALL_SYS
+	EXCEPTION_PROLOGUE
 
 	; ------- setup args for Linux Page fault Hanlder ---------
 	mov_s r0, sp
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index a00f4c1..c8a916f 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -557,6 +557,7 @@
 	select GENERIC_CLOCKEVENTS
 	select GPIO_PXA
 	select IRQ_DOMAIN
+	select MULTI_IRQ_HANDLER
 	select NEED_MACH_GPIO_H
 	select PINCTRL
 	select PLAT_PXA
diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile
index 000cf76..cc0f1fb 100644
--- a/arch/arm/boot/dts/Makefile
+++ b/arch/arm/boot/dts/Makefile
@@ -198,12 +198,16 @@
 	emev2-kzm9d-reference.dtb \
 	r8a7740-armadillo800eva.dtb \
 	r8a7778-bockw.dtb \
+	r8a7778-bockw-reference.dtb \
 	r8a7740-armadillo800eva-reference.dtb \
+	r8a7779-marzen.dtb \
 	r8a7779-marzen-reference.dtb \
 	r8a7790-lager.dtb \
+	r8a7790-lager-reference.dtb \
 	sh73a0-kzm9g.dtb \
 	sh73a0-kzm9g-reference.dtb \
 	r8a73a4-ape6evm.dtb \
+	r8a73a4-ape6evm-reference.dtb \
 	sh7372-mackerel.dtb
 dtb-$(CONFIG_ARCH_SHMOBILE_MULTI) += emev2-kzm9d-reference.dtb
 dtb-$(CONFIG_ARCH_SOCFPGA) += socfpga_cyclone5.dtb \
@@ -227,6 +231,7 @@
 	sun5i-a10s-olinuxino-micro.dtb \
 	sun5i-a13-olinuxino.dtb \
 	sun6i-a31-colombus.dtb \
+	sun7i-a20-cubieboard2.dtb \
 	sun7i-a20-olinuxino-micro.dtb
 dtb-$(CONFIG_ARCH_TEGRA) += tegra20-harmony.dtb \
 	tegra20-iris-512.dtb \
diff --git a/arch/arm/boot/dts/emev2-kzm9d-reference.dts b/arch/arm/boot/dts/emev2-kzm9d-reference.dts
index bed676b..cceefda 100644
--- a/arch/arm/boot/dts/emev2-kzm9d-reference.dts
+++ b/arch/arm/boot/dts/emev2-kzm9d-reference.dts
@@ -21,7 +21,7 @@
 	};
 
 	chosen {
-		bootargs = "console=ttyS1,115200n81 ignore_loglevel root=/dev/nfs ip=dhcp nfsroot=,rsize=4096,wsize=4096";
+		bootargs = "console=ttyS1,115200n81 ignore_loglevel root=/dev/nfs ip=dhcp";
 	};
 
 	reg_1p8v: regulator@0 {
diff --git a/arch/arm/boot/dts/emev2-kzm9d.dts b/arch/arm/boot/dts/emev2-kzm9d.dts
index dda13bc..f92e812 100644
--- a/arch/arm/boot/dts/emev2-kzm9d.dts
+++ b/arch/arm/boot/dts/emev2-kzm9d.dts
@@ -21,6 +21,6 @@
 	};
 
 	chosen {
-		bootargs = "console=ttyS1,115200n81 ignore_loglevel root=/dev/nfs ip=dhcp nfsroot=,rsize=4096,wsize=4096";
+		bootargs = "console=ttyS1,115200n81 ignore_loglevel root=/dev/nfs ip=dhcp";
 	};
 };
diff --git a/arch/arm/boot/dts/emev2.dtsi b/arch/arm/boot/dts/emev2.dtsi
index 99ad2b2..9063a443 100644
--- a/arch/arm/boot/dts/emev2.dtsi
+++ b/arch/arm/boot/dts/emev2.dtsi
@@ -46,6 +46,12 @@
 		      <0xe0020000 0x0100>;
 	};
 
+	pmu {
+		compatible = "arm,cortex-a9-pmu";
+		interrupts = <0 120 4>,
+			     <0 121 4>;
+	};
+
 	sti@e0180000 {
 		compatible = "renesas,em-sti";
 		reg = <0xe0180000 0x54>;
diff --git a/arch/arm/boot/dts/exynos4.dtsi b/arch/arm/boot/dts/exynos4.dtsi
index 93c2501..caadc02 100644
--- a/arch/arm/boot/dts/exynos4.dtsi
+++ b/arch/arm/boot/dts/exynos4.dtsi
@@ -448,6 +448,8 @@
 		compatible = "samsung,exynos4210-pwm";
 		reg = <0x139D0000 0x1000>;
 		interrupts = <0 37 0>, <0 38 0>, <0 39 0>, <0 40 0>, <0 41 0>;
+		clocks = <&clock 336>;
+		clock-names = "timers";
 		#pwm-cells = <2>;
 		status = "disabled";
 	};
diff --git a/arch/arm/boot/dts/exynos5.dtsi b/arch/arm/boot/dts/exynos5.dtsi
index 6afa57d..074739d 100644
--- a/arch/arm/boot/dts/exynos5.dtsi
+++ b/arch/arm/boot/dts/exynos5.dtsi
@@ -95,7 +95,7 @@
 		interrupts = <0 54 0>;
 	};
 
-	rtc {
+	rtc@101E0000 {
 		compatible = "samsung,s3c6410-rtc";
 		reg = <0x101E0000 0x100>;
 		interrupts = <0 43 0>, <0 44 0>;
diff --git a/arch/arm/boot/dts/exynos5250-arndale.dts b/arch/arm/boot/dts/exynos5250-arndale.dts
index 452d0b0..cee55fa 100644
--- a/arch/arm/boot/dts/exynos5250-arndale.dts
+++ b/arch/arm/boot/dts/exynos5250-arndale.dts
@@ -538,10 +538,6 @@
 		};
 	};
 
-	rtc {
-		status = "okay";
-	};
-
 	usb_hub_bus {
 		compatible = "simple-bus";
 		#address-cells = <1>;
diff --git a/arch/arm/boot/dts/exynos5250-snow.dts b/arch/arm/boot/dts/exynos5250-snow.dts
index e79331d..fd711e2 100644
--- a/arch/arm/boot/dts/exynos5250-snow.dts
+++ b/arch/arm/boot/dts/exynos5250-snow.dts
@@ -171,10 +171,6 @@
 		};
 	};
 
-	rtc {
-		status = "okay";
-	};
-
 	/*
 	 * On Snow we've got SIP WiFi and so can keep drive strengths low to
 	 * reduce EMI.
diff --git a/arch/arm/boot/dts/exynos5250.dtsi b/arch/arm/boot/dts/exynos5250.dtsi
index f7e2d34..7d7cc77 100644
--- a/arch/arm/boot/dts/exynos5250.dtsi
+++ b/arch/arm/boot/dts/exynos5250.dtsi
@@ -180,9 +180,10 @@
 		clock-names = "mfc";
 	};
 
-	rtc {
+	rtc@101E0000 {
 		clocks = <&clock 337>;
 		clock-names = "rtc";
+		status = "okay";
 	};
 
 	tmu@10060000 {
@@ -638,4 +639,15 @@
 		clocks = <&clock 133>, <&clock 339>;
 		clock-names = "sclk_fimd", "fimd";
 	};
+
+	adc: adc@12D10000 {
+		compatible = "samsung,exynos-adc-v1";
+		reg = <0x12D10000 0x100>, <0x10040718 0x4>;
+		interrupts = <0 106 0>;
+		clocks = <&clock 303>;
+		clock-names = "adc";
+		#io-channel-cells = <1>;
+		io-channel-ranges;
+		status = "disabled";
+	};
 };
diff --git a/arch/arm/boot/dts/exynos5420.dtsi b/arch/arm/boot/dts/exynos5420.dtsi
index 5353e32..d537cd7 100644
--- a/arch/arm/boot/dts/exynos5420.dtsi
+++ b/arch/arm/boot/dts/exynos5420.dtsi
@@ -180,6 +180,12 @@
 		interrupts = <0 47 0>;
 	};
 
+	rtc@101E0000 {
+		clocks = <&clock 317>;
+		clock-names = "rtc";
+		status = "okay";
+	};
+
 	serial@12C00000 {
 		clocks = <&clock 257>, <&clock 128>;
 		clock-names = "uart", "clk_uart_baud0";
@@ -218,4 +224,15 @@
 		clocks = <&clock 147>, <&clock 421>;
 		clock-names = "sclk_fimd", "fimd";
 	};
+
+	adc: adc@12D10000 {
+		compatible = "samsung,exynos-adc-v2";
+		reg = <0x12D10000 0x100>, <0x10040720 0x4>;
+		interrupts = <0 106 0>;
+		clocks = <&clock 270>;
+		clock-names = "adc";
+		#io-channel-cells = <1>;
+		io-channel-ranges;
+		status = "disabled";
+	};
 };
diff --git a/arch/arm/boot/dts/r8a73a4-ape6evm-reference.dts b/arch/arm/boot/dts/r8a73a4-ape6evm-reference.dts
new file mode 100644
index 0000000..f444624
--- /dev/null
+++ b/arch/arm/boot/dts/r8a73a4-ape6evm-reference.dts
@@ -0,0 +1,65 @@
+/*
+ * Device Tree Source for the APE6EVM board
+ *
+ * Copyright (C) 2013 Renesas Solutions Corp.
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2.  This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+/dts-v1/;
+/include/ "r8a73a4.dtsi"
+
+/ {
+	model = "APE6EVM";
+	compatible = "renesas,ape6evm-reference", "renesas,r8a73a4";
+
+	chosen {
+		bootargs = "console=ttySC0,115200 ignore_loglevel rw";
+	};
+
+	memory@40000000 {
+		device_type = "memory";
+		reg = <0 0x40000000 0 0x40000000>;
+	};
+
+	lbsc {
+		compatible = "simple-bus";
+		#address-cells = <1>;
+		#size-cells = <1>;
+		ranges = <0 0 0 0x80000000>;
+	};
+};
+
+&i2c5 {
+	vdd_dvfs: max8973@1b {
+		compatible = "maxim,max8973";
+		reg = <0x1b>;
+
+		regulator-min-microvolt = <935000>;
+		regulator-max-microvolt = <1200000>;
+		regulator-boot-on;
+		regulator-always-on;
+	};
+};
+
+&cpu0 {
+	cpu0-supply = <&vdd_dvfs>;
+	operating-points = <
+		/* kHz  uV */
+		1950000 1115000
+		1462500  995000
+	>;
+	voltage-tolerance = <1>; /* 1% */
+};
+
+&pfc {
+	pinctrl-0 = <&scifa0_pins>;
+	pinctrl-names = "default";
+
+	scifa0_pins: scifa0 {
+		renesas,groups = "scifa0_data";
+		renesas,function = "scifa0";
+	};
+};
diff --git a/arch/arm/boot/dts/r8a73a4-ape6evm.dts b/arch/arm/boot/dts/r8a73a4-ape6evm.dts
index e657a9d..72f867e 100644
--- a/arch/arm/boot/dts/r8a73a4-ape6evm.dts
+++ b/arch/arm/boot/dts/r8a73a4-ape6evm.dts
@@ -16,7 +16,7 @@
 	compatible = "renesas,ape6evm", "renesas,r8a73a4";
 
 	chosen {
-		bootargs = "console=ttySC0,115200 ignore_loglevel root=/dev/nfs ip=dhcp";
+		bootargs = "console=ttySC0,115200 ignore_loglevel root=/dev/nfs ip=dhcp rw";
 	};
 
 	memory@40000000 {
diff --git a/arch/arm/boot/dts/r8a7740-armadillo800eva-reference.dts b/arch/arm/boot/dts/r8a7740-armadillo800eva-reference.dts
index 366f729..c638e4a 100644
--- a/arch/arm/boot/dts/r8a7740-armadillo800eva-reference.dts
+++ b/arch/arm/boot/dts/r8a7740-armadillo800eva-reference.dts
@@ -17,7 +17,7 @@
 	compatible = "renesas,armadillo800eva-reference", "renesas,r8a7740";
 
 	chosen {
-		bootargs = "console=tty0 console=ttySC1,115200 earlyprintk=sh-sci.1,115200 ignore_loglevel root=/dev/nfs ip=dhcp nfsroot=,rsize=4096,wsize=4096 rw";
+		bootargs = "console=tty0 console=ttySC1,115200 earlyprintk=sh-sci.1,115200 ignore_loglevel root=/dev/nfs ip=dhcp rw";
 	};
 
 	memory {
diff --git a/arch/arm/boot/dts/r8a7740-armadillo800eva.dts b/arch/arm/boot/dts/r8a7740-armadillo800eva.dts
index 93da655..426cd9c 100644
--- a/arch/arm/boot/dts/r8a7740-armadillo800eva.dts
+++ b/arch/arm/boot/dts/r8a7740-armadillo800eva.dts
@@ -16,7 +16,7 @@
 	compatible = "renesas,armadillo800eva";
 
 	chosen {
-		bootargs = "console=tty0 console=ttySC1,115200 earlyprintk=sh-sci.1,115200 ignore_loglevel root=/dev/nfs ip=dhcp nfsroot=,rsize=4096,wsize=4096 rw";
+		bootargs = "console=tty0 console=ttySC1,115200 earlyprintk=sh-sci.1,115200 ignore_loglevel root=/dev/nfs ip=dhcp rw";
 	};
 
 	memory {
diff --git a/arch/arm/boot/dts/r8a7740.dtsi b/arch/arm/boot/dts/r8a7740.dtsi
index e18a195..44d3d52 100644
--- a/arch/arm/boot/dts/r8a7740.dtsi
+++ b/arch/arm/boot/dts/r8a7740.dtsi
@@ -32,6 +32,11 @@
 		      <0xc2000000 0x1000>;
 	};
 
+	pmu {
+		compatible = "arm,cortex-a9-pmu";
+		interrupts = <0 83 4>;
+	};
+
 	/* irqpin0: IRQ0 - IRQ7 */
 	irqpin0: irqpin@e6900000 {
 		compatible = "renesas,intc-irqpin";
@@ -147,4 +152,11 @@
 		gpio-controller;
 		#gpio-cells = <2>;
 	};
+
+	tpu: pwm@e6600000 {
+		compatible = "renesas,tpu-r8a7740", "renesas,tpu";
+		reg = <0xe6600000 0x100>;
+		status = "disabled";
+		#pwm-cells = <3>;
+	};
 };
diff --git a/arch/arm/boot/dts/r8a7778-bockw-reference.dts b/arch/arm/boot/dts/r8a7778-bockw-reference.dts
new file mode 100644
index 0000000..9bb903a
--- /dev/null
+++ b/arch/arm/boot/dts/r8a7778-bockw-reference.dts
@@ -0,0 +1,32 @@
+/*
+ * Reference Device Tree Source for the Bock-W board
+ *
+ * Copyright (C) 2013  Renesas Solutions Corp.
+ * Copyright (C) 2013  Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
+ *
+ * based on r8a7779
+ *
+ * Copyright (C) 2013 Renesas Solutions Corp.
+ * Copyright (C) 2013 Simon Horman
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2.  This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+/dts-v1/;
+/include/ "r8a7778.dtsi"
+
+/ {
+	model = "bockw";
+	compatible = "renesas,bockw-reference", "renesas,r8a7778";
+
+	chosen {
+		bootargs = "console=ttySC0,115200 ignore_loglevel rw";
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x60000000 0x10000000>;
+	};
+};
diff --git a/arch/arm/boot/dts/r8a7778-bockw.dts b/arch/arm/boot/dts/r8a7778-bockw.dts
index 0076b1e..12bbebc 100644
--- a/arch/arm/boot/dts/r8a7778-bockw.dts
+++ b/arch/arm/boot/dts/r8a7778-bockw.dts
@@ -22,7 +22,7 @@
 	compatible = "renesas,bockw", "renesas,r8a7778";
 
 	chosen {
-		bootargs = "console=ttySC0,115200 ignore_loglevel ip=dhcp root=/dev/nfs";
+		bootargs = "console=ttySC0,115200 ignore_loglevel ip=dhcp root=/dev/nfs rw";
 	};
 
 	memory {
diff --git a/arch/arm/boot/dts/r8a7779-marzen-reference.dts b/arch/arm/boot/dts/r8a7779-marzen-reference.dts
index b64705b..6d55083 100644
--- a/arch/arm/boot/dts/r8a7779-marzen-reference.dts
+++ b/arch/arm/boot/dts/r8a7779-marzen-reference.dts
@@ -18,7 +18,7 @@
 	compatible = "renesas,marzen-reference", "renesas,r8a7779";
 
 	chosen {
-		bootargs = "console=ttySC2,115200 earlyprintk=sh-sci.2,115200 ignore_loglevel root=/dev/nfs ip=on";
+		bootargs = "console=ttySC2,115200 earlyprintk=sh-sci.2,115200 ignore_loglevel root=/dev/nfs ip=on rw";
 	};
 
 	memory {
diff --git a/arch/arm/boot/dts/r8a7779-marzen.dts b/arch/arm/boot/dts/r8a7779-marzen.dts
new file mode 100644
index 0000000..f3f7f79
--- /dev/null
+++ b/arch/arm/boot/dts/r8a7779-marzen.dts
@@ -0,0 +1,27 @@
+/*
+ * Device Tree Source for the Marzen board
+ *
+ * Copyright (C) 2013 Renesas Solutions Corp.
+ * Copyright (C) 2013 Simon Horman
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2.  This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+/dts-v1/;
+/include/ "r8a7779.dtsi"
+
+/ {
+	model = "marzen";
+	compatible = "renesas,marzen", "renesas,r8a7779";
+
+	chosen {
+		bootargs = "console=ttySC2,115200 earlyprintk=sh-sci.2,115200 ignore_loglevel root=/dev/nfs ip=on";
+	};
+
+	memory {
+		device_type = "memory";
+		reg = <0x60000000 0x40000000>;
+	};
+};
diff --git a/arch/arm/boot/dts/r8a7779.dtsi b/arch/arm/boot/dts/r8a7779.dtsi
index e9fbe3d..23a6244 100644
--- a/arch/arm/boot/dts/r8a7779.dtsi
+++ b/arch/arm/boot/dts/r8a7779.dtsi
@@ -149,7 +149,7 @@
 		sense-bitfield-width = <2>;
 	};
 
-	i2c0: i2c@0xffc70000 {
+	i2c0: i2c@ffc70000 {
 		#address-cells = <1>;
 		#size-cells = <0>;
 		compatible = "renesas,rmobile-iic";
@@ -158,7 +158,7 @@
 		interrupts = <0 79 0x4>;
 	};
 
-	i2c1: i2c@0xffc71000 {
+	i2c1: i2c@ffc71000 {
 		#address-cells = <1>;
 		#size-cells = <0>;
 		compatible = "renesas,rmobile-iic";
@@ -167,7 +167,7 @@
 		interrupts = <0 82 0x4>;
 	};
 
-	i2c2: i2c@0xffc72000 {
+	i2c2: i2c@ffc72000 {
 		#address-cells = <1>;
 		#size-cells = <0>;
 		compatible = "renesas,rmobile-iic";
@@ -176,7 +176,7 @@
 		interrupts = <0 80 0x4>;
 	};
 
-	i2c3: i2c@0xffc73000 {
+	i2c3: i2c@ffc73000 {
 		#address-cells = <1>;
 		#size-cells = <0>;
 		compatible = "renesas,rmobile-iic";
diff --git a/arch/arm/boot/dts/r8a7790-lager-reference.dts b/arch/arm/boot/dts/r8a7790-lager-reference.dts
new file mode 100644
index 0000000..c462ef1
--- /dev/null
+++ b/arch/arm/boot/dts/r8a7790-lager-reference.dts
@@ -0,0 +1,45 @@
+/*
+ * Device Tree Source for the Lager board
+ *
+ * Copyright (C) 2013 Renesas Solutions Corp.
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2.  This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+/dts-v1/;
+/include/ "r8a7790.dtsi"
+#include <dt-bindings/gpio/gpio.h>
+
+/ {
+	model = "Lager";
+	compatible = "renesas,lager-reference", "renesas,r8a7790";
+
+	chosen {
+		bootargs = "console=ttySC6,115200 ignore_loglevel rw";
+	};
+
+	memory@40000000 {
+		device_type = "memory";
+		reg = <0 0x40000000 0 0x80000000>;
+	};
+
+	lbsc {
+		#address-cells = <1>;
+		#size-cells = <1>;
+	};
+
+	leds {
+		compatible = "gpio-leds";
+		led6 {
+			gpios = <&gpio4 22 GPIO_ACTIVE_HIGH>;
+		};
+		led7 {
+			gpios = <&gpio4 23 GPIO_ACTIVE_HIGH>;
+		};
+		led8 {
+			gpios = <&gpio5 17 GPIO_ACTIVE_HIGH>;
+		};
+	};
+};
diff --git a/arch/arm/boot/dts/r8a7790-lager.dts b/arch/arm/boot/dts/r8a7790-lager.dts
index 09a84fc..203bd08 100644
--- a/arch/arm/boot/dts/r8a7790-lager.dts
+++ b/arch/arm/boot/dts/r8a7790-lager.dts
@@ -16,7 +16,7 @@
 	compatible = "renesas,lager", "renesas,r8a7790";
 
 	chosen {
-		bootargs = "console=ttySC6,115200 ignore_loglevel";
+		bootargs = "console=ttySC6,115200 ignore_loglevel rw root=/dev/nfs ip=dhcp";
 	};
 
 	memory@40000000 {
diff --git a/arch/arm/boot/dts/sama5d3.dtsi b/arch/arm/boot/dts/sama5d3.dtsi
index ff63fbb..b7f4961 100644
--- a/arch/arm/boot/dts/sama5d3.dtsi
+++ b/arch/arm/boot/dts/sama5d3.dtsi
@@ -1034,21 +1034,30 @@
 			compatible = "atmel,at91rm9200-nand";
 			#address-cells = <1>;
 			#size-cells = <1>;
+			ranges;
 			reg = <	0x60000000 0x01000000	/* EBI CS3 */
 				0xffffc070 0x00000490	/* SMC PMECC regs */
 				0xffffc500 0x00000100	/* SMC PMECC Error Location regs */
-				0x00100000 0x00100000	/* ROM code */
-				0x70000000 0x10000000	/* NFC Command Registers */
-				0xffffc000 0x00000070	/* NFC HSMC regs */
-				0x00200000 0x00100000	/* NFC SRAM banks */
+				0x00110000 0x00018000	/* ROM code */
 				>;
 			interrupts = <5 IRQ_TYPE_LEVEL_HIGH 6>;
 			atmel,nand-addr-offset = <21>;
 			atmel,nand-cmd-offset = <22>;
 			pinctrl-names = "default";
 			pinctrl-0 = <&pinctrl_nand0_ale_cle>;
-			atmel,pmecc-lookup-table-offset = <0x10000 0x18000>;
+			atmel,pmecc-lookup-table-offset = <0x0 0x8000>;
 			status = "disabled";
+
+			nfc@70000000 {
+				compatible = "atmel,sama5d3-nfc";
+				#address-cells = <1>;
+				#size-cells = <1>;
+				reg = <
+					0x70000000 0x10000000	/* NFC Command Registers */
+					0xffffc000 0x00000070	/* NFC HSMC regs */
+					0x00200000 0x00100000	/* NFC SRAM banks */
+					>;
+			};
 		};
 	};
 };
diff --git a/arch/arm/boot/dts/sama5d3xcm.dtsi b/arch/arm/boot/dts/sama5d3xcm.dtsi
index 1f80508..31ed9e3 100644
--- a/arch/arm/boot/dts/sama5d3xcm.dtsi
+++ b/arch/arm/boot/dts/sama5d3xcm.dtsi
@@ -47,8 +47,6 @@
 			atmel,has-pmecc;
 			atmel,pmecc-cap = <4>;
 			atmel,pmecc-sector-size = <512>;
-			atmel,has-nfc;
-			atmel,use-nfc-sram;
 			nand-on-flash-bbt;
 			status = "okay";
 
diff --git a/arch/arm/boot/dts/sh73a0-kzm9g-reference.dts b/arch/arm/boot/dts/sh73a0-kzm9g-reference.dts
index b99e890..2122306 100644
--- a/arch/arm/boot/dts/sh73a0-kzm9g-reference.dts
+++ b/arch/arm/boot/dts/sh73a0-kzm9g-reference.dts
@@ -33,7 +33,7 @@
 	};
 
 	chosen {
-		bootargs = "console=tty0 console=ttySC4,115200 root=/dev/nfs ip=dhcp ignore_loglevel earlyprintk=sh-sci.4,115200";
+		bootargs = "console=tty0 console=ttySC4,115200 root=/dev/nfs ip=dhcp ignore_loglevel earlyprintk=sh-sci.4,115200 rw";
 	};
 
 	memory {
diff --git a/arch/arm/boot/dts/sh73a0-kzm9g.dts b/arch/arm/boot/dts/sh73a0-kzm9g.dts
index 7c4071e..0f1ca77 100644
--- a/arch/arm/boot/dts/sh73a0-kzm9g.dts
+++ b/arch/arm/boot/dts/sh73a0-kzm9g.dts
@@ -16,7 +16,7 @@
 	compatible = "renesas,kzm9g", "renesas,sh73a0";
 
 	chosen {
-		bootargs = "console=tty0 console=ttySC4,115200 root=/dev/nfs ip=dhcp ignore_loglevel earlyprintk=sh-sci.4,115200";
+		bootargs = "console=tty0 console=ttySC4,115200 root=/dev/nfs ip=dhcp ignore_loglevel earlyprintk=sh-sci.4,115200 rw";
 	};
 
 	memory {
diff --git a/arch/arm/boot/dts/sh73a0.dtsi b/arch/arm/boot/dts/sh73a0.dtsi
index 86e79fe..ba59a58 100644
--- a/arch/arm/boot/dts/sh73a0.dtsi
+++ b/arch/arm/boot/dts/sh73a0.dtsi
@@ -38,6 +38,12 @@
 		      <0xf0000100 0x100>;
 	};
 
+	pmu {
+		compatible = "arm,cortex-a9-pmu";
+		interrupts = <0 55 4>,
+			     <0 56 4>;
+	};
+
 	irqpin0: irqpin@e6900000 {
 		compatible = "renesas,intc-irqpin";
 		#interrupt-cells = <2>;
diff --git a/arch/arm/boot/dts/sun5i-a10s.dtsi b/arch/arm/boot/dts/sun5i-a10s.dtsi
index ee0ff9b..3b4a057 100644
--- a/arch/arm/boot/dts/sun5i-a10s.dtsi
+++ b/arch/arm/boot/dts/sun5i-a10s.dtsi
@@ -95,20 +95,16 @@
 
 		ahb_gates: ahb_gates@01c20060 {
 			#clock-cells = <1>;
-			compatible = "allwinner,sun4i-ahb-gates-clk";
+			compatible = "allwinner,sun5i-a10s-ahb-gates-clk";
 			reg = <0x01c20060 0x8>;
 			clocks = <&ahb>;
-			clock-output-names = "ahb_usb0", "ahb_ehci0",
-				"ahb_ohci0", "ahb_ehci1", "ahb_ohci1", "ahb_ss",
-				"ahb_dma", "ahb_bist", "ahb_mmc0", "ahb_mmc1",
-				"ahb_mmc2", "ahb_mmc3", "ahb_ms", "ahb_nand",
-				"ahb_sdram", "ahb_ace",	"ahb_emac", "ahb_ts",
-				"ahb_spi0", "ahb_spi1", "ahb_spi2", "ahb_spi3",
-				"ahb_pata", "ahb_sata", "ahb_gps", "ahb_ve",
-				"ahb_tvd", "ahb_tve0", "ahb_tve1", "ahb_lcd0",
-				"ahb_lcd1", "ahb_csi0", "ahb_csi1", "ahb_hdmi",
-				"ahb_de_be0", "ahb_de_be1", "ahb_de_fe0",
-				"ahb_de_fe1", "ahb_mp", "ahb_mali400";
+			clock-output-names = "ahb_usbotg", "ahb_ehci", "ahb_ohci",
+				"ahb_ss", "ahb_dma", "ahb_bist", "ahb_mmc0",
+				"ahb_mmc1", "ahb_mmc2", "ahb_nand", "ahb_sdram",
+				"ahb_emac", "ahb_ts", "ahb_spi0", "ahb_spi1",
+				"ahb_spi2", "ahb_gps", "ahb_stimer", "ahb_ve",
+				"ahb_tve", "ahb_lcd", "ahb_csi", "ahb_hdmi",
+				"ahb_de_be", "ahb_de_fe", "ahb_iep", "ahb_mali400";
 		};
 
 		apb0: apb0@01c20054 {
@@ -120,12 +116,11 @@
 
 		apb0_gates: apb0_gates@01c20068 {
 			#clock-cells = <1>;
-			compatible = "allwinner,sun4i-apb0-gates-clk";
+			compatible = "allwinner,sun5i-a10s-apb0-gates-clk";
 			reg = <0x01c20068 0x4>;
 			clocks = <&apb0>;
-			clock-output-names = "apb0_codec", "apb0_spdif",
-				"apb0_ac97", "apb0_iis", "apb0_pio", "apb0_ir0",
-				"apb0_ir1", "apb0_keypad";
+			clock-output-names = "apb0_codec", "apb0_iis", "apb0_pio",
+				"apb0_ir", "apb0_keypad";
 		};
 
 		/* dummy is pll62 */
@@ -145,15 +140,12 @@
 
 		apb1_gates: apb1_gates@01c2006c {
 			#clock-cells = <1>;
-			compatible = "allwinner,sun4i-apb1-gates-clk";
+			compatible = "allwinner,sun5i-a10s-apb1-gates-clk";
 			reg = <0x01c2006c 0x4>;
 			clocks = <&apb1>;
 			clock-output-names = "apb1_i2c0", "apb1_i2c1",
-				"apb1_i2c2", "apb1_can", "apb1_scr",
-				"apb1_ps20", "apb1_ps21", "apb1_uart0",
-				"apb1_uart1", "apb1_uart2", "apb1_uart3",
-				"apb1_uart4", "apb1_uart5", "apb1_uart6",
-				"apb1_uart7";
+				"apb1_i2c2", "apb1_uart0", "apb1_uart1",
+				"apb1_uart2", "apb1_uart3";
 		};
 	};
 
diff --git a/arch/arm/boot/dts/sun6i-a31-colombus.dts b/arch/arm/boot/dts/sun6i-a31-colombus.dts
index 99c4b18..e5adae3 100644
--- a/arch/arm/boot/dts/sun6i-a31-colombus.dts
+++ b/arch/arm/boot/dts/sun6i-a31-colombus.dts
@@ -24,6 +24,8 @@
 
 	soc@01c00000 {
 		uart0: serial@01c28000 {
+			pinctrl-names = "default";
+			pinctrl-0 = <&uart0_pins_a>;
 			status = "okay";
 		};
 	};
diff --git a/arch/arm/boot/dts/sun6i-a31.dtsi b/arch/arm/boot/dts/sun6i-a31.dtsi
index 4d076ec..f244f5f 100644
--- a/arch/arm/boot/dts/sun6i-a31.dtsi
+++ b/arch/arm/boot/dts/sun6i-a31.dtsi
@@ -51,13 +51,137 @@
 
 	clocks {
 		#address-cells = <1>;
-		#size-cells = <0>;
+		#size-cells = <1>;
+		ranges;
 
-		osc: oscillator {
+		osc24M: osc24M {
 			#clock-cells = <0>;
 			compatible = "fixed-clock";
 			clock-frequency = <24000000>;
 		};
+
+		osc32k: osc32k {
+			#clock-cells = <0>;
+			compatible = "fixed-clock";
+			clock-frequency = <32768>;
+		};
+
+		pll1: pll1@01c20000 {
+			#clock-cells = <0>;
+			compatible = "allwinner,sun6i-a31-pll1-clk";
+			reg = <0x01c20000 0x4>;
+			clocks = <&osc24M>;
+		};
+
+		/*
+		 * This is a dummy clock, to be used as placeholder on
+		 * other mux clocks when a specific parent clock is not
+		 * yet implemented. It should be dropped when the driver
+		 * is complete.
+		 */
+		pll6: pll6 {
+			#clock-cells = <0>;
+			compatible = "fixed-clock";
+			clock-frequency = <0>;
+		};
+
+		cpu: cpu@01c20050 {
+			#clock-cells = <0>;
+			compatible = "allwinner,sun4i-cpu-clk";
+			reg = <0x01c20050 0x4>;
+
+			/*
+			 * PLL1 is listed twice here.
+			 * While it looks suspicious, it's actually documented
+			 * that way both in the datasheet and in the code from
+			 * Allwinner.
+			 */
+			clocks = <&osc32k>, <&osc24M>, <&pll1>, <&pll1>;
+		};
+
+		axi: axi@01c20050 {
+			#clock-cells = <0>;
+			compatible = "allwinner,sun4i-axi-clk";
+			reg = <0x01c20050 0x4>;
+			clocks = <&cpu>;
+		};
+
+		ahb1_mux: ahb1_mux@01c20054 {
+			#clock-cells = <0>;
+			compatible = "allwinner,sun6i-a31-ahb1-mux-clk";
+			reg = <0x01c20054 0x4>;
+			clocks = <&osc32k>, <&osc24M>, <&axi>, <&pll6>;
+		};
+
+		ahb1: ahb1@01c20054 {
+			#clock-cells = <0>;
+			compatible = "allwinner,sun4i-ahb-clk";
+			reg = <0x01c20054 0x4>;
+			clocks = <&ahb1_mux>;
+		};
+
+		ahb1_gates: ahb1_gates@01c20060 {
+			#clock-cells = <1>;
+			compatible = "allwinner,sun6i-a31-ahb1-gates-clk";
+			reg = <0x01c20060 0x8>;
+			clocks = <&ahb1>;
+			clock-output-names = "ahb1_mipidsi", "ahb1_ss",
+					"ahb1_dma", "ahb1_mmc0", "ahb1_mmc1",
+					"ahb1_mmc2", "ahb1_mmc3", "ahb1_nand1",
+					"ahb1_nand0", "ahb1_sdram",
+					"ahb1_gmac", "ahb1_ts", "ahb1_hstimer",
+					"ahb1_spi0", "ahb1_spi1", "ahb1_spi2",
+					"ahb1_spi3", "ahb1_otg", "ahb1_ehci0",
+					"ahb1_ehci1", "ahb1_ohci0",
+					"ahb1_ohci1", "ahb1_ohci2", "ahb1_ve",
+					"ahb1_lcd0", "ahb1_lcd1", "ahb1_csi",
+					"ahb1_hdmi", "ahb1_de0", "ahb1_de1",
+					"ahb1_fe0", "ahb1_fe1", "ahb1_mp",
+					"ahb1_gpu", "ahb1_deu0", "ahb1_deu1",
+					"ahb1_drc0", "ahb1_drc1";
+		};
+
+		apb1: apb1@01c20054 {
+			#clock-cells = <0>;
+			compatible = "allwinner,sun4i-apb0-clk";
+			reg = <0x01c20054 0x4>;
+			clocks = <&ahb1>;
+		};
+
+		apb1_gates: apb1_gates@01c20060 {
+			#clock-cells = <1>;
+			compatible = "allwinner,sun6i-a31-apb1-gates-clk";
+			reg = <0x01c20068 0x4>;
+			clocks = <&apb1>;
+			clock-output-names = "apb1_codec", "apb1_digital_mic",
+					"apb1_pio", "apb1_daudio0",
+					"apb1_daudio1";
+		};
+
+		apb2_mux: apb2_mux@01c20058 {
+			#clock-cells = <0>;
+			compatible = "allwinner,sun4i-apb1-mux-clk";
+			reg = <0x01c20058 0x4>;
+			clocks = <&osc32k>, <&osc24M>, <&pll6>, <&pll6>;
+		};
+
+		apb2: apb2@01c20058 {
+			#clock-cells = <0>;
+			compatible = "allwinner,sun6i-a31-apb2-div-clk";
+			reg = <0x01c20058 0x4>;
+			clocks = <&apb2_mux>;
+		};
+
+		apb2_gates: apb2_gates@01c2006c {
+			#clock-cells = <1>;
+			compatible = "allwinner,sun6i-a31-apb2-gates-clk";
+			reg = <0x01c2006c 0x8>;
+			clocks = <&apb2>;
+			clock-output-names = "apb2_i2c0", "apb2_i2c1",
+					"apb2_i2c2", "apb2_i2c3", "apb2_uart0",
+					"apb2_uart1", "apb2_uart2", "apb2_uart3",
+					"apb2_uart4", "apb2_uart5";
+		};
 	};
 
 	soc@01c00000 {
@@ -66,6 +190,25 @@
 		#size-cells = <1>;
 		ranges;
 
+		pio: pinctrl@01c20800 {
+			compatible = "allwinner,sun6i-a31-pinctrl";
+			reg = <0x01c20800 0x400>;
+			interrupts = <0 11 1>, <0 15 1>, <0 16 1>, <0 17 1>;
+			clocks = <&apb1_gates 5>;
+			gpio-controller;
+			interrupt-controller;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			#gpio-cells = <3>;
+
+			uart0_pins_a: uart0@0 {
+				allwinner,pins = "PH20", "PH21";
+				allwinner,function = "uart0";
+				allwinner,drive = <0>;
+				allwinner,pull = <0>;
+			};
+		};
+
 		timer@01c20c00 {
 			compatible = "allwinner,sun4i-timer";
 			reg = <0x01c20c00 0xa0>;
@@ -74,7 +217,7 @@
 				     <0 20 1>,
 				     <0 21 1>,
 				     <0 22 1>;
-			clocks = <&osc>;
+			clocks = <&osc24M>;
 		};
 
 		wdt1: watchdog@01c20ca0 {
@@ -88,7 +231,7 @@
 			interrupts = <0 0 1>;
 			reg-shift = <2>;
 			reg-io-width = <4>;
-			clocks = <&osc>;
+			clocks = <&apb2_gates 16>;
 			status = "disabled";
 		};
 
@@ -98,7 +241,7 @@
 			interrupts = <0 1 1>;
 			reg-shift = <2>;
 			reg-io-width = <4>;
-			clocks = <&osc>;
+			clocks = <&apb2_gates 17>;
 			status = "disabled";
 		};
 
@@ -108,7 +251,7 @@
 			interrupts = <0 2 1>;
 			reg-shift = <2>;
 			reg-io-width = <4>;
-			clocks = <&osc>;
+			clocks = <&apb2_gates 18>;
 			status = "disabled";
 		};
 
@@ -118,7 +261,7 @@
 			interrupts = <0 3 1>;
 			reg-shift = <2>;
 			reg-io-width = <4>;
-			clocks = <&osc>;
+			clocks = <&apb2_gates 19>;
 			status = "disabled";
 		};
 
@@ -128,7 +271,7 @@
 			interrupts = <0 4 1>;
 			reg-shift = <2>;
 			reg-io-width = <4>;
-			clocks = <&osc>;
+			clocks = <&apb2_gates 20>;
 			status = "disabled";
 		};
 
@@ -138,7 +281,7 @@
 			interrupts = <0 5 1>;
 			reg-shift = <2>;
 			reg-io-width = <4>;
-			clocks = <&osc>;
+			clocks = <&apb2_gates 21>;
 			status = "disabled";
 		};
 
diff --git a/arch/arm/boot/dts/sun7i-a20-cubieboard2.dts b/arch/arm/boot/dts/sun7i-a20-cubieboard2.dts
new file mode 100644
index 0000000..31b76f0
--- /dev/null
+++ b/arch/arm/boot/dts/sun7i-a20-cubieboard2.dts
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2013 Maxime Ripard
+ *
+ * Maxime Ripard <maxime.ripard@free-electrons.com>
+ *
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+
+/dts-v1/;
+/include/ "sun7i-a20.dtsi"
+
+/ {
+	model = "Cubietech Cubieboard2";
+	compatible = "cubietech,cubieboard2", "allwinner,sun7i-a20";
+
+	soc@01c00000 {
+		pinctrl@01c20800 {
+			led_pins_cubieboard2: led_pins@0 {
+				allwinner,pins = "PH20", "PH21";
+				allwinner,function = "gpio_out";
+				allwinner,drive = <0>;
+				allwinner,pull = <0>;
+			};
+		};
+
+		uart0: serial@01c28000 {
+			pinctrl-names = "default";
+			pinctrl-0 = <&uart0_pins_a>;
+			status = "okay";
+		};
+	};
+
+	leds {
+		compatible = "gpio-leds";
+		pinctrl-names = "default";
+		pinctrl-0 = <&led_pins_cubieboard2>;
+
+		blue {
+			label = "cubieboard2:blue:usr";
+			gpios = <&pio 7 21 0>;
+		};
+
+		green {
+			label = "cubieboard2:green:usr";
+			gpios = <&pio 7 20 0>;
+		};
+	};
+};
diff --git a/arch/arm/boot/dts/sun7i-a20-olinuxino-micro.dts b/arch/arm/boot/dts/sun7i-a20-olinuxino-micro.dts
index d339584..34a6c02 100644
--- a/arch/arm/boot/dts/sun7i-a20-olinuxino-micro.dts
+++ b/arch/arm/boot/dts/sun7i-a20-olinuxino-micro.dts
@@ -19,16 +19,43 @@
 	compatible = "olimex,a20-olinuxino-micro", "allwinner,sun7i-a20";
 
 	soc@01c00000 {
+		pinctrl@01c20800 {
+			led_pins_olinuxino: led_pins@0 {
+				allwinner,pins = "PH2";
+				allwinner,function = "gpio_out";
+				allwinner,drive = <1>;
+				allwinner,pull = <0>;
+			};
+		};
+
 		uart0: serial@01c28000 {
+			pinctrl-names = "default";
+			pinctrl-0 = <&uart0_pins_a>;
 			status = "okay";
 		};
 
 		uart6: serial@01c29800 {
+			pinctrl-names = "default";
+			pinctrl-0 = <&uart6_pins_a>;
 			status = "okay";
 		};
 
 		uart7: serial@01c29c00 {
+			pinctrl-names = "default";
+			pinctrl-0 = <&uart7_pins_a>;
 			status = "okay";
 		};
 	};
+
+	leds {
+		compatible = "gpio-leds";
+		pinctrl-names = "default";
+		pinctrl-0 = <&led_pins_olinuxino>;
+
+		green {
+			label = "a20-olinuxino-micro:green:usr";
+			gpios = <&pio 7 2 0>;
+			default-state = "on";
+		};
+	};
 };
diff --git a/arch/arm/boot/dts/sun7i-a20.dtsi b/arch/arm/boot/dts/sun7i-a20.dtsi
index 3339151..999ff45 100644
--- a/arch/arm/boot/dts/sun7i-a20.dtsi
+++ b/arch/arm/boot/dts/sun7i-a20.dtsi
@@ -44,7 +44,8 @@
 
 		osc24M: osc24M@01c20050 {
 			#clock-cells = <0>;
-			compatible = "fixed-clock";
+			compatible = "allwinner,sun4i-osc-clk";
+			reg = <0x01c20050 0x4>;
 			clock-frequency = <24000000>;
 		};
 
@@ -53,6 +54,111 @@
 			compatible = "fixed-clock";
 			clock-frequency = <32768>;
 		};
+
+		pll1: pll1@01c20000 {
+			#clock-cells = <0>;
+			compatible = "allwinner,sun4i-pll1-clk";
+			reg = <0x01c20000 0x4>;
+			clocks = <&osc24M>;
+		};
+
+		/*
+		 * This is a dummy clock, to be used as placeholder on
+		 * other mux clocks when a specific parent clock is not
+		 * yet implemented. It should be dropped when the driver
+		 * is complete.
+		 */
+		pll6: pll6 {
+			#clock-cells = <0>;
+			compatible = "fixed-clock";
+			clock-frequency = <0>;
+		};
+
+		cpu: cpu@01c20054 {
+			#clock-cells = <0>;
+			compatible = "allwinner,sun4i-cpu-clk";
+			reg = <0x01c20054 0x4>;
+			clocks = <&osc32k>, <&osc24M>, <&pll1>, <&pll6>;
+		};
+
+		axi: axi@01c20054 {
+			#clock-cells = <0>;
+			compatible = "allwinner,sun4i-axi-clk";
+			reg = <0x01c20054 0x4>;
+			clocks = <&cpu>;
+		};
+
+		ahb: ahb@01c20054 {
+			#clock-cells = <0>;
+			compatible = "allwinner,sun4i-ahb-clk";
+			reg = <0x01c20054 0x4>;
+			clocks = <&axi>;
+		};
+
+		ahb_gates: ahb_gates@01c20060 {
+			#clock-cells = <1>;
+			compatible = "allwinner,sun7i-a20-ahb-gates-clk";
+			reg = <0x01c20060 0x8>;
+			clocks = <&ahb>;
+			clock-output-names = "ahb_usb0", "ahb_ehci0",
+				"ahb_ohci0", "ahb_ehci1", "ahb_ohci1",
+				"ahb_ss", "ahb_dma", "ahb_bist", "ahb_mmc0",
+				"ahb_mmc1", "ahb_mmc2", "ahb_mmc3", "ahb_ms",
+				"ahb_nand", "ahb_sdram", "ahb_ace",
+				"ahb_emac", "ahb_ts", "ahb_spi0", "ahb_spi1",
+				"ahb_spi2", "ahb_spi3", "ahb_sata",
+				"ahb_hstimer", "ahb_ve", "ahb_tvd", "ahb_tve0",
+				"ahb_tve1", "ahb_lcd0", "ahb_lcd1", "ahb_csi0",
+				"ahb_csi1", "ahb_hdmi1", "ahb_hdmi0",
+				"ahb_de_be0", "ahb_de_be1", "ahb_de_fe0",
+				"ahb_de_fe1", "ahb_gmac", "ahb_mp",
+				"ahb_mali";
+		};
+
+		apb0: apb0@01c20054 {
+			#clock-cells = <0>;
+			compatible = "allwinner,sun4i-apb0-clk";
+			reg = <0x01c20054 0x4>;
+			clocks = <&ahb>;
+		};
+
+		apb0_gates: apb0_gates@01c20068 {
+			#clock-cells = <1>;
+			compatible = "allwinner,sun7i-a20-apb0-gates-clk";
+			reg = <0x01c20068 0x4>;
+			clocks = <&apb0>;
+			clock-output-names = "apb0_codec", "apb0_spdif",
+				"apb0_ac97", "apb0_iis0", "apb0_iis1",
+				"apb0_pio", "apb0_ir0", "apb0_ir1",
+				"apb0_iis2", "apb0_keypad";
+		};
+
+		apb1_mux: apb1_mux@01c20058 {
+			#clock-cells = <0>;
+			compatible = "allwinner,sun4i-apb1-mux-clk";
+			reg = <0x01c20058 0x4>;
+			clocks = <&osc24M>, <&pll6>, <&osc32k>;
+		};
+
+		apb1: apb1@01c20058 {
+			#clock-cells = <0>;
+			compatible = "allwinner,sun4i-apb1-clk";
+			reg = <0x01c20058 0x4>;
+			clocks = <&apb1_mux>;
+		};
+
+		apb1_gates: apb1_gates@01c2006c {
+			#clock-cells = <1>;
+			compatible = "allwinner,sun7i-a20-apb1-gates-clk";
+			reg = <0x01c2006c 0x4>;
+			clocks = <&apb1>;
+			clock-output-names = "apb1_i2c0", "apb1_i2c1",
+				"apb1_i2c2", "apb1_i2c3", "apb1_can",
+				"apb1_scr", "apb1_ps20", "apb1_ps21",
+				"apb1_i2c4", "apb1_uart0", "apb1_uart1",
+				"apb1_uart2", "apb1_uart3", "apb1_uart4",
+				"apb1_uart5", "apb1_uart6", "apb1_uart7";
+		};
 	};
 
 	soc@01c00000 {
@@ -61,6 +167,39 @@
 		#size-cells = <1>;
 		ranges;
 
+		pio: pinctrl@01c20800 {
+			compatible = "allwinner,sun7i-a20-pinctrl";
+			reg = <0x01c20800 0x400>;
+			interrupts = <0 28 1>;
+			clocks = <&apb0_gates 5>;
+			gpio-controller;
+			interrupt-controller;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			#gpio-cells = <3>;
+
+			uart0_pins_a: uart0@0 {
+				allwinner,pins = "PB22", "PB23";
+				allwinner,function = "uart0";
+				allwinner,drive = <0>;
+				allwinner,pull = <0>;
+			};
+
+			uart6_pins_a: uart6@0 {
+				allwinner,pins = "PI12", "PI13";
+				allwinner,function = "uart6";
+				allwinner,drive = <0>;
+				allwinner,pull = <0>;
+			};
+
+			uart7_pins_a: uart7@0 {
+				allwinner,pins = "PI20", "PI21";
+				allwinner,function = "uart7";
+				allwinner,drive = <0>;
+				allwinner,pull = <0>;
+			};
+		};
+
 		timer@01c20c00 {
 			compatible = "allwinner,sun4i-timer";
 			reg = <0x01c20c00 0x90>;
@@ -84,7 +223,7 @@
 			interrupts = <0 1 1>;
 			reg-shift = <2>;
 			reg-io-width = <4>;
-			clocks = <&osc24M>;
+			clocks = <&apb1_gates 16>;
 			status = "disabled";
 		};
 
@@ -94,7 +233,7 @@
 			interrupts = <0 2 1>;
 			reg-shift = <2>;
 			reg-io-width = <4>;
-			clocks = <&osc24M>;
+			clocks = <&apb1_gates 17>;
 			status = "disabled";
 		};
 
@@ -104,7 +243,7 @@
 			interrupts = <0 3 1>;
 			reg-shift = <2>;
 			reg-io-width = <4>;
-			clocks = <&osc24M>;
+			clocks = <&apb1_gates 18>;
 			status = "disabled";
 		};
 
@@ -114,7 +253,7 @@
 			interrupts = <0 4 1>;
 			reg-shift = <2>;
 			reg-io-width = <4>;
-			clocks = <&osc24M>;
+			clocks = <&apb1_gates 19>;
 			status = "disabled";
 		};
 
@@ -124,7 +263,7 @@
 			interrupts = <0 17 1>;
 			reg-shift = <2>;
 			reg-io-width = <4>;
-			clocks = <&osc24M>;
+			clocks = <&apb1_gates 20>;
 			status = "disabled";
 		};
 
@@ -134,7 +273,7 @@
 			interrupts = <0 18 1>;
 			reg-shift = <2>;
 			reg-io-width = <4>;
-			clocks = <&osc24M>;
+			clocks = <&apb1_gates 21>;
 			status = "disabled";
 		};
 
@@ -144,7 +283,7 @@
 			interrupts = <0 19 1>;
 			reg-shift = <2>;
 			reg-io-width = <4>;
-			clocks = <&osc24M>;
+			clocks = <&apb1_gates 22>;
 			status = "disabled";
 		};
 
@@ -154,7 +293,7 @@
 			interrupts = <0 20 1>;
 			reg-shift = <2>;
 			reg-io-width = <4>;
-			clocks = <&osc24M>;
+			clocks = <&apb1_gates 23>;
 			status = "disabled";
 		};
 
diff --git a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts
index 759b0cd..15f98cb 100644
--- a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts
+++ b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts
@@ -37,30 +37,35 @@
 			device_type = "cpu";
 			compatible = "arm,cortex-a15";
 			reg = <0>;
+			cci-control-port = <&cci_control1>;
 		};
 
 		cpu1: cpu@1 {
 			device_type = "cpu";
 			compatible = "arm,cortex-a15";
 			reg = <1>;
+			cci-control-port = <&cci_control1>;
 		};
 
 		cpu2: cpu@2 {
 			device_type = "cpu";
 			compatible = "arm,cortex-a7";
 			reg = <0x100>;
+			cci-control-port = <&cci_control2>;
 		};
 
 		cpu3: cpu@3 {
 			device_type = "cpu";
 			compatible = "arm,cortex-a7";
 			reg = <0x101>;
+			cci-control-port = <&cci_control2>;
 		};
 
 		cpu4: cpu@4 {
 			device_type = "cpu";
 			compatible = "arm,cortex-a7";
 			reg = <0x102>;
+			cci-control-port = <&cci_control2>;
 		};
 	};
 
@@ -104,6 +109,26 @@
 		interrupts = <1 9 0xf04>;
 	};
 
+	cci@2c090000 {
+		compatible = "arm,cci-400";
+		#address-cells = <1>;
+		#size-cells = <1>;
+		reg = <0 0x2c090000 0 0x1000>;
+		ranges = <0x0 0x0 0x2c090000 0x10000>;
+
+		cci_control1: slave-if@4000 {
+			compatible = "arm,cci-400-ctrl-if";
+			interface-type = "ace";
+			reg = <0x4000 0x1000>;
+		};
+
+		cci_control2: slave-if@5000 {
+			compatible = "arm,cci-400-ctrl-if";
+			interface-type = "ace";
+			reg = <0x5000 0x1000>;
+		};
+	};
+
 	memory-controller@7ffd0000 {
 		compatible = "arm,pl354", "arm,primecell";
 		reg = <0 0x7ffd0000 0 0x1000>;
diff --git a/arch/arm/common/edma.c b/arch/arm/common/edma.c
index 39ad030..117f955 100644
--- a/arch/arm/common/edma.c
+++ b/arch/arm/common/edma.c
@@ -1235,6 +1235,23 @@
 }
 EXPORT_SYMBOL(edma_resume);
 
+int edma_trigger_channel(unsigned channel)
+{
+	unsigned ctlr;
+	unsigned int mask;
+
+	ctlr = EDMA_CTLR(channel);
+	channel = EDMA_CHAN_SLOT(channel);
+	mask = BIT(channel & 0x1f);
+
+	edma_shadow0_write_array(ctlr, SH_ESR, (channel >> 5), mask);
+
+	pr_debug("EDMA: ESR%d %08x\n", (channel >> 5),
+		 edma_shadow0_read_array(ctlr, SH_ESR, (channel >> 5)));
+	return 0;
+}
+EXPORT_SYMBOL(edma_trigger_channel);
+
 /**
  * edma_start - start dma on a channel
  * @channel: channel being activated
diff --git a/arch/arm/configs/ag5evm_defconfig b/arch/arm/configs/ag5evm_defconfig
deleted file mode 100644
index 212ead3..0000000
--- a/arch/arm/configs/ag5evm_defconfig
+++ /dev/null
@@ -1,83 +0,0 @@
-CONFIG_EXPERIMENTAL=y
-CONFIG_SYSVIPC=y
-CONFIG_IKCONFIG=y
-CONFIG_IKCONFIG_PROC=y
-CONFIG_LOG_BUF_SHIFT=16
-CONFIG_NAMESPACES=y
-# CONFIG_UTS_NS is not set
-# CONFIG_IPC_NS is not set
-# CONFIG_USER_NS is not set
-# CONFIG_PID_NS is not set
-CONFIG_BLK_DEV_INITRD=y
-CONFIG_INITRAMFS_SOURCE=""
-CONFIG_EXPERT=y
-CONFIG_SLAB=y
-# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_IOSCHED_DEADLINE is not set
-# CONFIG_IOSCHED_CFQ is not set
-CONFIG_ARCH_SHMOBILE=y
-CONFIG_ARCH_SH73A0=y
-CONFIG_MACH_AG5EVM=y
-CONFIG_MEMORY_SIZE=0x10000000
-CONFIG_CPU_BPREDICT_DISABLE=y
-CONFIG_ARM_ERRATA_430973=y
-CONFIG_ARM_ERRATA_458693=y
-CONFIG_NO_HZ=y
-CONFIG_AEABI=y
-# CONFIG_OABI_COMPAT is not set
-CONFIG_HIGHMEM=y
-CONFIG_ZBOOT_ROM_TEXT=0x0
-CONFIG_ZBOOT_ROM_BSS=0x0
-CONFIG_CMDLINE="console=tty0 console=ttySC2,115200 earlyprintk=sh-sci.2,115200 ignore_loglevel"
-CONFIG_CMDLINE_FORCE=y
-CONFIG_KEXEC=y
-# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_PM=y
-# CONFIG_SUSPEND is not set
-CONFIG_PM_RUNTIME=y
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_INET=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
-# CONFIG_INET_DIAG is not set
-# CONFIG_IPV6 is not set
-# CONFIG_WIRELESS is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-# CONFIG_BLK_DEV is not set
-CONFIG_NETDEVICES=y
-CONFIG_NET_ETHERNET=y
-CONFIG_SMSC911X=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
-# CONFIG_WLAN is not set
-CONFIG_INPUT_SPARSEKMAP=y
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
-CONFIG_INPUT_EVDEV=y
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-CONFIG_SERIAL_SH_SCI=y
-CONFIG_SERIAL_SH_SCI_NR_UARTS=9
-CONFIG_SERIAL_SH_SCI_CONSOLE=y
-# CONFIG_LEGACY_PTYS is not set
-# CONFIG_HW_RANDOM is not set
-CONFIG_I2C=y
-CONFIG_I2C_SH_MOBILE=y
-# CONFIG_HWMON is not set
-# CONFIG_MFD_SUPPORT is not set
-CONFIG_FB=y
-CONFIG_FB_SH_MOBILE_LCDC=y
-CONFIG_FRAMEBUFFER_CONSOLE=y
-CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
-# CONFIG_HID_SUPPORT is not set
-# CONFIG_USB_SUPPORT is not set
-# CONFIG_DNOTIFY is not set
-# CONFIG_INOTIFY_USER is not set
-CONFIG_TMPFS=y
-# CONFIG_MISC_FILESYSTEMS is not set
-CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_KERNEL=y
-# CONFIG_FTRACE is not set
diff --git a/arch/arm/configs/at91_dt_defconfig b/arch/arm/configs/at91_dt_defconfig
index 75fd842..690e892 100644
--- a/arch/arm/configs/at91_dt_defconfig
+++ b/arch/arm/configs/at91_dt_defconfig
@@ -14,11 +14,13 @@
 # CONFIG_IOSCHED_DEADLINE is not set
 # CONFIG_IOSCHED_CFQ is not set
 CONFIG_ARCH_AT91=y
+CONFIG_SOC_AT91RM9200=y
 CONFIG_SOC_AT91SAM9260=y
 CONFIG_SOC_AT91SAM9263=y
 CONFIG_SOC_AT91SAM9G45=y
 CONFIG_SOC_AT91SAM9X5=y
 CONFIG_SOC_AT91SAM9N12=y
+CONFIG_MACH_AT91RM9200_DT=y
 CONFIG_MACH_AT91SAM9_DT=y
 CONFIG_AT91_PROGRAMMABLE_CLOCKS=y
 CONFIG_AT91_TIMER_HZ=128
@@ -62,6 +64,7 @@
 CONFIG_MTD_CMDLINE_PARTS=y
 CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
+CONFIG_MTD_DATAFLASH=y
 CONFIG_MTD_NAND=y
 CONFIG_MTD_NAND_ATMEL=y
 CONFIG_MTD_UBI=y
@@ -78,7 +81,6 @@
 CONFIG_SCSI_MULTI_LUN=y
 # CONFIG_SCSI_LOWLEVEL is not set
 CONFIG_NETDEVICES=y
-CONFIG_MII=y
 CONFIG_MACB=y
 # CONFIG_NET_VENDOR_BROADCOM is not set
 # CONFIG_NET_VENDOR_FARADAY is not set
diff --git a/arch/arm/configs/kota2_defconfig b/arch/arm/configs/kota2_defconfig
deleted file mode 100644
index 57ad3d4..0000000
--- a/arch/arm/configs/kota2_defconfig
+++ /dev/null
@@ -1,121 +0,0 @@
-# CONFIG_ARM_PATCH_PHYS_VIRT is not set
-CONFIG_EXPERIMENTAL=y
-CONFIG_SYSVIPC=y
-CONFIG_IKCONFIG=y
-CONFIG_IKCONFIG_PROC=y
-CONFIG_LOG_BUF_SHIFT=16
-CONFIG_CGROUPS=y
-CONFIG_CPUSETS=y
-CONFIG_NAMESPACES=y
-# CONFIG_UTS_NS is not set
-# CONFIG_IPC_NS is not set
-# CONFIG_USER_NS is not set
-# CONFIG_PID_NS is not set
-CONFIG_SYSCTL_SYSCALL=y
-CONFIG_EMBEDDED=y
-CONFIG_SLAB=y
-# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_IOSCHED_DEADLINE is not set
-# CONFIG_IOSCHED_CFQ is not set
-CONFIG_ARCH_SHMOBILE=y
-CONFIG_KEYBOARD_GPIO_POLLED=y
-CONFIG_ARCH_SH73A0=y
-CONFIG_MACH_KOTA2=y
-CONFIG_MEMORY_SIZE=0x1e000000
-# CONFIG_SH_TIMER_TMU is not set
-# CONFIG_SWP_EMULATE is not set
-CONFIG_CPU_BPREDICT_DISABLE=y
-CONFIG_ARM_ERRATA_460075=y
-CONFIG_ARM_ERRATA_742230=y
-CONFIG_ARM_ERRATA_742231=y
-CONFIG_PL310_ERRATA_588369=y
-CONFIG_ARM_ERRATA_720789=y
-CONFIG_PL310_ERRATA_727915=y
-CONFIG_ARM_ERRATA_743622=y
-CONFIG_ARM_ERRATA_751472=y
-CONFIG_PL310_ERRATA_753970=y
-CONFIG_ARM_ERRATA_754322=y
-CONFIG_PL310_ERRATA_769419=y
-CONFIG_NO_HZ=y
-CONFIG_SMP=y
-CONFIG_AEABI=y
-# CONFIG_OABI_COMPAT is not set
-CONFIG_HIGHMEM=y
-CONFIG_ZBOOT_ROM_TEXT=0x0
-CONFIG_ZBOOT_ROM_BSS=0x0
-CONFIG_CMDLINE="console=ttySC2,115200 earlyprintk=sh-sci.2,115200 ignore_loglevel"
-CONFIG_CMDLINE_FORCE=y
-CONFIG_KEXEC=y
-CONFIG_CPU_IDLE=y
-# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_PM_RUNTIME=y
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_INET=y
-CONFIG_IP_PNP=y
-CONFIG_IP_PNP_DHCP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
-# CONFIG_INET_DIAG is not set
-# CONFIG_IPV6 is not set
-CONFIG_CFG80211=y
-CONFIG_WIRELESS_EXT_SYSFS=y
-CONFIG_MAC80211=y
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-# CONFIG_BLK_DEV is not set
-CONFIG_NETDEVICES=y
-# CONFIG_NET_VENDOR_BROADCOM is not set
-# CONFIG_NET_VENDOR_CHELSIO is not set
-# CONFIG_NET_VENDOR_FARADAY is not set
-# CONFIG_NET_VENDOR_INTEL is not set
-# CONFIG_NET_VENDOR_MARVELL is not set
-# CONFIG_NET_VENDOR_MICREL is not set
-# CONFIG_NET_VENDOR_NATSEMI is not set
-# CONFIG_NET_VENDOR_SEEQ is not set
-CONFIG_SMSC911X=y
-# CONFIG_NET_VENDOR_STMICRO is not set
-CONFIG_B43=y
-CONFIG_B43_PHY_N=y
-CONFIG_B43_DEBUG=y
-CONFIG_INPUT_SPARSEKMAP=y
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
-CONFIG_INPUT_EVDEV=y
-# CONFIG_KEYBOARD_ATKBD is not set
-CONFIG_KEYBOARD_GPIO=y
-CONFIG_KEYBOARD_SH_KEYSC=y
-# CONFIG_INPUT_MOUSE is not set
-# CONFIG_LEGACY_PTYS is not set
-CONFIG_SERIAL_SH_SCI=y
-CONFIG_SERIAL_SH_SCI_NR_UARTS=9
-CONFIG_SERIAL_SH_SCI_CONSOLE=y
-# CONFIG_HW_RANDOM is not set
-CONFIG_I2C_SH_MOBILE=y
-# CONFIG_HWMON is not set
-CONFIG_BCMA=y
-CONFIG_BCMA_DEBUG=y
-CONFIG_FB=y
-CONFIG_FB_SH_MOBILE_LCDC=y
-CONFIG_LCD_PLATFORM=y
-CONFIG_FRAMEBUFFER_CONSOLE=y
-CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
-# CONFIG_HID_SUPPORT is not set
-# CONFIG_USB_SUPPORT is not set
-CONFIG_MMC=y
-CONFIG_MMC_SDHI=y
-CONFIG_MMC_SH_MMCIF=y
-CONFIG_NEW_LEDS=y
-CONFIG_LEDS_CLASS=y
-CONFIG_LEDS_GPIO=y
-CONFIG_LEDS_RENESAS_TPU=y
-CONFIG_LEDS_TRIGGERS=y
-# CONFIG_DNOTIFY is not set
-CONFIG_TMPFS=y
-# CONFIG_MISC_FILESYSTEMS is not set
-CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_INFO=y
-CONFIG_DEBUG_INFO_REDUCED=y
-# CONFIG_FTRACE is not set
-CONFIG_DEBUG_USER=y
diff --git a/arch/arm/include/asm/dma-contiguous.h b/arch/arm/include/asm/dma-contiguous.h
index e072bb2..4f8e9e5 100644
--- a/arch/arm/include/asm/dma-contiguous.h
+++ b/arch/arm/include/asm/dma-contiguous.h
@@ -5,7 +5,6 @@
 #ifdef CONFIG_DMA_CMA
 
 #include <linux/types.h>
-#include <asm-generic/dma-contiguous.h>
 
 void dma_contiguous_early_fixup(phys_addr_t base, unsigned long size);
 
diff --git a/arch/arm/include/asm/mach/arch.h b/arch/arm/include/asm/mach/arch.h
index 69b879a..402a2bc 100644
--- a/arch/arm/include/asm/mach/arch.h
+++ b/arch/arm/include/asm/mach/arch.h
@@ -35,7 +35,7 @@
 	unsigned int		nr_irqs;	/* number of IRQs */
 
 #ifdef CONFIG_ZONE_DMA
-	unsigned long		dma_zone_size;	/* size of DMA-able area */
+	phys_addr_t		dma_zone_size;	/* size of DMA-able area */
 #endif
 
 	unsigned int		video_start;	/* start of video RAM	*/
diff --git a/arch/arm/include/asm/outercache.h b/arch/arm/include/asm/outercache.h
index 12f71a1..f94784f 100644
--- a/arch/arm/include/asm/outercache.h
+++ b/arch/arm/include/asm/outercache.h
@@ -37,10 +37,10 @@
 	void (*resume)(void);
 };
 
-#ifdef CONFIG_OUTER_CACHE
-
 extern struct outer_cache_fns outer_cache;
 
+#ifdef CONFIG_OUTER_CACHE
+
 static inline void outer_inv_range(phys_addr_t start, phys_addr_t end)
 {
 	if (outer_cache.inv_range)
diff --git a/arch/arm/mach-at91/include/mach/hardware.h b/arch/arm/mach-at91/include/mach/hardware.h
index a832e07..f17aa31 100644
--- a/arch/arm/mach-at91/include/mach/hardware.h
+++ b/arch/arm/mach-at91/include/mach/hardware.h
@@ -33,6 +33,7 @@
 #include <mach/at91sam9g45.h>
 #include <mach/at91sam9x5.h>
 #include <mach/at91sam9n12.h>
+#include <mach/sama5d3.h>
 
 /*
  * On all at91 except rm9200 and x40 have the System Controller starts
diff --git a/arch/arm/mach-at91/include/mach/sama5d3.h b/arch/arm/mach-at91/include/mach/sama5d3.h
index 6dc81ee..31096a8 100644
--- a/arch/arm/mach-at91/include/mach/sama5d3.h
+++ b/arch/arm/mach-at91/include/mach/sama5d3.h
@@ -65,6 +65,14 @@
 #define SAMA5D3_ID_IRQ0		47	/* Advanced Interrupt Controller (IRQ0) */
 
 /*
+ * User Peripheral physical base addresses.
+ */
+#define SAMA5D3_BASE_USART0	0xf001c000
+#define SAMA5D3_BASE_USART1	0xf0020000
+#define SAMA5D3_BASE_USART2	0xf8020000
+#define SAMA5D3_BASE_USART3	0xf8024000
+
+/*
  * Internal Memory
  */
 #define SAMA5D3_SRAM_BASE	0x00300000	/* Internal SRAM base address */
diff --git a/arch/arm/mach-at91/include/mach/uncompress.h b/arch/arm/mach-at91/include/mach/uncompress.h
index 5659f7c..4bb644f 100644
--- a/arch/arm/mach-at91/include/mach/uncompress.h
+++ b/arch/arm/mach-at91/include/mach/uncompress.h
@@ -94,6 +94,15 @@
 	0,
 };
 
+static const u32 uarts_sama5[] = {
+	AT91_BASE_DBGU1,
+	SAMA5D3_BASE_USART0,
+	SAMA5D3_BASE_USART1,
+	SAMA5D3_BASE_USART2,
+	SAMA5D3_BASE_USART3,
+	0,
+};
+
 static inline const u32* decomp_soc_detect(void __iomem *dbgu_base)
 {
 	u32 cidr, socid;
@@ -121,8 +130,12 @@
 	case ARCH_ID_AT91SAM9RL64:
 		return uarts_sam9rl;
 
+	case ARCH_ID_AT91SAM9N12:
 	case ARCH_ID_AT91SAM9X5:
 		return uarts_sam9x5;
+
+	case ARCH_ID_SAMA5D3:
+		return uarts_sama5;
 	}
 
 	/* at91sam9g10 */
diff --git a/arch/arm/mach-ep93xx/vision_ep9307.c b/arch/arm/mach-ep93xx/vision_ep9307.c
index 64f2e50..6bc1c18 100644
--- a/arch/arm/mach-ep93xx/vision_ep9307.c
+++ b/arch/arm/mach-ep93xx/vision_ep9307.c
@@ -224,62 +224,15 @@
 #define VISION_SPI_MMC_WP	EP93XX_GPIO_LINE_F(0)
 #define VISION_SPI_MMC_CD	EP93XX_GPIO_LINE_EGPIO15
 
-static struct gpio vision_spi_mmc_gpios[] = {
-	{ VISION_SPI_MMC_WP, GPIOF_DIR_IN, "mmc_spi:wp" },
-	{ VISION_SPI_MMC_CD, GPIOF_DIR_IN, "mmc_spi:cd" },
-};
-
-static int vision_spi_mmc_init(struct device *pdev,
-			irqreturn_t (*func)(int, void *), void *pdata)
-{
-	int err;
-
-	err = gpio_request_array(vision_spi_mmc_gpios,
-				 ARRAY_SIZE(vision_spi_mmc_gpios));
-	if (err)
-		return err;
-
-	err = gpio_set_debounce(VISION_SPI_MMC_CD, 1);
-	if (err)
-		goto exit_err;
-
-	err = request_irq(gpio_to_irq(VISION_SPI_MMC_CD), func,
-			IRQ_TYPE_EDGE_BOTH, "mmc_spi:cd", pdata);
-	if (err)
-		goto exit_err;
-
-	return 0;
-
-exit_err:
-	gpio_free_array(vision_spi_mmc_gpios, ARRAY_SIZE(vision_spi_mmc_gpios));
-	return err;
-
-}
-
-static void vision_spi_mmc_exit(struct device *pdev, void *pdata)
-{
-	free_irq(gpio_to_irq(VISION_SPI_MMC_CD), pdata);
-	gpio_free_array(vision_spi_mmc_gpios, ARRAY_SIZE(vision_spi_mmc_gpios));
-}
-
-static int vision_spi_mmc_get_ro(struct device *pdev)
-{
-	return !!gpio_get_value(VISION_SPI_MMC_WP);
-}
-
-static int vision_spi_mmc_get_cd(struct device *pdev)
-{
-	return !gpio_get_value(VISION_SPI_MMC_CD);
-}
-
 static struct mmc_spi_platform_data vision_spi_mmc_data = {
-	.init		= vision_spi_mmc_init,
-	.exit		= vision_spi_mmc_exit,
-	.get_ro		= vision_spi_mmc_get_ro,
-	.get_cd		= vision_spi_mmc_get_cd,
 	.detect_delay	= 100,
 	.powerup_msecs	= 100,
 	.ocr_mask	= MMC_VDD_32_33 | MMC_VDD_33_34,
+	.flags		= MMC_SPI_USE_CD_GPIO | MMC_SPI_USE_RO_GPIO,
+	.cd_gpio	= VISION_SPI_MMC_CD,
+	.cd_debounce	= 1,
+	.ro_gpio	= VISION_SPI_MMC_WP,
+	.caps2		= MMC_CAP2_RO_ACTIVE_HIGH,
 };
 
 static int vision_spi_mmc_hw_setup(struct spi_device *spi)
diff --git a/arch/arm/mach-exynos/Kconfig b/arch/arm/mach-exynos/Kconfig
index 5952e68..56fe819 100644
--- a/arch/arm/mach-exynos/Kconfig
+++ b/arch/arm/mach-exynos/Kconfig
@@ -36,6 +36,7 @@
 	bool "SAMSUNG EXYNOS4210"
 	default y
 	depends on ARCH_EXYNOS4
+	select ARCH_HAS_BANDGAP
 	select ARM_CPU_SUSPEND if PM
 	select PINCTRL_EXYNOS
 	select PM_GENERIC_DOMAINS if PM
@@ -49,7 +50,9 @@
 	bool "SAMSUNG EXYNOS4212"
 	default y
 	depends on ARCH_EXYNOS4
+	select ARCH_HAS_BANDGAP
 	select PINCTRL_EXYNOS
+	select PM_GENERIC_DOMAINS if PM
 	select S5P_PM if PM
 	select S5P_SLEEP if PM
 	select SAMSUNG_DMADEV
@@ -60,7 +63,9 @@
 	bool "SAMSUNG EXYNOS4412"
 	default y
 	depends on ARCH_EXYNOS4
+	select ARCH_HAS_BANDGAP
 	select PINCTRL_EXYNOS
+	select PM_GENERIC_DOMAINS if PM
 	select SAMSUNG_DMADEV
 	help
 	  Enable EXYNOS4412 SoC support
@@ -69,6 +74,7 @@
 	bool "SAMSUNG EXYNOS5250"
 	default y
 	depends on ARCH_EXYNOS5
+	select ARCH_HAS_BANDGAP
 	select PINCTRL_EXYNOS
 	select PM_GENERIC_DOMAINS if PM
 	select S5P_PM if PM
@@ -93,6 +99,7 @@
 	default y
 	depends on ARCH_EXYNOS5
 	select ARCH_DMA_ADDR_T_64BIT if ARM_LPAE
+	select ARCH_HAS_BANDGAP
 	select ARCH_HAS_OPP
 	select HAVE_ARM_ARCH_TIMER
 	select AUTO_ZRELADDR
diff --git a/arch/arm/mach-exynos/cpuidle.c b/arch/arm/mach-exynos/cpuidle.c
index 225ee84..ac139226 100644
--- a/arch/arm/mach-exynos/cpuidle.c
+++ b/arch/arm/mach-exynos/cpuidle.c
@@ -200,6 +200,9 @@
 	if (soc_is_exynos5250())
 		exynos5_core_down_clk();
 
+	if (soc_is_exynos5440())
+		exynos4_idle_driver.state_count = 1;
+
 	ret = cpuidle_register_driver(&exynos4_idle_driver);
 	if (ret) {
 		printk(KERN_ERR "CPUidle failed to register driver\n");
diff --git a/arch/arm/mach-highbank/Kconfig b/arch/arm/mach-highbank/Kconfig
index 6acbdab..8e8437d 100644
--- a/arch/arm/mach-highbank/Kconfig
+++ b/arch/arm/mach-highbank/Kconfig
@@ -1,9 +1,14 @@
 config ARCH_HIGHBANK
 	bool "Calxeda ECX-1000/2000 (Highbank/Midway)" if ARCH_MULTI_V7
+	select ARCH_DMA_ADDR_T_64BIT if ARM_LPAE
 	select ARCH_HAS_CPUFREQ
+	select ARCH_HAS_HOLES_MEMORYMODEL
 	select ARCH_HAS_OPP
 	select ARCH_WANT_OPTIONAL_GPIOLIB
 	select ARM_AMBA
+	select ARM_ERRATA_764369
+	select ARM_ERRATA_775420
+	select ARM_ERRATA_798181
 	select ARM_GIC
 	select ARM_TIMER_SP804
 	select CACHE_L2X0
@@ -18,3 +23,4 @@
 	select PL320_MBOX
 	select SPARSE_IRQ
 	select USE_OF
+	select ZONE_DMA if ARM_LPAE
diff --git a/arch/arm/mach-highbank/highbank.c b/arch/arm/mach-highbank/highbank.c
index 8881579..8e63ccd 100644
--- a/arch/arm/mach-highbank/highbank.c
+++ b/arch/arm/mach-highbank/highbank.c
@@ -18,14 +18,11 @@
 #include <linux/clocksource.h>
 #include <linux/dma-mapping.h>
 #include <linux/io.h>
-#include <linux/irq.h>
 #include <linux/irqchip.h>
-#include <linux/irqdomain.h>
 #include <linux/of.h>
 #include <linux/of_irq.h>
 #include <linux/of_platform.h>
 #include <linux/of_address.h>
-#include <linux/smp.h>
 #include <linux/amba/bus.h>
 #include <linux/clk-provider.h>
 
@@ -35,7 +32,6 @@
 #include <asm/hardware/cache-l2x0.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
-#include <asm/mach/time.h>
 
 #include "core.h"
 #include "sysregs.h"
@@ -65,13 +61,11 @@
 			  HB_JUMP_TABLE_PHYS(cpu) + 15);
 }
 
-#ifdef CONFIG_CACHE_L2X0
 static void highbank_l2x0_disable(void)
 {
 	/* Disable PL310 L2 Cache controller */
 	highbank_smc1(0x102, 0x0);
 }
-#endif
 
 static void __init highbank_init_irq(void)
 {
@@ -80,12 +74,13 @@
 	if (of_find_compatible_node(NULL, NULL, "arm,cortex-a9"))
 		highbank_scu_map_io();
 
-#ifdef CONFIG_CACHE_L2X0
 	/* Enable PL310 L2 Cache controller */
-	highbank_smc1(0x102, 0x1);
-	l2x0_of_init(0, ~0UL);
-	outer_cache.disable = highbank_l2x0_disable;
-#endif
+	if (IS_ENABLED(CONFIG_CACHE_L2X0) &&
+	    of_find_compatible_node(NULL, NULL, "arm,pl310-cache")) {
+		highbank_smc1(0x102, 0x1);
+		l2x0_of_init(0, ~0UL);
+		outer_cache.disable = highbank_l2x0_disable;
+	}
 }
 
 static void __init highbank_timer_init(void)
@@ -176,6 +171,9 @@
 };
 
 DT_MACHINE_START(HIGHBANK, "Highbank")
+#if defined(CONFIG_ZONE_DMA) && defined(CONFIG_ARM_LPAE)
+	.dma_zone_size	= (4ULL * SZ_1G),
+#endif
 	.smp		= smp_ops(highbank_smp_ops),
 	.init_irq	= highbank_init_irq,
 	.init_time	= highbank_timer_init,
diff --git a/arch/arm/mach-imx/clk.h b/arch/arm/mach-imx/clk.h
index 3451f1f..048c5ad8 100644
--- a/arch/arm/mach-imx/clk.h
+++ b/arch/arm/mach-imx/clk.h
@@ -89,7 +89,8 @@
 static inline struct clk *imx_clk_mux(const char *name, void __iomem *reg,
 		u8 shift, u8 width, const char **parents, int num_parents)
 {
-	return clk_register_mux(NULL, name, parents, num_parents, 0, reg, shift,
+	return clk_register_mux(NULL, name, parents, num_parents,
+			CLK_SET_RATE_NO_REPARENT, reg, shift,
 			width, 0, &imx_ccm_lock);
 }
 
@@ -98,7 +99,7 @@
 		int num_parents, unsigned long flags)
 {
 	return clk_register_mux(NULL, name, parents, num_parents,
-			flags, reg, shift, width, 0,
+			flags | CLK_SET_RATE_NO_REPARENT, reg, shift, width, 0,
 			&imx_ccm_lock);
 }
 
diff --git a/arch/arm/mach-imx/mm-imx25.c b/arch/arm/mach-imx/mm-imx25.c
index e065c11..5211f62 100644
--- a/arch/arm/mach-imx/mm-imx25.c
+++ b/arch/arm/mach-imx/mm-imx25.c
@@ -61,25 +61,8 @@
 	mxc_init_irq(MX25_IO_ADDRESS(MX25_AVIC_BASE_ADDR));
 }
 
-static struct sdma_script_start_addrs imx25_sdma_script __initdata = {
-	.ap_2_ap_addr = 729,
-	.uart_2_mcu_addr = 904,
-	.per_2_app_addr = 1255,
-	.mcu_2_app_addr = 834,
-	.uartsh_2_mcu_addr = 1120,
-	.per_2_shp_addr = 1329,
-	.mcu_2_shp_addr = 1048,
-	.ata_2_mcu_addr = 1560,
-	.mcu_2_ata_addr = 1479,
-	.app_2_per_addr = 1189,
-	.app_2_mcu_addr = 770,
-	.shp_2_per_addr = 1407,
-	.shp_2_mcu_addr = 979,
-};
-
 static struct sdma_platform_data imx25_sdma_pdata __initdata = {
 	.fw_name = "sdma-imx25.bin",
-	.script_addrs = &imx25_sdma_script,
 };
 
 static const struct resource imx25_audmux_res[] __initconst = {
diff --git a/arch/arm/mach-imx/mm-imx5.c b/arch/arm/mach-imx/mm-imx5.c
index a8229b7..eb3cce3 100644
--- a/arch/arm/mach-imx/mm-imx5.c
+++ b/arch/arm/mach-imx/mm-imx5.c
@@ -103,22 +103,8 @@
 	tzic_init_irq(MX53_IO_ADDRESS(MX53_TZIC_BASE_ADDR));
 }
 
-static struct sdma_script_start_addrs imx51_sdma_script __initdata = {
-	.ap_2_ap_addr = 642,
-	.uart_2_mcu_addr = 817,
-	.mcu_2_app_addr = 747,
-	.mcu_2_shp_addr = 961,
-	.ata_2_mcu_addr = 1473,
-	.mcu_2_ata_addr = 1392,
-	.app_2_per_addr = 1033,
-	.app_2_mcu_addr = 683,
-	.shp_2_per_addr = 1251,
-	.shp_2_mcu_addr = 892,
-};
-
 static struct sdma_platform_data imx51_sdma_pdata __initdata = {
 	.fw_name = "sdma-imx51.bin",
-	.script_addrs = &imx51_sdma_script,
 };
 
 static const struct resource imx51_audmux_res[] __initconst = {
diff --git a/arch/arm/mach-mmp/Makefile b/arch/arm/mach-mmp/Makefile
index 095c155..9b702a1 100644
--- a/arch/arm/mach-mmp/Makefile
+++ b/arch/arm/mach-mmp/Makefile
@@ -2,7 +2,7 @@
 # Makefile for Marvell's PXA168 processors line
 #
 
-obj-y				+= common.o devices.o time.o irq.o
+obj-y				+= common.o devices.o time.o
 
 # SoC support
 obj-$(CONFIG_CPU_PXA168)	+= pxa168.o
diff --git a/arch/arm/mach-mmp/common.h b/arch/arm/mach-mmp/common.h
index 991d7e9..cf445ba 100644
--- a/arch/arm/mach-mmp/common.h
+++ b/arch/arm/mach-mmp/common.h
@@ -3,7 +3,6 @@
 
 extern void timer_init(int irq);
 
-extern void __init icu_init_irq(void);
 extern void __init mmp_map_io(void);
 extern void mmp_restart(enum reboot_mode, const char *);
 extern void __init pxa168_clk_init(void);
diff --git a/arch/arm/mach-mmp/include/mach/entry-macro.S b/arch/arm/mach-mmp/include/mach/entry-macro.S
deleted file mode 100644
index bd152e2..0000000
--- a/arch/arm/mach-mmp/include/mach/entry-macro.S
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * linux/arch/arm/mach-mmp/include/mach/entry-macro.S
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <asm/irq.h>
-#include <mach/regs-icu.h>
-
-	.macro	get_irqnr_preamble, base, tmp
-	mrc	p15, 0, \tmp, c0, c0, 0		@ CPUID
-	and	\tmp, \tmp, #0xff00
-	cmp	\tmp, #0x5800
-	ldr	\base, =mmp_icu_base
-	ldr	\base, [\base, #0]
-	addne	\base, \base, #0x10c		@ PJ1 AP INT SEL register
-	addeq	\base, \base, #0x104		@ PJ4 IRQ SEL register
-	.endm
-
-	.macro	get_irqnr_and_base, irqnr, irqstat, base, tmp
-	ldr	\tmp, [\base, #0]
-	and	\irqnr, \tmp, #0x3f
-	tst	\tmp, #(1 << 6)
-	.endm
diff --git a/arch/arm/mach-mmp/include/mach/pxa168.h b/arch/arm/mach-mmp/include/mach/pxa168.h
index 459c2d0..a83ba7c 100644
--- a/arch/arm/mach-mmp/include/mach/pxa168.h
+++ b/arch/arm/mach-mmp/include/mach/pxa168.h
@@ -4,6 +4,7 @@
 #include <linux/reboot.h>
 
 extern void pxa168_timer_init(void);
+extern void __init icu_init_irq(void);
 extern void __init pxa168_init_irq(void);
 extern void pxa168_restart(enum reboot_mode, const char *);
 extern void pxa168_clear_keypad_wakeup(void);
diff --git a/arch/arm/mach-mmp/include/mach/pxa910.h b/arch/arm/mach-mmp/include/mach/pxa910.h
index b914afa..9225320 100644
--- a/arch/arm/mach-mmp/include/mach/pxa910.h
+++ b/arch/arm/mach-mmp/include/mach/pxa910.h
@@ -2,6 +2,7 @@
 #define __ASM_MACH_PXA910_H
 
 extern void pxa910_timer_init(void);
+extern void __init icu_init_irq(void);
 extern void __init pxa910_init_irq(void);
 
 #include <linux/i2c.h>
diff --git a/arch/arm/mach-mmp/mmp-dt.c b/arch/arm/mach-mmp/mmp-dt.c
index b37915d..cca529c 100644
--- a/arch/arm/mach-mmp/mmp-dt.c
+++ b/arch/arm/mach-mmp/mmp-dt.c
@@ -9,17 +9,13 @@
  *  publishhed by the Free Software Foundation.
  */
 
-#include <linux/irq.h>
-#include <linux/irqdomain.h>
-#include <linux/of_irq.h>
+#include <linux/irqchip.h>
 #include <linux/of_platform.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/time.h>
-#include <mach/irqs.h>
 
 #include "common.h"
 
-extern void __init mmp_dt_irq_init(void);
 extern void __init mmp_dt_init_timer(void);
 
 static const struct of_dev_auxdata pxa168_auxdata_lookup[] __initconst = {
@@ -64,7 +60,6 @@
 
 DT_MACHINE_START(PXA168_DT, "Marvell PXA168 (Device Tree Support)")
 	.map_io		= mmp_map_io,
-	.init_irq	= mmp_dt_irq_init,
 	.init_time	= mmp_dt_init_timer,
 	.init_machine	= pxa168_dt_init,
 	.dt_compat	= mmp_dt_board_compat,
@@ -72,7 +67,6 @@
 
 DT_MACHINE_START(PXA910_DT, "Marvell PXA910 (Device Tree Support)")
 	.map_io		= mmp_map_io,
-	.init_irq	= mmp_dt_irq_init,
 	.init_time	= mmp_dt_init_timer,
 	.init_machine	= pxa910_dt_init,
 	.dt_compat	= mmp_dt_board_compat,
diff --git a/arch/arm/mach-mmp/mmp2-dt.c b/arch/arm/mach-mmp/mmp2-dt.c
index 4ac2567..023cb45 100644
--- a/arch/arm/mach-mmp/mmp2-dt.c
+++ b/arch/arm/mach-mmp/mmp2-dt.c
@@ -10,18 +10,13 @@
  */
 
 #include <linux/io.h>
-#include <linux/irq.h>
-#include <linux/irqdomain.h>
-#include <linux/of_irq.h>
+#include <linux/irqchip.h>
 #include <linux/of_platform.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/time.h>
-#include <mach/irqs.h>
-#include <mach/regs-apbc.h>
 
 #include "common.h"
 
-extern void __init mmp_dt_irq_init(void);
 extern void __init mmp_dt_init_timer(void);
 
 static const struct of_dev_auxdata mmp2_auxdata_lookup[] __initconst = {
@@ -49,7 +44,6 @@
 
 DT_MACHINE_START(MMP2_DT, "Marvell MMP2 (Device Tree Support)")
 	.map_io		= mmp_map_io,
-	.init_irq	= mmp_dt_irq_init,
 	.init_time	= mmp_dt_init_timer,
 	.init_machine	= mmp2_dt_init,
 	.dt_compat	= mmp2_dt_board_compat,
diff --git a/arch/arm/mach-mmp/mmp2.c b/arch/arm/mach-mmp/mmp2.c
index c7592f1..a70b553 100644
--- a/arch/arm/mach-mmp/mmp2.c
+++ b/arch/arm/mach-mmp/mmp2.c
@@ -13,6 +13,8 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/irqchip/mmp.h>
 #include <linux/platform_device.h>
 
 #include <asm/hardware/cache-tauros2.h>
@@ -26,6 +28,7 @@
 #include <mach/mfp.h>
 #include <mach/devices.h>
 #include <mach/mmp2.h>
+#include <mach/pm-mmp2.h>
 
 #include "common.h"
 
@@ -94,6 +97,9 @@
 void __init mmp2_init_irq(void)
 {
 	mmp2_init_icu();
+#ifdef CONFIG_PM
+	icu_irq_chip.irq_set_wake = mmp2_set_wake;
+#endif
 }
 
 static int __init mmp2_init(void)
diff --git a/arch/arm/mach-mmp/pxa910.c b/arch/arm/mach-mmp/pxa910.c
index ce6393a..eb57ee1 100644
--- a/arch/arm/mach-mmp/pxa910.c
+++ b/arch/arm/mach-mmp/pxa910.c
@@ -12,6 +12,8 @@
 #include <linux/init.h>
 #include <linux/list.h>
 #include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/irqchip/mmp.h>
 #include <linux/platform_device.h>
 
 #include <asm/hardware/cache-tauros2.h>
@@ -23,6 +25,8 @@
 #include <mach/dma.h>
 #include <mach/mfp.h>
 #include <mach/devices.h>
+#include <mach/pm-pxa910.h>
+#include <mach/pxa910.h>
 
 #include "common.h"
 
@@ -79,6 +83,9 @@
 void __init pxa910_init_irq(void)
 {
 	icu_init_irq();
+#ifdef CONFIG_PM
+	icu_irq_chip.irq_set_wake = pxa910_set_wake;
+#endif
 }
 
 static int __init pxa910_init(void)
diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile
index cc36bfe..afb457c 100644
--- a/arch/arm/mach-omap2/Makefile
+++ b/arch/arm/mach-omap2/Makefile
@@ -63,6 +63,7 @@
 obj-$(CONFIG_ARCH_OMAP3)		+= omap3-restart.o
 obj-$(CONFIG_ARCH_OMAP4)		+= omap4-restart.o
 obj-$(CONFIG_SOC_OMAP5)			+= omap4-restart.o
+obj-$(CONFIG_SOC_DRA7XX)		+= omap4-restart.o
 
 # Pin multiplexing
 obj-$(CONFIG_SOC_OMAP2420)		+= mux2420.o
@@ -148,6 +149,7 @@
 obj-$(CONFIG_SOC_OMAP5)			+= $(powerdomain-common)
 obj-$(CONFIG_SOC_OMAP5)			+= powerdomains54xx_data.o
 obj-$(CONFIG_SOC_DRA7XX)		+= $(powerdomain-common)
+obj-$(CONFIG_SOC_DRA7XX)		+= powerdomains7xx_data.o
 
 # PRCM clockdomain control
 clockdomain-common			+= clockdomain.o
@@ -166,6 +168,7 @@
 obj-$(CONFIG_SOC_OMAP5)			+= $(clockdomain-common)
 obj-$(CONFIG_SOC_OMAP5)			+= clockdomains54xx_data.o
 obj-$(CONFIG_SOC_DRA7XX)		+= $(clockdomain-common)
+obj-$(CONFIG_SOC_DRA7XX)		+= clockdomains7xx_data.o
 
 # Clock framework
 obj-$(CONFIG_ARCH_OMAP2)		+= $(clock-common) clock2xxx.o
@@ -209,6 +212,7 @@
 obj-$(CONFIG_SOC_AM33XX)		+= omap_hwmod_33xx_data.o
 obj-$(CONFIG_ARCH_OMAP4)		+= omap_hwmod_44xx_data.o
 obj-$(CONFIG_SOC_OMAP5)			+= omap_hwmod_54xx_data.o
+obj-$(CONFIG_SOC_DRA7XX)		+= omap_hwmod_7xx_data.o
 
 # EMU peripherals
 obj-$(CONFIG_OMAP3_EMU)			+= emu.o
diff --git a/arch/arm/mach-omap2/board-generic.c b/arch/arm/mach-omap2/board-generic.c
index b89e55b..39c7838 100644
--- a/arch/arm/mach-omap2/board-generic.c
+++ b/arch/arm/mach-omap2/board-generic.c
@@ -238,5 +238,6 @@
 	.init_machine	= omap_generic_init,
 	.init_time	= omap5_realtime_timer_init,
 	.dt_compat	= dra7xx_boards_compat,
+	.restart	= omap44xx_restart,
 MACHINE_END
 #endif
diff --git a/arch/arm/mach-omap2/cclock33xx_data.c b/arch/arm/mach-omap2/cclock33xx_data.c
index ba6534d..865d30e 100644
--- a/arch/arm/mach-omap2/cclock33xx_data.c
+++ b/arch/arm/mach-omap2/cclock33xx_data.c
@@ -421,6 +421,10 @@
 DEFINE_STRUCT_CLK_HW_OMAP(aes0_fck, NULL);
 DEFINE_STRUCT_CLK(aes0_fck, dpll_core_ck_parents, clk_ops_null);
 
+static struct clk rng_fck;
+DEFINE_STRUCT_CLK_HW_OMAP(rng_fck, NULL);
+DEFINE_STRUCT_CLK(rng_fck, dpll_core_ck_parents, clk_ops_null);
+
 /*
  * Modules clock nodes
  *
@@ -966,6 +970,7 @@
 	CLK(NULL,	"smartreflex1_fck",	&smartreflex1_fck),
 	CLK(NULL,	"sha0_fck",		&sha0_fck),
 	CLK(NULL,	"aes0_fck",		&aes0_fck),
+	CLK(NULL,	"rng_fck",		&rng_fck),
 	CLK(NULL,	"timer1_fck",		&timer1_fck),
 	CLK(NULL,	"timer2_fck",		&timer2_fck),
 	CLK(NULL,	"timer3_fck",		&timer3_fck),
diff --git a/arch/arm/mach-omap2/cclock44xx_data.c b/arch/arm/mach-omap2/cclock44xx_data.c
index 88e37a47..1d5b529 100644
--- a/arch/arm/mach-omap2/cclock44xx_data.c
+++ b/arch/arm/mach-omap2/cclock44xx_data.c
@@ -1707,6 +1707,18 @@
 	omap2_clk_disable_autoidle_all();
 
 	/*
+	 * A set rate of ABE DPLL inturn triggers a set rate of USB DPLL
+	 * when its in bypass. So always lock USB before ABE DPLL.
+	 */
+	/*
+	 * Lock USB DPLL on OMAP4 devices so that the L3INIT power
+	 * domain can transition to retention state when not in use.
+	 */
+	rc = clk_set_rate(&dpll_usb_ck, OMAP4_DPLL_USB_DEFFREQ);
+	if (rc)
+		pr_err("%s: failed to configure USB DPLL!\n", __func__);
+
+	/*
 	 * On OMAP4460 the ABE DPLL fails to turn on if in idle low-power
 	 * state when turning the ABE clock domain. Workaround this by
 	 * locking the ABE DPLL on boot.
@@ -1718,13 +1730,5 @@
 	if (rc)
 		pr_err("%s: failed to configure ABE DPLL!\n", __func__);
 
-	/*
-	 * Lock USB DPLL on OMAP4 devices so that the L3INIT power
-	 * domain can transition to retention state when not in use.
-	 */
-	rc = clk_set_rate(&dpll_usb_ck, OMAP4_DPLL_USB_DEFFREQ);
-	if (rc)
-		pr_err("%s: failed to configure USB DPLL!\n", __func__);
-
 	return 0;
 }
diff --git a/arch/arm/mach-omap2/clockdomain.h b/arch/arm/mach-omap2/clockdomain.h
index daeecf1..4b03394 100644
--- a/arch/arm/mach-omap2/clockdomain.h
+++ b/arch/arm/mach-omap2/clockdomain.h
@@ -217,6 +217,7 @@
 extern void __init am33xx_clockdomains_init(void);
 extern void __init omap44xx_clockdomains_init(void);
 extern void __init omap54xx_clockdomains_init(void);
+extern void __init dra7xx_clockdomains_init(void);
 
 extern void clkdm_add_autodeps(struct clockdomain *clkdm);
 extern void clkdm_del_autodeps(struct clockdomain *clkdm);
diff --git a/arch/arm/mach-omap2/clockdomains7xx_data.c b/arch/arm/mach-omap2/clockdomains7xx_data.c
new file mode 100644
index 0000000..57d5df0
--- /dev/null
+++ b/arch/arm/mach-omap2/clockdomains7xx_data.c
@@ -0,0 +1,740 @@
+/*
+ * DRA7xx Clock domains framework
+ *
+ * Copyright (C) 2009-2013 Texas Instruments, Inc.
+ * Copyright (C) 2009-2011 Nokia Corporation
+ *
+ * Generated by code originally written by:
+ * Abhijit Pagare (abhijitpagare@ti.com)
+ * Benoit Cousson (b-cousson@ti.com)
+ * Paul Walmsley (paul@pwsan.com)
+ *
+ * This file is automatically generated from the OMAP hardware databases.
+ * We respectfully ask that any modifications to this file be coordinated
+ * with the public linux-omap@vger.kernel.org mailing list and the
+ * authors above to ensure that the autogeneration scripts are kept
+ * up-to-date with the file contents.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/io.h>
+
+#include "clockdomain.h"
+#include "cm1_7xx.h"
+#include "cm2_7xx.h"
+
+#include "cm-regbits-7xx.h"
+#include "prm7xx.h"
+#include "prcm44xx.h"
+#include "prcm_mpu7xx.h"
+
+/* Static Dependencies for DRA7xx Clock Domains */
+
+static struct clkdm_dep cam_wkup_sleep_deps[] = {
+	{ .clkdm_name = "emif_clkdm" },
+	{ NULL },
+};
+
+static struct clkdm_dep dma_wkup_sleep_deps[] = {
+	{ .clkdm_name = "dss_clkdm" },
+	{ .clkdm_name = "emif_clkdm" },
+	{ .clkdm_name = "ipu_clkdm" },
+	{ .clkdm_name = "ipu1_clkdm" },
+	{ .clkdm_name = "ipu2_clkdm" },
+	{ .clkdm_name = "iva_clkdm" },
+	{ .clkdm_name = "l3init_clkdm" },
+	{ .clkdm_name = "l4cfg_clkdm" },
+	{ .clkdm_name = "l4per_clkdm" },
+	{ .clkdm_name = "l4per2_clkdm" },
+	{ .clkdm_name = "l4per3_clkdm" },
+	{ .clkdm_name = "l4sec_clkdm" },
+	{ .clkdm_name = "pcie_clkdm" },
+	{ .clkdm_name = "wkupaon_clkdm" },
+	{ NULL },
+};
+
+static struct clkdm_dep dsp1_wkup_sleep_deps[] = {
+	{ .clkdm_name = "atl_clkdm" },
+	{ .clkdm_name = "cam_clkdm" },
+	{ .clkdm_name = "dsp2_clkdm" },
+	{ .clkdm_name = "dss_clkdm" },
+	{ .clkdm_name = "emif_clkdm" },
+	{ .clkdm_name = "eve1_clkdm" },
+	{ .clkdm_name = "eve2_clkdm" },
+	{ .clkdm_name = "eve3_clkdm" },
+	{ .clkdm_name = "eve4_clkdm" },
+	{ .clkdm_name = "gmac_clkdm" },
+	{ .clkdm_name = "gpu_clkdm" },
+	{ .clkdm_name = "ipu_clkdm" },
+	{ .clkdm_name = "ipu1_clkdm" },
+	{ .clkdm_name = "ipu2_clkdm" },
+	{ .clkdm_name = "iva_clkdm" },
+	{ .clkdm_name = "l3init_clkdm" },
+	{ .clkdm_name = "l4per_clkdm" },
+	{ .clkdm_name = "l4per2_clkdm" },
+	{ .clkdm_name = "l4per3_clkdm" },
+	{ .clkdm_name = "l4sec_clkdm" },
+	{ .clkdm_name = "pcie_clkdm" },
+	{ .clkdm_name = "vpe_clkdm" },
+	{ .clkdm_name = "wkupaon_clkdm" },
+	{ NULL },
+};
+
+static struct clkdm_dep dsp2_wkup_sleep_deps[] = {
+	{ .clkdm_name = "atl_clkdm" },
+	{ .clkdm_name = "cam_clkdm" },
+	{ .clkdm_name = "dsp1_clkdm" },
+	{ .clkdm_name = "dss_clkdm" },
+	{ .clkdm_name = "emif_clkdm" },
+	{ .clkdm_name = "eve1_clkdm" },
+	{ .clkdm_name = "eve2_clkdm" },
+	{ .clkdm_name = "eve3_clkdm" },
+	{ .clkdm_name = "eve4_clkdm" },
+	{ .clkdm_name = "gmac_clkdm" },
+	{ .clkdm_name = "gpu_clkdm" },
+	{ .clkdm_name = "ipu_clkdm" },
+	{ .clkdm_name = "ipu1_clkdm" },
+	{ .clkdm_name = "ipu2_clkdm" },
+	{ .clkdm_name = "iva_clkdm" },
+	{ .clkdm_name = "l3init_clkdm" },
+	{ .clkdm_name = "l4per_clkdm" },
+	{ .clkdm_name = "l4per2_clkdm" },
+	{ .clkdm_name = "l4per3_clkdm" },
+	{ .clkdm_name = "l4sec_clkdm" },
+	{ .clkdm_name = "pcie_clkdm" },
+	{ .clkdm_name = "vpe_clkdm" },
+	{ .clkdm_name = "wkupaon_clkdm" },
+	{ NULL },
+};
+
+static struct clkdm_dep dss_wkup_sleep_deps[] = {
+	{ .clkdm_name = "emif_clkdm" },
+	{ .clkdm_name = "iva_clkdm" },
+	{ NULL },
+};
+
+static struct clkdm_dep eve1_wkup_sleep_deps[] = {
+	{ .clkdm_name = "emif_clkdm" },
+	{ .clkdm_name = "eve2_clkdm" },
+	{ .clkdm_name = "eve3_clkdm" },
+	{ .clkdm_name = "eve4_clkdm" },
+	{ .clkdm_name = "iva_clkdm" },
+	{ NULL },
+};
+
+static struct clkdm_dep eve2_wkup_sleep_deps[] = {
+	{ .clkdm_name = "emif_clkdm" },
+	{ .clkdm_name = "eve1_clkdm" },
+	{ .clkdm_name = "eve3_clkdm" },
+	{ .clkdm_name = "eve4_clkdm" },
+	{ .clkdm_name = "iva_clkdm" },
+	{ NULL },
+};
+
+static struct clkdm_dep eve3_wkup_sleep_deps[] = {
+	{ .clkdm_name = "emif_clkdm" },
+	{ .clkdm_name = "eve1_clkdm" },
+	{ .clkdm_name = "eve2_clkdm" },
+	{ .clkdm_name = "eve4_clkdm" },
+	{ .clkdm_name = "iva_clkdm" },
+	{ NULL },
+};
+
+static struct clkdm_dep eve4_wkup_sleep_deps[] = {
+	{ .clkdm_name = "emif_clkdm" },
+	{ .clkdm_name = "eve1_clkdm" },
+	{ .clkdm_name = "eve2_clkdm" },
+	{ .clkdm_name = "eve3_clkdm" },
+	{ .clkdm_name = "iva_clkdm" },
+	{ NULL },
+};
+
+static struct clkdm_dep gmac_wkup_sleep_deps[] = {
+	{ .clkdm_name = "emif_clkdm" },
+	{ .clkdm_name = "l4per2_clkdm" },
+	{ NULL },
+};
+
+static struct clkdm_dep gpu_wkup_sleep_deps[] = {
+	{ .clkdm_name = "emif_clkdm" },
+	{ .clkdm_name = "iva_clkdm" },
+	{ NULL },
+};
+
+static struct clkdm_dep ipu1_wkup_sleep_deps[] = {
+	{ .clkdm_name = "atl_clkdm" },
+	{ .clkdm_name = "dsp1_clkdm" },
+	{ .clkdm_name = "dsp2_clkdm" },
+	{ .clkdm_name = "dss_clkdm" },
+	{ .clkdm_name = "emif_clkdm" },
+	{ .clkdm_name = "eve1_clkdm" },
+	{ .clkdm_name = "eve2_clkdm" },
+	{ .clkdm_name = "eve3_clkdm" },
+	{ .clkdm_name = "eve4_clkdm" },
+	{ .clkdm_name = "gmac_clkdm" },
+	{ .clkdm_name = "gpu_clkdm" },
+	{ .clkdm_name = "ipu_clkdm" },
+	{ .clkdm_name = "ipu2_clkdm" },
+	{ .clkdm_name = "iva_clkdm" },
+	{ .clkdm_name = "l3init_clkdm" },
+	{ .clkdm_name = "l3main1_clkdm" },
+	{ .clkdm_name = "l4cfg_clkdm" },
+	{ .clkdm_name = "l4per_clkdm" },
+	{ .clkdm_name = "l4per2_clkdm" },
+	{ .clkdm_name = "l4per3_clkdm" },
+	{ .clkdm_name = "l4sec_clkdm" },
+	{ .clkdm_name = "pcie_clkdm" },
+	{ .clkdm_name = "vpe_clkdm" },
+	{ .clkdm_name = "wkupaon_clkdm" },
+	{ NULL },
+};
+
+static struct clkdm_dep ipu2_wkup_sleep_deps[] = {
+	{ .clkdm_name = "atl_clkdm" },
+	{ .clkdm_name = "dsp1_clkdm" },
+	{ .clkdm_name = "dsp2_clkdm" },
+	{ .clkdm_name = "dss_clkdm" },
+	{ .clkdm_name = "emif_clkdm" },
+	{ .clkdm_name = "eve1_clkdm" },
+	{ .clkdm_name = "eve2_clkdm" },
+	{ .clkdm_name = "eve3_clkdm" },
+	{ .clkdm_name = "eve4_clkdm" },
+	{ .clkdm_name = "gmac_clkdm" },
+	{ .clkdm_name = "gpu_clkdm" },
+	{ .clkdm_name = "ipu_clkdm" },
+	{ .clkdm_name = "ipu1_clkdm" },
+	{ .clkdm_name = "iva_clkdm" },
+	{ .clkdm_name = "l3init_clkdm" },
+	{ .clkdm_name = "l3main1_clkdm" },
+	{ .clkdm_name = "l4cfg_clkdm" },
+	{ .clkdm_name = "l4per_clkdm" },
+	{ .clkdm_name = "l4per2_clkdm" },
+	{ .clkdm_name = "l4per3_clkdm" },
+	{ .clkdm_name = "l4sec_clkdm" },
+	{ .clkdm_name = "pcie_clkdm" },
+	{ .clkdm_name = "vpe_clkdm" },
+	{ .clkdm_name = "wkupaon_clkdm" },
+	{ NULL },
+};
+
+static struct clkdm_dep iva_wkup_sleep_deps[] = {
+	{ .clkdm_name = "emif_clkdm" },
+	{ NULL },
+};
+
+static struct clkdm_dep l3init_wkup_sleep_deps[] = {
+	{ .clkdm_name = "emif_clkdm" },
+	{ .clkdm_name = "iva_clkdm" },
+	{ .clkdm_name = "l4cfg_clkdm" },
+	{ .clkdm_name = "l4per_clkdm" },
+	{ .clkdm_name = "l4per3_clkdm" },
+	{ .clkdm_name = "l4sec_clkdm" },
+	{ .clkdm_name = "wkupaon_clkdm" },
+	{ NULL },
+};
+
+static struct clkdm_dep l4per2_wkup_sleep_deps[] = {
+	{ .clkdm_name = "dsp1_clkdm" },
+	{ .clkdm_name = "dsp2_clkdm" },
+	{ .clkdm_name = "ipu1_clkdm" },
+	{ .clkdm_name = "ipu2_clkdm" },
+	{ NULL },
+};
+
+static struct clkdm_dep l4sec_wkup_sleep_deps[] = {
+	{ .clkdm_name = "emif_clkdm" },
+	{ .clkdm_name = "l4per_clkdm" },
+	{ NULL },
+};
+
+static struct clkdm_dep mpu_wkup_sleep_deps[] = {
+	{ .clkdm_name = "cam_clkdm" },
+	{ .clkdm_name = "dsp1_clkdm" },
+	{ .clkdm_name = "dsp2_clkdm" },
+	{ .clkdm_name = "dss_clkdm" },
+	{ .clkdm_name = "emif_clkdm" },
+	{ .clkdm_name = "eve1_clkdm" },
+	{ .clkdm_name = "eve2_clkdm" },
+	{ .clkdm_name = "eve3_clkdm" },
+	{ .clkdm_name = "eve4_clkdm" },
+	{ .clkdm_name = "gmac_clkdm" },
+	{ .clkdm_name = "gpu_clkdm" },
+	{ .clkdm_name = "ipu_clkdm" },
+	{ .clkdm_name = "ipu1_clkdm" },
+	{ .clkdm_name = "ipu2_clkdm" },
+	{ .clkdm_name = "iva_clkdm" },
+	{ .clkdm_name = "l3init_clkdm" },
+	{ .clkdm_name = "l3main1_clkdm" },
+	{ .clkdm_name = "l4cfg_clkdm" },
+	{ .clkdm_name = "l4per_clkdm" },
+	{ .clkdm_name = "l4per2_clkdm" },
+	{ .clkdm_name = "l4per3_clkdm" },
+	{ .clkdm_name = "l4sec_clkdm" },
+	{ .clkdm_name = "pcie_clkdm" },
+	{ .clkdm_name = "vpe_clkdm" },
+	{ .clkdm_name = "wkupaon_clkdm" },
+	{ NULL },
+};
+
+static struct clkdm_dep pcie_wkup_sleep_deps[] = {
+	{ .clkdm_name = "atl_clkdm" },
+	{ .clkdm_name = "cam_clkdm" },
+	{ .clkdm_name = "dsp1_clkdm" },
+	{ .clkdm_name = "dsp2_clkdm" },
+	{ .clkdm_name = "dss_clkdm" },
+	{ .clkdm_name = "emif_clkdm" },
+	{ .clkdm_name = "eve1_clkdm" },
+	{ .clkdm_name = "eve2_clkdm" },
+	{ .clkdm_name = "eve3_clkdm" },
+	{ .clkdm_name = "eve4_clkdm" },
+	{ .clkdm_name = "gmac_clkdm" },
+	{ .clkdm_name = "gpu_clkdm" },
+	{ .clkdm_name = "ipu_clkdm" },
+	{ .clkdm_name = "ipu1_clkdm" },
+	{ .clkdm_name = "iva_clkdm" },
+	{ .clkdm_name = "l3init_clkdm" },
+	{ .clkdm_name = "l4cfg_clkdm" },
+	{ .clkdm_name = "l4per_clkdm" },
+	{ .clkdm_name = "l4per2_clkdm" },
+	{ .clkdm_name = "l4per3_clkdm" },
+	{ .clkdm_name = "l4sec_clkdm" },
+	{ .clkdm_name = "vpe_clkdm" },
+	{ NULL },
+};
+
+static struct clkdm_dep vpe_wkup_sleep_deps[] = {
+	{ .clkdm_name = "emif_clkdm" },
+	{ .clkdm_name = "l4per3_clkdm" },
+	{ NULL },
+};
+
+static struct clockdomain l4per3_7xx_clkdm = {
+	.name		  = "l4per3_clkdm",
+	.pwrdm		  = { .name = "l4per_pwrdm" },
+	.prcm_partition	  = DRA7XX_CM_CORE_PARTITION,
+	.cm_inst	  = DRA7XX_CM_CORE_L4PER_INST,
+	.clkdm_offs	  = DRA7XX_CM_CORE_L4PER_L4PER3_CDOFFS,
+	.dep_bit	  = DRA7XX_L4PER3_STATDEP_SHIFT,
+	.flags		  = CLKDM_CAN_HWSUP_SWSUP,
+};
+
+static struct clockdomain l4per2_7xx_clkdm = {
+	.name		  = "l4per2_clkdm",
+	.pwrdm		  = { .name = "l4per_pwrdm" },
+	.prcm_partition	  = DRA7XX_CM_CORE_PARTITION,
+	.cm_inst	  = DRA7XX_CM_CORE_L4PER_INST,
+	.clkdm_offs	  = DRA7XX_CM_CORE_L4PER_L4PER2_CDOFFS,
+	.dep_bit	  = DRA7XX_L4PER2_STATDEP_SHIFT,
+	.wkdep_srcs	  = l4per2_wkup_sleep_deps,
+	.sleepdep_srcs	  = l4per2_wkup_sleep_deps,
+	.flags		  = CLKDM_CAN_HWSUP_SWSUP,
+};
+
+static struct clockdomain mpu0_7xx_clkdm = {
+	.name		  = "mpu0_clkdm",
+	.pwrdm		  = { .name = "cpu0_pwrdm" },
+	.prcm_partition	  = DRA7XX_MPU_PRCM_PARTITION,
+	.cm_inst	  = DRA7XX_MPU_PRCM_CM_C0_INST,
+	.clkdm_offs	  = DRA7XX_MPU_PRCM_CM_C0_CPU0_CDOFFS,
+	.flags		  = CLKDM_CAN_FORCE_WAKEUP | CLKDM_CAN_HWSUP,
+};
+
+static struct clockdomain iva_7xx_clkdm = {
+	.name		  = "iva_clkdm",
+	.pwrdm		  = { .name = "iva_pwrdm" },
+	.prcm_partition	  = DRA7XX_CM_CORE_PARTITION,
+	.cm_inst	  = DRA7XX_CM_CORE_IVA_INST,
+	.clkdm_offs	  = DRA7XX_CM_CORE_IVA_IVA_CDOFFS,
+	.dep_bit	  = DRA7XX_IVA_STATDEP_SHIFT,
+	.wkdep_srcs	  = iva_wkup_sleep_deps,
+	.sleepdep_srcs	  = iva_wkup_sleep_deps,
+	.flags		  = CLKDM_CAN_HWSUP_SWSUP,
+};
+
+static struct clockdomain coreaon_7xx_clkdm = {
+	.name		  = "coreaon_clkdm",
+	.pwrdm		  = { .name = "coreaon_pwrdm" },
+	.prcm_partition	  = DRA7XX_CM_CORE_PARTITION,
+	.cm_inst	  = DRA7XX_CM_CORE_COREAON_INST,
+	.clkdm_offs	  = DRA7XX_CM_CORE_COREAON_COREAON_CDOFFS,
+	.flags		  = CLKDM_CAN_FORCE_WAKEUP | CLKDM_CAN_HWSUP,
+};
+
+static struct clockdomain ipu1_7xx_clkdm = {
+	.name		  = "ipu1_clkdm",
+	.pwrdm		  = { .name = "ipu_pwrdm" },
+	.prcm_partition	  = DRA7XX_CM_CORE_AON_PARTITION,
+	.cm_inst	  = DRA7XX_CM_CORE_AON_IPU_INST,
+	.clkdm_offs	  = DRA7XX_CM_CORE_AON_IPU_IPU1_CDOFFS,
+	.dep_bit	  = DRA7XX_IPU1_STATDEP_SHIFT,
+	.wkdep_srcs	  = ipu1_wkup_sleep_deps,
+	.sleepdep_srcs	  = ipu1_wkup_sleep_deps,
+	.flags		  = CLKDM_CAN_HWSUP_SWSUP,
+};
+
+static struct clockdomain ipu2_7xx_clkdm = {
+	.name		  = "ipu2_clkdm",
+	.pwrdm		  = { .name = "core_pwrdm" },
+	.prcm_partition	  = DRA7XX_CM_CORE_PARTITION,
+	.cm_inst	  = DRA7XX_CM_CORE_CORE_INST,
+	.clkdm_offs	  = DRA7XX_CM_CORE_CORE_IPU2_CDOFFS,
+	.dep_bit	  = DRA7XX_IPU2_STATDEP_SHIFT,
+	.wkdep_srcs	  = ipu2_wkup_sleep_deps,
+	.sleepdep_srcs	  = ipu2_wkup_sleep_deps,
+	.flags		  = CLKDM_CAN_HWSUP_SWSUP,
+};
+
+static struct clockdomain l3init_7xx_clkdm = {
+	.name		  = "l3init_clkdm",
+	.pwrdm		  = { .name = "l3init_pwrdm" },
+	.prcm_partition	  = DRA7XX_CM_CORE_PARTITION,
+	.cm_inst	  = DRA7XX_CM_CORE_L3INIT_INST,
+	.clkdm_offs	  = DRA7XX_CM_CORE_L3INIT_L3INIT_CDOFFS,
+	.dep_bit	  = DRA7XX_L3INIT_STATDEP_SHIFT,
+	.wkdep_srcs	  = l3init_wkup_sleep_deps,
+	.sleepdep_srcs	  = l3init_wkup_sleep_deps,
+	.flags		  = CLKDM_CAN_HWSUP_SWSUP,
+};
+
+static struct clockdomain l4sec_7xx_clkdm = {
+	.name		  = "l4sec_clkdm",
+	.pwrdm		  = { .name = "l4per_pwrdm" },
+	.prcm_partition	  = DRA7XX_CM_CORE_PARTITION,
+	.cm_inst	  = DRA7XX_CM_CORE_L4PER_INST,
+	.clkdm_offs	  = DRA7XX_CM_CORE_L4PER_L4SEC_CDOFFS,
+	.dep_bit	  = DRA7XX_L4SEC_STATDEP_SHIFT,
+	.wkdep_srcs	  = l4sec_wkup_sleep_deps,
+	.sleepdep_srcs	  = l4sec_wkup_sleep_deps,
+	.flags		  = CLKDM_CAN_HWSUP_SWSUP,
+};
+
+static struct clockdomain l3main1_7xx_clkdm = {
+	.name		  = "l3main1_clkdm",
+	.pwrdm		  = { .name = "core_pwrdm" },
+	.prcm_partition	  = DRA7XX_CM_CORE_PARTITION,
+	.cm_inst	  = DRA7XX_CM_CORE_CORE_INST,
+	.clkdm_offs	  = DRA7XX_CM_CORE_CORE_L3MAIN1_CDOFFS,
+	.dep_bit	  = DRA7XX_L3MAIN1_STATDEP_SHIFT,
+	.flags		  = CLKDM_CAN_HWSUP,
+};
+
+static struct clockdomain vpe_7xx_clkdm = {
+	.name		  = "vpe_clkdm",
+	.pwrdm		  = { .name = "vpe_pwrdm" },
+	.prcm_partition	  = DRA7XX_CM_CORE_AON_PARTITION,
+	.cm_inst	  = DRA7XX_CM_CORE_AON_VPE_INST,
+	.clkdm_offs	  = DRA7XX_CM_CORE_AON_VPE_VPE_CDOFFS,
+	.dep_bit	  = DRA7XX_VPE_STATDEP_SHIFT,
+	.wkdep_srcs	  = vpe_wkup_sleep_deps,
+	.sleepdep_srcs	  = vpe_wkup_sleep_deps,
+	.flags		  = CLKDM_CAN_HWSUP_SWSUP,
+};
+
+static struct clockdomain mpu_7xx_clkdm = {
+	.name		  = "mpu_clkdm",
+	.pwrdm		  = { .name = "mpu_pwrdm" },
+	.prcm_partition	  = DRA7XX_CM_CORE_AON_PARTITION,
+	.cm_inst	  = DRA7XX_CM_CORE_AON_MPU_INST,
+	.clkdm_offs	  = DRA7XX_CM_CORE_AON_MPU_MPU_CDOFFS,
+	.wkdep_srcs	  = mpu_wkup_sleep_deps,
+	.sleepdep_srcs	  = mpu_wkup_sleep_deps,
+	.flags		  = CLKDM_CAN_FORCE_WAKEUP | CLKDM_CAN_HWSUP,
+};
+
+static struct clockdomain custefuse_7xx_clkdm = {
+	.name		  = "custefuse_clkdm",
+	.pwrdm		  = { .name = "custefuse_pwrdm" },
+	.prcm_partition	  = DRA7XX_CM_CORE_PARTITION,
+	.cm_inst	  = DRA7XX_CM_CORE_CUSTEFUSE_INST,
+	.clkdm_offs	  = DRA7XX_CM_CORE_CUSTEFUSE_CUSTEFUSE_CDOFFS,
+	.flags		  = CLKDM_CAN_FORCE_WAKEUP | CLKDM_CAN_HWSUP,
+};
+
+static struct clockdomain ipu_7xx_clkdm = {
+	.name		  = "ipu_clkdm",
+	.pwrdm		  = { .name = "ipu_pwrdm" },
+	.prcm_partition	  = DRA7XX_CM_CORE_AON_PARTITION,
+	.cm_inst	  = DRA7XX_CM_CORE_AON_IPU_INST,
+	.clkdm_offs	  = DRA7XX_CM_CORE_AON_IPU_IPU_CDOFFS,
+	.dep_bit	  = DRA7XX_IPU_STATDEP_SHIFT,
+	.flags		  = CLKDM_CAN_HWSUP_SWSUP,
+};
+
+static struct clockdomain mpu1_7xx_clkdm = {
+	.name		  = "mpu1_clkdm",
+	.pwrdm		  = { .name = "cpu1_pwrdm" },
+	.prcm_partition	  = DRA7XX_MPU_PRCM_PARTITION,
+	.cm_inst	  = DRA7XX_MPU_PRCM_CM_C1_INST,
+	.clkdm_offs	  = DRA7XX_MPU_PRCM_CM_C1_CPU1_CDOFFS,
+	.flags		  = CLKDM_CAN_FORCE_WAKEUP | CLKDM_CAN_HWSUP,
+};
+
+static struct clockdomain gmac_7xx_clkdm = {
+	.name		  = "gmac_clkdm",
+	.pwrdm		  = { .name = "l3init_pwrdm" },
+	.prcm_partition	  = DRA7XX_CM_CORE_PARTITION,
+	.cm_inst	  = DRA7XX_CM_CORE_L3INIT_INST,
+	.clkdm_offs	  = DRA7XX_CM_CORE_L3INIT_GMAC_CDOFFS,
+	.dep_bit	  = DRA7XX_GMAC_STATDEP_SHIFT,
+	.wkdep_srcs	  = gmac_wkup_sleep_deps,
+	.sleepdep_srcs	  = gmac_wkup_sleep_deps,
+	.flags		  = CLKDM_CAN_HWSUP_SWSUP,
+};
+
+static struct clockdomain l4cfg_7xx_clkdm = {
+	.name		  = "l4cfg_clkdm",
+	.pwrdm		  = { .name = "core_pwrdm" },
+	.prcm_partition	  = DRA7XX_CM_CORE_PARTITION,
+	.cm_inst	  = DRA7XX_CM_CORE_CORE_INST,
+	.clkdm_offs	  = DRA7XX_CM_CORE_CORE_L4CFG_CDOFFS,
+	.dep_bit	  = DRA7XX_L4CFG_STATDEP_SHIFT,
+	.flags		  = CLKDM_CAN_HWSUP,
+};
+
+static struct clockdomain dma_7xx_clkdm = {
+	.name		  = "dma_clkdm",
+	.pwrdm		  = { .name = "core_pwrdm" },
+	.prcm_partition	  = DRA7XX_CM_CORE_PARTITION,
+	.cm_inst	  = DRA7XX_CM_CORE_CORE_INST,
+	.clkdm_offs	  = DRA7XX_CM_CORE_CORE_DMA_CDOFFS,
+	.wkdep_srcs	  = dma_wkup_sleep_deps,
+	.sleepdep_srcs	  = dma_wkup_sleep_deps,
+	.flags		  = CLKDM_CAN_FORCE_WAKEUP | CLKDM_CAN_HWSUP,
+};
+
+static struct clockdomain rtc_7xx_clkdm = {
+	.name		  = "rtc_clkdm",
+	.pwrdm		  = { .name = "rtc_pwrdm" },
+	.prcm_partition	  = DRA7XX_CM_CORE_AON_PARTITION,
+	.cm_inst	  = DRA7XX_CM_CORE_AON_RTC_INST,
+	.clkdm_offs	  = DRA7XX_CM_CORE_AON_RTC_RTC_CDOFFS,
+	.flags		  = CLKDM_CAN_FORCE_WAKEUP | CLKDM_CAN_HWSUP,
+};
+
+static struct clockdomain pcie_7xx_clkdm = {
+	.name		  = "pcie_clkdm",
+	.pwrdm		  = { .name = "l3init_pwrdm" },
+	.prcm_partition	  = DRA7XX_CM_CORE_PARTITION,
+	.cm_inst	  = DRA7XX_CM_CORE_L3INIT_INST,
+	.clkdm_offs	  = DRA7XX_CM_CORE_L3INIT_PCIE_CDOFFS,
+	.dep_bit	  = DRA7XX_PCIE_STATDEP_SHIFT,
+	.wkdep_srcs	  = pcie_wkup_sleep_deps,
+	.sleepdep_srcs	  = pcie_wkup_sleep_deps,
+	.flags		  = CLKDM_CAN_HWSUP_SWSUP,
+};
+
+static struct clockdomain atl_7xx_clkdm = {
+	.name		  = "atl_clkdm",
+	.pwrdm		  = { .name = "core_pwrdm" },
+	.prcm_partition	  = DRA7XX_CM_CORE_PARTITION,
+	.cm_inst	  = DRA7XX_CM_CORE_CORE_INST,
+	.clkdm_offs	  = DRA7XX_CM_CORE_CORE_ATL_CDOFFS,
+	.dep_bit	  = DRA7XX_ATL_STATDEP_SHIFT,
+	.flags		  = CLKDM_CAN_FORCE_WAKEUP | CLKDM_CAN_HWSUP,
+};
+
+static struct clockdomain l3instr_7xx_clkdm = {
+	.name		  = "l3instr_clkdm",
+	.pwrdm		  = { .name = "core_pwrdm" },
+	.prcm_partition	  = DRA7XX_CM_CORE_PARTITION,
+	.cm_inst	  = DRA7XX_CM_CORE_CORE_INST,
+	.clkdm_offs	  = DRA7XX_CM_CORE_CORE_L3INSTR_CDOFFS,
+};
+
+static struct clockdomain dss_7xx_clkdm = {
+	.name		  = "dss_clkdm",
+	.pwrdm		  = { .name = "dss_pwrdm" },
+	.prcm_partition	  = DRA7XX_CM_CORE_PARTITION,
+	.cm_inst	  = DRA7XX_CM_CORE_DSS_INST,
+	.clkdm_offs	  = DRA7XX_CM_CORE_DSS_DSS_CDOFFS,
+	.dep_bit	  = DRA7XX_DSS_STATDEP_SHIFT,
+	.wkdep_srcs	  = dss_wkup_sleep_deps,
+	.sleepdep_srcs	  = dss_wkup_sleep_deps,
+	.flags		  = CLKDM_CAN_HWSUP_SWSUP,
+};
+
+static struct clockdomain emif_7xx_clkdm = {
+	.name		  = "emif_clkdm",
+	.pwrdm		  = { .name = "core_pwrdm" },
+	.prcm_partition	  = DRA7XX_CM_CORE_PARTITION,
+	.cm_inst	  = DRA7XX_CM_CORE_CORE_INST,
+	.clkdm_offs	  = DRA7XX_CM_CORE_CORE_EMIF_CDOFFS,
+	.dep_bit	  = DRA7XX_EMIF_STATDEP_SHIFT,
+	.flags		  = CLKDM_CAN_FORCE_WAKEUP | CLKDM_CAN_HWSUP,
+};
+
+static struct clockdomain emu_7xx_clkdm = {
+	.name		  = "emu_clkdm",
+	.pwrdm		  = { .name = "emu_pwrdm" },
+	.prcm_partition	  = DRA7XX_PRM_PARTITION,
+	.cm_inst	  = DRA7XX_PRM_EMU_CM_INST,
+	.clkdm_offs	  = DRA7XX_PRM_EMU_CM_EMU_CDOFFS,
+	.flags		  = CLKDM_CAN_FORCE_WAKEUP | CLKDM_CAN_HWSUP,
+};
+
+static struct clockdomain dsp2_7xx_clkdm = {
+	.name		  = "dsp2_clkdm",
+	.pwrdm		  = { .name = "dsp2_pwrdm" },
+	.prcm_partition	  = DRA7XX_CM_CORE_AON_PARTITION,
+	.cm_inst	  = DRA7XX_CM_CORE_AON_DSP2_INST,
+	.clkdm_offs	  = DRA7XX_CM_CORE_AON_DSP2_DSP2_CDOFFS,
+	.dep_bit	  = DRA7XX_DSP2_STATDEP_SHIFT,
+	.wkdep_srcs	  = dsp2_wkup_sleep_deps,
+	.sleepdep_srcs	  = dsp2_wkup_sleep_deps,
+	.flags		  = CLKDM_CAN_HWSUP_SWSUP,
+};
+
+static struct clockdomain dsp1_7xx_clkdm = {
+	.name		  = "dsp1_clkdm",
+	.pwrdm		  = { .name = "dsp1_pwrdm" },
+	.prcm_partition	  = DRA7XX_CM_CORE_AON_PARTITION,
+	.cm_inst	  = DRA7XX_CM_CORE_AON_DSP1_INST,
+	.clkdm_offs	  = DRA7XX_CM_CORE_AON_DSP1_DSP1_CDOFFS,
+	.dep_bit	  = DRA7XX_DSP1_STATDEP_SHIFT,
+	.wkdep_srcs	  = dsp1_wkup_sleep_deps,
+	.sleepdep_srcs	  = dsp1_wkup_sleep_deps,
+	.flags		  = CLKDM_CAN_HWSUP_SWSUP,
+};
+
+static struct clockdomain cam_7xx_clkdm = {
+	.name		  = "cam_clkdm",
+	.pwrdm		  = { .name = "cam_pwrdm" },
+	.prcm_partition	  = DRA7XX_CM_CORE_PARTITION,
+	.cm_inst	  = DRA7XX_CM_CORE_CAM_INST,
+	.clkdm_offs	  = DRA7XX_CM_CORE_CAM_CAM_CDOFFS,
+	.dep_bit	  = DRA7XX_CAM_STATDEP_SHIFT,
+	.wkdep_srcs	  = cam_wkup_sleep_deps,
+	.sleepdep_srcs	  = cam_wkup_sleep_deps,
+	.flags		  = CLKDM_CAN_HWSUP_SWSUP,
+};
+
+static struct clockdomain l4per_7xx_clkdm = {
+	.name		  = "l4per_clkdm",
+	.pwrdm		  = { .name = "l4per_pwrdm" },
+	.prcm_partition	  = DRA7XX_CM_CORE_PARTITION,
+	.cm_inst	  = DRA7XX_CM_CORE_L4PER_INST,
+	.clkdm_offs	  = DRA7XX_CM_CORE_L4PER_L4PER_CDOFFS,
+	.dep_bit	  = DRA7XX_L4PER_STATDEP_SHIFT,
+	.flags		  = CLKDM_CAN_HWSUP_SWSUP,
+};
+
+static struct clockdomain gpu_7xx_clkdm = {
+	.name		  = "gpu_clkdm",
+	.pwrdm		  = { .name = "gpu_pwrdm" },
+	.prcm_partition	  = DRA7XX_CM_CORE_PARTITION,
+	.cm_inst	  = DRA7XX_CM_CORE_GPU_INST,
+	.clkdm_offs	  = DRA7XX_CM_CORE_GPU_GPU_CDOFFS,
+	.dep_bit	  = DRA7XX_GPU_STATDEP_SHIFT,
+	.wkdep_srcs	  = gpu_wkup_sleep_deps,
+	.sleepdep_srcs	  = gpu_wkup_sleep_deps,
+	.flags		  = CLKDM_CAN_HWSUP_SWSUP,
+};
+
+static struct clockdomain eve4_7xx_clkdm = {
+	.name		  = "eve4_clkdm",
+	.pwrdm		  = { .name = "eve4_pwrdm" },
+	.prcm_partition	  = DRA7XX_CM_CORE_AON_PARTITION,
+	.cm_inst	  = DRA7XX_CM_CORE_AON_EVE4_INST,
+	.clkdm_offs	  = DRA7XX_CM_CORE_AON_EVE4_EVE4_CDOFFS,
+	.dep_bit	  = DRA7XX_EVE4_STATDEP_SHIFT,
+	.wkdep_srcs	  = eve4_wkup_sleep_deps,
+	.sleepdep_srcs	  = eve4_wkup_sleep_deps,
+	.flags		  = CLKDM_CAN_HWSUP_SWSUP,
+};
+
+static struct clockdomain eve2_7xx_clkdm = {
+	.name		  = "eve2_clkdm",
+	.pwrdm		  = { .name = "eve2_pwrdm" },
+	.prcm_partition	  = DRA7XX_CM_CORE_AON_PARTITION,
+	.cm_inst	  = DRA7XX_CM_CORE_AON_EVE2_INST,
+	.clkdm_offs	  = DRA7XX_CM_CORE_AON_EVE2_EVE2_CDOFFS,
+	.dep_bit	  = DRA7XX_EVE2_STATDEP_SHIFT,
+	.wkdep_srcs	  = eve2_wkup_sleep_deps,
+	.sleepdep_srcs	  = eve2_wkup_sleep_deps,
+	.flags		  = CLKDM_CAN_HWSUP_SWSUP,
+};
+
+static struct clockdomain eve3_7xx_clkdm = {
+	.name		  = "eve3_clkdm",
+	.pwrdm		  = { .name = "eve3_pwrdm" },
+	.prcm_partition	  = DRA7XX_CM_CORE_AON_PARTITION,
+	.cm_inst	  = DRA7XX_CM_CORE_AON_EVE3_INST,
+	.clkdm_offs	  = DRA7XX_CM_CORE_AON_EVE3_EVE3_CDOFFS,
+	.dep_bit	  = DRA7XX_EVE3_STATDEP_SHIFT,
+	.wkdep_srcs	  = eve3_wkup_sleep_deps,
+	.sleepdep_srcs	  = eve3_wkup_sleep_deps,
+	.flags		  = CLKDM_CAN_HWSUP_SWSUP,
+};
+
+static struct clockdomain wkupaon_7xx_clkdm = {
+	.name		  = "wkupaon_clkdm",
+	.pwrdm		  = { .name = "wkupaon_pwrdm" },
+	.prcm_partition	  = DRA7XX_PRM_PARTITION,
+	.cm_inst	  = DRA7XX_PRM_WKUPAON_CM_INST,
+	.clkdm_offs	  = DRA7XX_PRM_WKUPAON_CM_WKUPAON_CDOFFS,
+	.dep_bit	  = DRA7XX_WKUPAON_STATDEP_SHIFT,
+	.flags		  = CLKDM_CAN_FORCE_WAKEUP | CLKDM_CAN_HWSUP,
+};
+
+static struct clockdomain eve1_7xx_clkdm = {
+	.name		  = "eve1_clkdm",
+	.pwrdm		  = { .name = "eve1_pwrdm" },
+	.prcm_partition	  = DRA7XX_CM_CORE_AON_PARTITION,
+	.cm_inst	  = DRA7XX_CM_CORE_AON_EVE1_INST,
+	.clkdm_offs	  = DRA7XX_CM_CORE_AON_EVE1_EVE1_CDOFFS,
+	.dep_bit	  = DRA7XX_EVE1_STATDEP_SHIFT,
+	.wkdep_srcs	  = eve1_wkup_sleep_deps,
+	.sleepdep_srcs	  = eve1_wkup_sleep_deps,
+	.flags		  = CLKDM_CAN_HWSUP_SWSUP,
+};
+
+/* As clockdomains are added or removed above, this list must also be changed */
+static struct clockdomain *clockdomains_dra7xx[] __initdata = {
+	&l4per3_7xx_clkdm,
+	&l4per2_7xx_clkdm,
+	&mpu0_7xx_clkdm,
+	&iva_7xx_clkdm,
+	&coreaon_7xx_clkdm,
+	&ipu1_7xx_clkdm,
+	&ipu2_7xx_clkdm,
+	&l3init_7xx_clkdm,
+	&l4sec_7xx_clkdm,
+	&l3main1_7xx_clkdm,
+	&vpe_7xx_clkdm,
+	&mpu_7xx_clkdm,
+	&custefuse_7xx_clkdm,
+	&ipu_7xx_clkdm,
+	&mpu1_7xx_clkdm,
+	&gmac_7xx_clkdm,
+	&l4cfg_7xx_clkdm,
+	&dma_7xx_clkdm,
+	&rtc_7xx_clkdm,
+	&pcie_7xx_clkdm,
+	&atl_7xx_clkdm,
+	&l3instr_7xx_clkdm,
+	&dss_7xx_clkdm,
+	&emif_7xx_clkdm,
+	&emu_7xx_clkdm,
+	&dsp2_7xx_clkdm,
+	&dsp1_7xx_clkdm,
+	&cam_7xx_clkdm,
+	&l4per_7xx_clkdm,
+	&gpu_7xx_clkdm,
+	&eve4_7xx_clkdm,
+	&eve2_7xx_clkdm,
+	&eve3_7xx_clkdm,
+	&wkupaon_7xx_clkdm,
+	&eve1_7xx_clkdm,
+	NULL
+};
+
+void __init dra7xx_clockdomains_init(void)
+{
+	clkdm_register_platform_funcs(&omap4_clkdm_operations);
+	clkdm_register_clkdms(clockdomains_dra7xx);
+	clkdm_complete_init();
+}
diff --git a/arch/arm/mach-omap2/cm-regbits-7xx.h b/arch/arm/mach-omap2/cm-regbits-7xx.h
new file mode 100644
index 0000000..ad8f81c
--- /dev/null
+++ b/arch/arm/mach-omap2/cm-regbits-7xx.h
@@ -0,0 +1,51 @@
+/*
+ * DRA7xx Clock Management register bits
+ *
+ * Copyright (C) 2013 Texas Instruments Incorporated - http://www.ti.com
+ *
+ * Generated by code originally written by:
+ * Paul Walmsley (paul@pwsan.com)
+ * Rajendra Nayak (rnayak@ti.com)
+ * Benoit Cousson (b-cousson@ti.com)
+ *
+ * This file is automatically generated from the OMAP hardware databases.
+ * We respectfully ask that any modifications to this file be coordinated
+ * with the public linux-omap@vger.kernel.org mailing list and the
+ * authors above to ensure that the autogeneration scripts are kept
+ * up-to-date with the file contents.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ARCH_ARM_MACH_OMAP2_CM_REGBITS_7XX_H
+#define __ARCH_ARM_MACH_OMAP2_CM_REGBITS_7XX_H
+
+#define DRA7XX_ATL_STATDEP_SHIFT				30
+#define DRA7XX_CAM_STATDEP_SHIFT				9
+#define DRA7XX_DSP1_STATDEP_SHIFT				1
+#define DRA7XX_DSP2_STATDEP_SHIFT				18
+#define DRA7XX_DSS_STATDEP_SHIFT				8
+#define DRA7XX_EMIF_STATDEP_SHIFT				4
+#define DRA7XX_EVE1_STATDEP_SHIFT				19
+#define DRA7XX_EVE2_STATDEP_SHIFT				20
+#define DRA7XX_EVE3_STATDEP_SHIFT				21
+#define DRA7XX_EVE4_STATDEP_SHIFT				22
+#define DRA7XX_GMAC_STATDEP_SHIFT				25
+#define DRA7XX_GPU_STATDEP_SHIFT				10
+#define DRA7XX_IPU1_STATDEP_SHIFT				23
+#define DRA7XX_IPU2_STATDEP_SHIFT				0
+#define DRA7XX_IPU_STATDEP_SHIFT				24
+#define DRA7XX_IVA_STATDEP_SHIFT				2
+#define DRA7XX_L3INIT_STATDEP_SHIFT				7
+#define DRA7XX_L3MAIN1_STATDEP_SHIFT				5
+#define DRA7XX_L4CFG_STATDEP_SHIFT				12
+#define DRA7XX_L4PER2_STATDEP_SHIFT				26
+#define DRA7XX_L4PER3_STATDEP_SHIFT				27
+#define DRA7XX_L4PER_STATDEP_SHIFT				13
+#define DRA7XX_L4SEC_STATDEP_SHIFT				14
+#define DRA7XX_PCIE_STATDEP_SHIFT				29
+#define DRA7XX_VPE_STATDEP_SHIFT				28
+#define DRA7XX_WKUPAON_STATDEP_SHIFT				15
+#endif
diff --git a/arch/arm/mach-omap2/cm1_7xx.h b/arch/arm/mach-omap2/cm1_7xx.h
new file mode 100644
index 0000000..ca6fa1f
--- /dev/null
+++ b/arch/arm/mach-omap2/cm1_7xx.h
@@ -0,0 +1,324 @@
+/*
+ * DRA7xx CM1 instance offset macros
+ *
+ * Copyright (C) 2013 Texas Instruments Incorporated - http://www.ti.com
+ *
+ * Generated by code originally written by:
+ * Paul Walmsley (paul@pwsan.com)
+ * Rajendra Nayak (rnayak@ti.com)
+ * Benoit Cousson (b-cousson@ti.com)
+ *
+ * This file is automatically generated from the OMAP hardware databases.
+ * We respectfully ask that any modifications to this file be coordinated
+ * with the public linux-omap@vger.kernel.org mailing list and the
+ * authors above to ensure that the autogeneration scripts are kept
+ * up-to-date with the file contents.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#ifndef __ARCH_ARM_MACH_OMAP2_CM1_7XX_H
+#define __ARCH_ARM_MACH_OMAP2_CM1_7XX_H
+
+#include "cm_44xx_54xx.h"
+
+/* CM1 base address */
+#define DRA7XX_CM_CORE_AON_BASE		0x4a005000
+
+#define DRA7XX_CM_CORE_AON_REGADDR(inst, reg)				\
+	OMAP2_L4_IO_ADDRESS(DRA7XX_CM_CORE_AON_BASE + (inst) + (reg))
+
+/* CM_CORE_AON instances */
+#define DRA7XX_CM_CORE_AON_OCP_SOCKET_INST	0x0000
+#define DRA7XX_CM_CORE_AON_CKGEN_INST		0x0100
+#define DRA7XX_CM_CORE_AON_MPU_INST		0x0300
+#define DRA7XX_CM_CORE_AON_DSP1_INST		0x0400
+#define DRA7XX_CM_CORE_AON_IPU_INST		0x0500
+#define DRA7XX_CM_CORE_AON_DSP2_INST		0x0600
+#define DRA7XX_CM_CORE_AON_EVE1_INST		0x0640
+#define DRA7XX_CM_CORE_AON_EVE2_INST		0x0680
+#define DRA7XX_CM_CORE_AON_EVE3_INST		0x06c0
+#define DRA7XX_CM_CORE_AON_EVE4_INST		0x0700
+#define DRA7XX_CM_CORE_AON_RTC_INST		0x0740
+#define DRA7XX_CM_CORE_AON_VPE_INST		0x0760
+#define DRA7XX_CM_CORE_AON_RESTORE_INST		0x0e00
+#define DRA7XX_CM_CORE_AON_INSTR_INST		0x0f00
+
+/* CM_CORE_AON clockdomain register offsets (from instance start) */
+#define DRA7XX_CM_CORE_AON_MPU_MPU_CDOFFS	0x0000
+#define DRA7XX_CM_CORE_AON_DSP1_DSP1_CDOFFS	0x0000
+#define DRA7XX_CM_CORE_AON_IPU_IPU1_CDOFFS	0x0000
+#define DRA7XX_CM_CORE_AON_IPU_IPU_CDOFFS	0x0040
+#define DRA7XX_CM_CORE_AON_DSP2_DSP2_CDOFFS	0x0000
+#define DRA7XX_CM_CORE_AON_EVE1_EVE1_CDOFFS	0x0000
+#define DRA7XX_CM_CORE_AON_EVE2_EVE2_CDOFFS	0x0000
+#define DRA7XX_CM_CORE_AON_EVE3_EVE3_CDOFFS	0x0000
+#define DRA7XX_CM_CORE_AON_EVE4_EVE4_CDOFFS	0x0000
+#define DRA7XX_CM_CORE_AON_RTC_RTC_CDOFFS	0x0000
+#define DRA7XX_CM_CORE_AON_VPE_VPE_CDOFFS	0x0000
+
+/* CM_CORE_AON */
+
+/* CM_CORE_AON.OCP_SOCKET_CM_CORE_AON register offsets */
+#define DRA7XX_REVISION_CM_CORE_AON_OFFSET		0x0000
+#define DRA7XX_CM_CM_CORE_AON_PROFILING_CLKCTRL_OFFSET	0x0040
+#define DRA7XX_CM_CM_CORE_AON_PROFILING_CLKCTRL		DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_OCP_SOCKET_INST, 0x0040)
+#define DRA7XX_CM_CORE_AON_DEBUG_OUT_OFFSET		0x00ec
+#define DRA7XX_CM_CORE_AON_DEBUG_CFG0_OFFSET		0x00f0
+#define DRA7XX_CM_CORE_AON_DEBUG_CFG1_OFFSET		0x00f4
+#define DRA7XX_CM_CORE_AON_DEBUG_CFG2_OFFSET		0x00f8
+#define DRA7XX_CM_CORE_AON_DEBUG_CFG3_OFFSET		0x00fc
+
+/* CM_CORE_AON.CKGEN_CM_CORE_AON register offsets */
+#define DRA7XX_CM_CLKSEL_CORE_OFFSET			0x0000
+#define DRA7XX_CM_CLKSEL_CORE				DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x0000)
+#define DRA7XX_CM_CLKSEL_ABE_OFFSET			0x0008
+#define DRA7XX_CM_CLKSEL_ABE				DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x0008)
+#define DRA7XX_CM_DLL_CTRL_OFFSET			0x0010
+#define DRA7XX_CM_CLKMODE_DPLL_CORE_OFFSET		0x0020
+#define DRA7XX_CM_CLKMODE_DPLL_CORE			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x0020)
+#define DRA7XX_CM_IDLEST_DPLL_CORE_OFFSET		0x0024
+#define DRA7XX_CM_IDLEST_DPLL_CORE			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x0024)
+#define DRA7XX_CM_AUTOIDLE_DPLL_CORE_OFFSET		0x0028
+#define DRA7XX_CM_AUTOIDLE_DPLL_CORE			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x0028)
+#define DRA7XX_CM_CLKSEL_DPLL_CORE_OFFSET		0x002c
+#define DRA7XX_CM_CLKSEL_DPLL_CORE			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x002c)
+#define DRA7XX_CM_DIV_M2_DPLL_CORE_OFFSET		0x0030
+#define DRA7XX_CM_DIV_M2_DPLL_CORE			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x0030)
+#define DRA7XX_CM_DIV_M3_DPLL_CORE_OFFSET		0x0034
+#define DRA7XX_CM_DIV_M3_DPLL_CORE			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x0034)
+#define DRA7XX_CM_DIV_H11_DPLL_CORE_OFFSET		0x0038
+#define DRA7XX_CM_DIV_H11_DPLL_CORE			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x0038)
+#define DRA7XX_CM_DIV_H12_DPLL_CORE_OFFSET		0x003c
+#define DRA7XX_CM_DIV_H12_DPLL_CORE			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x003c)
+#define DRA7XX_CM_DIV_H13_DPLL_CORE_OFFSET		0x0040
+#define DRA7XX_CM_DIV_H13_DPLL_CORE			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x0040)
+#define DRA7XX_CM_DIV_H14_DPLL_CORE_OFFSET		0x0044
+#define DRA7XX_CM_DIV_H14_DPLL_CORE			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x0044)
+#define DRA7XX_CM_SSC_DELTAMSTEP_DPLL_CORE_OFFSET	0x0048
+#define DRA7XX_CM_SSC_MODFREQDIV_DPLL_CORE_OFFSET	0x004c
+#define DRA7XX_CM_DIV_H21_DPLL_CORE_OFFSET		0x0050
+#define DRA7XX_CM_DIV_H21_DPLL_CORE			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x0050)
+#define DRA7XX_CM_DIV_H22_DPLL_CORE_OFFSET		0x0054
+#define DRA7XX_CM_DIV_H22_DPLL_CORE			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x0054)
+#define DRA7XX_CM_DIV_H23_DPLL_CORE_OFFSET		0x0058
+#define DRA7XX_CM_DIV_H23_DPLL_CORE			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x0058)
+#define DRA7XX_CM_DIV_H24_DPLL_CORE_OFFSET		0x005c
+#define DRA7XX_CM_DIV_H24_DPLL_CORE			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x005c)
+#define DRA7XX_CM_CLKMODE_DPLL_MPU_OFFSET		0x0060
+#define DRA7XX_CM_CLKMODE_DPLL_MPU			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x0060)
+#define DRA7XX_CM_IDLEST_DPLL_MPU_OFFSET		0x0064
+#define DRA7XX_CM_IDLEST_DPLL_MPU			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x0064)
+#define DRA7XX_CM_AUTOIDLE_DPLL_MPU_OFFSET		0x0068
+#define DRA7XX_CM_AUTOIDLE_DPLL_MPU			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x0068)
+#define DRA7XX_CM_CLKSEL_DPLL_MPU_OFFSET		0x006c
+#define DRA7XX_CM_CLKSEL_DPLL_MPU			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x006c)
+#define DRA7XX_CM_DIV_M2_DPLL_MPU_OFFSET		0x0070
+#define DRA7XX_CM_DIV_M2_DPLL_MPU			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x0070)
+#define DRA7XX_CM_SSC_DELTAMSTEP_DPLL_MPU_OFFSET	0x0088
+#define DRA7XX_CM_SSC_MODFREQDIV_DPLL_MPU_OFFSET	0x008c
+#define DRA7XX_CM_BYPCLK_DPLL_MPU_OFFSET		0x009c
+#define DRA7XX_CM_BYPCLK_DPLL_MPU			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x009c)
+#define DRA7XX_CM_CLKMODE_DPLL_IVA_OFFSET		0x00a0
+#define DRA7XX_CM_CLKMODE_DPLL_IVA			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x00a0)
+#define DRA7XX_CM_IDLEST_DPLL_IVA_OFFSET		0x00a4
+#define DRA7XX_CM_IDLEST_DPLL_IVA			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x00a4)
+#define DRA7XX_CM_AUTOIDLE_DPLL_IVA_OFFSET		0x00a8
+#define DRA7XX_CM_AUTOIDLE_DPLL_IVA			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x00a8)
+#define DRA7XX_CM_CLKSEL_DPLL_IVA_OFFSET		0x00ac
+#define DRA7XX_CM_CLKSEL_DPLL_IVA			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x00ac)
+#define DRA7XX_CM_DIV_M2_DPLL_IVA_OFFSET		0x00b0
+#define DRA7XX_CM_DIV_M2_DPLL_IVA			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x00b0)
+#define DRA7XX_CM_DIV_M3_DPLL_IVA_OFFSET		0x00b4
+#define DRA7XX_CM_DIV_M3_DPLL_IVA			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x00b4)
+#define DRA7XX_CM_SSC_DELTAMSTEP_DPLL_IVA_OFFSET	0x00c8
+#define DRA7XX_CM_SSC_MODFREQDIV_DPLL_IVA_OFFSET	0x00cc
+#define DRA7XX_CM_BYPCLK_DPLL_IVA_OFFSET		0x00dc
+#define DRA7XX_CM_BYPCLK_DPLL_IVA			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x00dc)
+#define DRA7XX_CM_CLKMODE_DPLL_ABE_OFFSET		0x00e0
+#define DRA7XX_CM_CLKMODE_DPLL_ABE			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x00e0)
+#define DRA7XX_CM_IDLEST_DPLL_ABE_OFFSET		0x00e4
+#define DRA7XX_CM_IDLEST_DPLL_ABE			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x00e4)
+#define DRA7XX_CM_AUTOIDLE_DPLL_ABE_OFFSET		0x00e8
+#define DRA7XX_CM_AUTOIDLE_DPLL_ABE			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x00e8)
+#define DRA7XX_CM_CLKSEL_DPLL_ABE_OFFSET		0x00ec
+#define DRA7XX_CM_CLKSEL_DPLL_ABE			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x00ec)
+#define DRA7XX_CM_DIV_M2_DPLL_ABE_OFFSET		0x00f0
+#define DRA7XX_CM_DIV_M2_DPLL_ABE			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x00f0)
+#define DRA7XX_CM_DIV_M3_DPLL_ABE_OFFSET		0x00f4
+#define DRA7XX_CM_DIV_M3_DPLL_ABE			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x00f4)
+#define DRA7XX_CM_SSC_DELTAMSTEP_DPLL_ABE_OFFSET	0x0108
+#define DRA7XX_CM_SSC_MODFREQDIV_DPLL_ABE_OFFSET	0x010c
+#define DRA7XX_CM_CLKMODE_DPLL_DDR_OFFSET		0x0110
+#define DRA7XX_CM_CLKMODE_DPLL_DDR			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x0110)
+#define DRA7XX_CM_IDLEST_DPLL_DDR_OFFSET		0x0114
+#define DRA7XX_CM_IDLEST_DPLL_DDR			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x0114)
+#define DRA7XX_CM_AUTOIDLE_DPLL_DDR_OFFSET		0x0118
+#define DRA7XX_CM_AUTOIDLE_DPLL_DDR			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x0118)
+#define DRA7XX_CM_CLKSEL_DPLL_DDR_OFFSET		0x011c
+#define DRA7XX_CM_CLKSEL_DPLL_DDR			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x011c)
+#define DRA7XX_CM_DIV_M2_DPLL_DDR_OFFSET		0x0120
+#define DRA7XX_CM_DIV_M2_DPLL_DDR			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x0120)
+#define DRA7XX_CM_DIV_M3_DPLL_DDR_OFFSET		0x0124
+#define DRA7XX_CM_DIV_M3_DPLL_DDR			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x0124)
+#define DRA7XX_CM_DIV_H11_DPLL_DDR_OFFSET		0x0128
+#define DRA7XX_CM_DIV_H11_DPLL_DDR			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x0128)
+#define DRA7XX_CM_SSC_DELTAMSTEP_DPLL_DDR_OFFSET	0x012c
+#define DRA7XX_CM_SSC_MODFREQDIV_DPLL_DDR_OFFSET	0x0130
+#define DRA7XX_CM_CLKMODE_DPLL_DSP_OFFSET		0x0134
+#define DRA7XX_CM_CLKMODE_DPLL_DSP			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x0134)
+#define DRA7XX_CM_IDLEST_DPLL_DSP_OFFSET		0x0138
+#define DRA7XX_CM_IDLEST_DPLL_DSP			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x0138)
+#define DRA7XX_CM_AUTOIDLE_DPLL_DSP_OFFSET		0x013c
+#define DRA7XX_CM_AUTOIDLE_DPLL_DSP			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x013c)
+#define DRA7XX_CM_CLKSEL_DPLL_DSP_OFFSET		0x0140
+#define DRA7XX_CM_CLKSEL_DPLL_DSP			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x0140)
+#define DRA7XX_CM_DIV_M2_DPLL_DSP_OFFSET		0x0144
+#define DRA7XX_CM_DIV_M2_DPLL_DSP			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x0144)
+#define DRA7XX_CM_DIV_M3_DPLL_DSP_OFFSET		0x0148
+#define DRA7XX_CM_DIV_M3_DPLL_DSP			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x0148)
+#define DRA7XX_CM_SSC_DELTAMSTEP_DPLL_DSP_OFFSET	0x014c
+#define DRA7XX_CM_SSC_MODFREQDIV_DPLL_DSP_OFFSET	0x0150
+#define DRA7XX_CM_BYPCLK_DPLL_DSP_OFFSET		0x0154
+#define DRA7XX_CM_BYPCLK_DPLL_DSP			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x0154)
+#define DRA7XX_CM_SHADOW_FREQ_CONFIG1_OFFSET		0x0160
+#define DRA7XX_CM_SHADOW_FREQ_CONFIG2_OFFSET		0x0164
+#define DRA7XX_CM_DYN_DEP_PRESCAL_OFFSET		0x0170
+#define DRA7XX_CM_RESTORE_ST_OFFSET			0x0180
+#define DRA7XX_CM_CLKMODE_DPLL_EVE_OFFSET		0x0184
+#define DRA7XX_CM_CLKMODE_DPLL_EVE			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x0184)
+#define DRA7XX_CM_IDLEST_DPLL_EVE_OFFSET		0x0188
+#define DRA7XX_CM_IDLEST_DPLL_EVE			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x0188)
+#define DRA7XX_CM_AUTOIDLE_DPLL_EVE_OFFSET		0x018c
+#define DRA7XX_CM_AUTOIDLE_DPLL_EVE			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x018c)
+#define DRA7XX_CM_CLKSEL_DPLL_EVE_OFFSET		0x0190
+#define DRA7XX_CM_CLKSEL_DPLL_EVE			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x0190)
+#define DRA7XX_CM_DIV_M2_DPLL_EVE_OFFSET		0x0194
+#define DRA7XX_CM_DIV_M2_DPLL_EVE			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x0194)
+#define DRA7XX_CM_DIV_M3_DPLL_EVE_OFFSET		0x0198
+#define DRA7XX_CM_DIV_M3_DPLL_EVE			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x0198)
+#define DRA7XX_CM_SSC_DELTAMSTEP_DPLL_EVE_OFFSET	0x019c
+#define DRA7XX_CM_SSC_MODFREQDIV_DPLL_EVE_OFFSET	0x01a0
+#define DRA7XX_CM_BYPCLK_DPLL_EVE_OFFSET		0x01a4
+#define DRA7XX_CM_BYPCLK_DPLL_EVE			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x01a4)
+#define DRA7XX_CM_CLKMODE_DPLL_GMAC_OFFSET		0x01a8
+#define DRA7XX_CM_CLKMODE_DPLL_GMAC			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x01a8)
+#define DRA7XX_CM_IDLEST_DPLL_GMAC_OFFSET		0x01ac
+#define DRA7XX_CM_IDLEST_DPLL_GMAC			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x01ac)
+#define DRA7XX_CM_AUTOIDLE_DPLL_GMAC_OFFSET		0x01b0
+#define DRA7XX_CM_AUTOIDLE_DPLL_GMAC			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x01b0)
+#define DRA7XX_CM_CLKSEL_DPLL_GMAC_OFFSET		0x01b4
+#define DRA7XX_CM_CLKSEL_DPLL_GMAC			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x01b4)
+#define DRA7XX_CM_DIV_M2_DPLL_GMAC_OFFSET		0x01b8
+#define DRA7XX_CM_DIV_M2_DPLL_GMAC			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x01b8)
+#define DRA7XX_CM_DIV_M3_DPLL_GMAC_OFFSET		0x01bc
+#define DRA7XX_CM_DIV_M3_DPLL_GMAC			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x01bc)
+#define DRA7XX_CM_DIV_H11_DPLL_GMAC_OFFSET		0x01c0
+#define DRA7XX_CM_DIV_H11_DPLL_GMAC			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x01c0)
+#define DRA7XX_CM_DIV_H12_DPLL_GMAC_OFFSET		0x01c4
+#define DRA7XX_CM_DIV_H12_DPLL_GMAC			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x01c4)
+#define DRA7XX_CM_DIV_H13_DPLL_GMAC_OFFSET		0x01c8
+#define DRA7XX_CM_DIV_H13_DPLL_GMAC			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x01c8)
+#define DRA7XX_CM_DIV_H14_DPLL_GMAC_OFFSET		0x01cc
+#define DRA7XX_CM_DIV_H14_DPLL_GMAC			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x01cc)
+#define DRA7XX_CM_SSC_DELTAMSTEP_DPLL_GMAC_OFFSET	0x01d0
+#define DRA7XX_CM_SSC_MODFREQDIV_DPLL_GMAC_OFFSET	0x01d4
+#define DRA7XX_CM_CLKMODE_DPLL_GPU_OFFSET		0x01d8
+#define DRA7XX_CM_CLKMODE_DPLL_GPU			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x01d8)
+#define DRA7XX_CM_IDLEST_DPLL_GPU_OFFSET		0x01dc
+#define DRA7XX_CM_IDLEST_DPLL_GPU			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x01dc)
+#define DRA7XX_CM_AUTOIDLE_DPLL_GPU_OFFSET		0x01e0
+#define DRA7XX_CM_AUTOIDLE_DPLL_GPU			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x01e0)
+#define DRA7XX_CM_CLKSEL_DPLL_GPU_OFFSET		0x01e4
+#define DRA7XX_CM_CLKSEL_DPLL_GPU			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x01e4)
+#define DRA7XX_CM_DIV_M2_DPLL_GPU_OFFSET		0x01e8
+#define DRA7XX_CM_DIV_M2_DPLL_GPU			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x01e8)
+#define DRA7XX_CM_DIV_M3_DPLL_GPU_OFFSET		0x01ec
+#define DRA7XX_CM_DIV_M3_DPLL_GPU			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_CKGEN_INST, 0x01ec)
+#define DRA7XX_CM_SSC_DELTAMSTEP_DPLL_GPU_OFFSET	0x01f0
+#define DRA7XX_CM_SSC_MODFREQDIV_DPLL_GPU_OFFSET	0x01f4
+
+/* CM_CORE_AON.MPU_CM_CORE_AON register offsets */
+#define DRA7XX_CM_MPU_CLKSTCTRL_OFFSET			0x0000
+#define DRA7XX_CM_MPU_STATICDEP_OFFSET			0x0004
+#define DRA7XX_CM_MPU_DYNAMICDEP_OFFSET			0x0008
+#define DRA7XX_CM_MPU_MPU_CLKCTRL_OFFSET		0x0020
+#define DRA7XX_CM_MPU_MPU_CLKCTRL			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_MPU_INST, 0x0020)
+#define DRA7XX_CM_MPU_MPU_MPU_DBG_CLKCTRL_OFFSET	0x0028
+#define DRA7XX_CM_MPU_MPU_MPU_DBG_CLKCTRL		DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_MPU_INST, 0x0028)
+
+/* CM_CORE_AON.DSP1_CM_CORE_AON register offsets */
+#define DRA7XX_CM_DSP1_CLKSTCTRL_OFFSET			0x0000
+#define DRA7XX_CM_DSP1_STATICDEP_OFFSET			0x0004
+#define DRA7XX_CM_DSP1_DYNAMICDEP_OFFSET		0x0008
+#define DRA7XX_CM_DSP1_DSP1_CLKCTRL_OFFSET		0x0020
+#define DRA7XX_CM_DSP1_DSP1_CLKCTRL			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_DSP1_INST, 0x0020)
+
+/* CM_CORE_AON.IPU_CM_CORE_AON register offsets */
+#define DRA7XX_CM_IPU1_CLKSTCTRL_OFFSET			0x0000
+#define DRA7XX_CM_IPU1_STATICDEP_OFFSET			0x0004
+#define DRA7XX_CM_IPU1_DYNAMICDEP_OFFSET		0x0008
+#define DRA7XX_CM_IPU1_IPU1_CLKCTRL_OFFSET		0x0020
+#define DRA7XX_CM_IPU1_IPU1_CLKCTRL			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_IPU_INST, 0x0020)
+#define DRA7XX_CM_IPU_CLKSTCTRL_OFFSET			0x0040
+#define DRA7XX_CM_IPU_MCASP1_CLKCTRL_OFFSET		0x0050
+#define DRA7XX_CM_IPU_MCASP1_CLKCTRL			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_IPU_INST, 0x0050)
+#define DRA7XX_CM_IPU_TIMER5_CLKCTRL_OFFSET		0x0058
+#define DRA7XX_CM_IPU_TIMER5_CLKCTRL			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_IPU_INST, 0x0058)
+#define DRA7XX_CM_IPU_TIMER6_CLKCTRL_OFFSET		0x0060
+#define DRA7XX_CM_IPU_TIMER6_CLKCTRL			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_IPU_INST, 0x0060)
+#define DRA7XX_CM_IPU_TIMER7_CLKCTRL_OFFSET		0x0068
+#define DRA7XX_CM_IPU_TIMER7_CLKCTRL			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_IPU_INST, 0x0068)
+#define DRA7XX_CM_IPU_TIMER8_CLKCTRL_OFFSET		0x0070
+#define DRA7XX_CM_IPU_TIMER8_CLKCTRL			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_IPU_INST, 0x0070)
+#define DRA7XX_CM_IPU_I2C5_CLKCTRL_OFFSET		0x0078
+#define DRA7XX_CM_IPU_I2C5_CLKCTRL			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_IPU_INST, 0x0078)
+#define DRA7XX_CM_IPU_UART6_CLKCTRL_OFFSET		0x0080
+#define DRA7XX_CM_IPU_UART6_CLKCTRL			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_IPU_INST, 0x0080)
+
+/* CM_CORE_AON.DSP2_CM_CORE_AON register offsets */
+#define DRA7XX_CM_DSP2_CLKSTCTRL_OFFSET			0x0000
+#define DRA7XX_CM_DSP2_STATICDEP_OFFSET			0x0004
+#define DRA7XX_CM_DSP2_DYNAMICDEP_OFFSET		0x0008
+#define DRA7XX_CM_DSP2_DSP2_CLKCTRL_OFFSET		0x0020
+#define DRA7XX_CM_DSP2_DSP2_CLKCTRL			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_DSP2_INST, 0x0020)
+
+/* CM_CORE_AON.EVE1_CM_CORE_AON register offsets */
+#define DRA7XX_CM_EVE1_CLKSTCTRL_OFFSET			0x0000
+#define DRA7XX_CM_EVE1_STATICDEP_OFFSET			0x0004
+#define DRA7XX_CM_EVE1_EVE1_CLKCTRL_OFFSET		0x0020
+#define DRA7XX_CM_EVE1_EVE1_CLKCTRL			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_EVE1_INST, 0x0020)
+
+/* CM_CORE_AON.EVE2_CM_CORE_AON register offsets */
+#define DRA7XX_CM_EVE2_CLKSTCTRL_OFFSET			0x0000
+#define DRA7XX_CM_EVE2_STATICDEP_OFFSET			0x0004
+#define DRA7XX_CM_EVE2_EVE2_CLKCTRL_OFFSET		0x0020
+#define DRA7XX_CM_EVE2_EVE2_CLKCTRL			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_EVE2_INST, 0x0020)
+
+/* CM_CORE_AON.EVE3_CM_CORE_AON register offsets */
+#define DRA7XX_CM_EVE3_CLKSTCTRL_OFFSET			0x0000
+#define DRA7XX_CM_EVE3_STATICDEP_OFFSET			0x0004
+#define DRA7XX_CM_EVE3_EVE3_CLKCTRL_OFFSET		0x0020
+#define DRA7XX_CM_EVE3_EVE3_CLKCTRL			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_EVE3_INST, 0x0020)
+
+/* CM_CORE_AON.EVE4_CM_CORE_AON register offsets */
+#define DRA7XX_CM_EVE4_CLKSTCTRL_OFFSET			0x0000
+#define DRA7XX_CM_EVE4_STATICDEP_OFFSET			0x0004
+#define DRA7XX_CM_EVE4_EVE4_CLKCTRL_OFFSET		0x0020
+#define DRA7XX_CM_EVE4_EVE4_CLKCTRL			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_EVE4_INST, 0x0020)
+
+/* CM_CORE_AON.RTC_CM_CORE_AON register offsets */
+#define DRA7XX_CM_RTC_CLKSTCTRL_OFFSET			0x0000
+#define DRA7XX_CM_RTC_RTCSS_CLKCTRL_OFFSET		0x0004
+#define DRA7XX_CM_RTC_RTCSS_CLKCTRL			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_RTC_INST, 0x0004)
+
+/* CM_CORE_AON.VPE_CM_CORE_AON register offsets */
+#define DRA7XX_CM_VPE_CLKSTCTRL_OFFSET			0x0000
+#define DRA7XX_CM_VPE_VPE_CLKCTRL_OFFSET		0x0004
+#define DRA7XX_CM_VPE_VPE_CLKCTRL			DRA7XX_CM_CORE_AON_REGADDR(DRA7XX_CM_CORE_AON_VPE_INST, 0x0004)
+#define DRA7XX_CM_VPE_STATICDEP_OFFSET			0x0008
+
+#endif
diff --git a/arch/arm/mach-omap2/cm2_7xx.h b/arch/arm/mach-omap2/cm2_7xx.h
new file mode 100644
index 0000000..9ad7594
--- /dev/null
+++ b/arch/arm/mach-omap2/cm2_7xx.h
@@ -0,0 +1,513 @@
+/*
+ * DRA7xx CM2 instance offset macros
+ *
+ * Copyright (C) 2013 Texas Instruments Incorporated - http://www.ti.com
+ *
+ * Generated by code originally written by:
+ * Paul Walmsley (paul@pwsan.com)
+ * Rajendra Nayak (rnayak@ti.com)
+ * Benoit Cousson (b-cousson@ti.com)
+ *
+ * This file is automatically generated from the OMAP hardware databases.
+ * We respectfully ask that any modifications to this file be coordinated
+ * with the public linux-omap@vger.kernel.org mailing list and the
+ * authors above to ensure that the autogeneration scripts are kept
+ * up-to-date with the file contents.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ARCH_ARM_MACH_OMAP2_CM2_7XX_H
+#define __ARCH_ARM_MACH_OMAP2_CM2_7XX_H
+
+#include "cm_44xx_54xx.h"
+
+/* CM2 base address */
+#define DRA7XX_CM_CORE_BASE		0x4a008000
+
+#define DRA7XX_CM_CORE_REGADDR(inst, reg)				\
+	OMAP2_L4_IO_ADDRESS(DRA7XX_CM_CORE_BASE + (inst) + (reg))
+
+/* CM_CORE instances */
+#define DRA7XX_CM_CORE_OCP_SOCKET_INST	0x0000
+#define DRA7XX_CM_CORE_CKGEN_INST	0x0104
+#define DRA7XX_CM_CORE_COREAON_INST	0x0600
+#define DRA7XX_CM_CORE_CORE_INST	0x0700
+#define DRA7XX_CM_CORE_IVA_INST		0x0f00
+#define DRA7XX_CM_CORE_CAM_INST		0x1000
+#define DRA7XX_CM_CORE_DSS_INST		0x1100
+#define DRA7XX_CM_CORE_GPU_INST		0x1200
+#define DRA7XX_CM_CORE_L3INIT_INST	0x1300
+#define DRA7XX_CM_CORE_CUSTEFUSE_INST	0x1600
+#define DRA7XX_CM_CORE_L4PER_INST	0x1700
+#define DRA7XX_CM_CORE_RESTORE_INST	0x1e18
+
+/* CM_CORE clockdomain register offsets (from instance start) */
+#define DRA7XX_CM_CORE_COREAON_COREAON_CDOFFS		0x0000
+#define DRA7XX_CM_CORE_CORE_L3MAIN1_CDOFFS		0x0000
+#define DRA7XX_CM_CORE_CORE_IPU2_CDOFFS			0x0200
+#define DRA7XX_CM_CORE_CORE_DMA_CDOFFS			0x0300
+#define DRA7XX_CM_CORE_CORE_EMIF_CDOFFS			0x0400
+#define DRA7XX_CM_CORE_CORE_ATL_CDOFFS			0x0520
+#define DRA7XX_CM_CORE_CORE_L4CFG_CDOFFS		0x0600
+#define DRA7XX_CM_CORE_CORE_L3INSTR_CDOFFS		0x0700
+#define DRA7XX_CM_CORE_IVA_IVA_CDOFFS			0x0000
+#define DRA7XX_CM_CORE_CAM_CAM_CDOFFS			0x0000
+#define DRA7XX_CM_CORE_DSS_DSS_CDOFFS			0x0000
+#define DRA7XX_CM_CORE_GPU_GPU_CDOFFS			0x0000
+#define DRA7XX_CM_CORE_L3INIT_L3INIT_CDOFFS		0x0000
+#define DRA7XX_CM_CORE_L3INIT_PCIE_CDOFFS		0x00a0
+#define DRA7XX_CM_CORE_L3INIT_GMAC_CDOFFS		0x00c0
+#define DRA7XX_CM_CORE_CUSTEFUSE_CUSTEFUSE_CDOFFS	0x0000
+#define DRA7XX_CM_CORE_L4PER_L4PER_CDOFFS		0x0000
+#define DRA7XX_CM_CORE_L4PER_L4SEC_CDOFFS		0x0180
+#define DRA7XX_CM_CORE_L4PER_L4PER2_CDOFFS		0x01fc
+#define DRA7XX_CM_CORE_L4PER_L4PER3_CDOFFS		0x0210
+
+/* CM_CORE */
+
+/* CM_CORE.OCP_SOCKET_CM_CORE register offsets */
+#define DRA7XX_REVISION_CM_CORE_OFFSET				0x0000
+#define DRA7XX_CM_CM_CORE_PROFILING_CLKCTRL_OFFSET		0x0040
+#define DRA7XX_CM_CM_CORE_PROFILING_CLKCTRL			DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_OCP_SOCKET_INST, 0x0040)
+#define DRA7XX_CM_CORE_DEBUG_CFG_OFFSET				0x00f0
+
+/* CM_CORE.CKGEN_CM_CORE register offsets */
+#define DRA7XX_CM_CLKSEL_USB_60MHZ_OFFSET			0x0000
+#define DRA7XX_CM_CLKSEL_USB_60MHZ				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CKGEN_INST, 0x0000)
+#define DRA7XX_CM_CLKMODE_DPLL_PER_OFFSET			0x003c
+#define DRA7XX_CM_CLKMODE_DPLL_PER				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CKGEN_INST, 0x003c)
+#define DRA7XX_CM_IDLEST_DPLL_PER_OFFSET			0x0040
+#define DRA7XX_CM_IDLEST_DPLL_PER				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CKGEN_INST, 0x0040)
+#define DRA7XX_CM_AUTOIDLE_DPLL_PER_OFFSET			0x0044
+#define DRA7XX_CM_AUTOIDLE_DPLL_PER				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CKGEN_INST, 0x0044)
+#define DRA7XX_CM_CLKSEL_DPLL_PER_OFFSET			0x0048
+#define DRA7XX_CM_CLKSEL_DPLL_PER				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CKGEN_INST, 0x0048)
+#define DRA7XX_CM_DIV_M2_DPLL_PER_OFFSET			0x004c
+#define DRA7XX_CM_DIV_M2_DPLL_PER				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CKGEN_INST, 0x004c)
+#define DRA7XX_CM_DIV_M3_DPLL_PER_OFFSET			0x0050
+#define DRA7XX_CM_DIV_M3_DPLL_PER				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CKGEN_INST, 0x0050)
+#define DRA7XX_CM_DIV_H11_DPLL_PER_OFFSET			0x0054
+#define DRA7XX_CM_DIV_H11_DPLL_PER				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CKGEN_INST, 0x0054)
+#define DRA7XX_CM_DIV_H12_DPLL_PER_OFFSET			0x0058
+#define DRA7XX_CM_DIV_H12_DPLL_PER				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CKGEN_INST, 0x0058)
+#define DRA7XX_CM_DIV_H13_DPLL_PER_OFFSET			0x005c
+#define DRA7XX_CM_DIV_H13_DPLL_PER				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CKGEN_INST, 0x005c)
+#define DRA7XX_CM_DIV_H14_DPLL_PER_OFFSET			0x0060
+#define DRA7XX_CM_DIV_H14_DPLL_PER				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CKGEN_INST, 0x0060)
+#define DRA7XX_CM_SSC_DELTAMSTEP_DPLL_PER_OFFSET		0x0064
+#define DRA7XX_CM_SSC_MODFREQDIV_DPLL_PER_OFFSET		0x0068
+#define DRA7XX_CM_CLKMODE_DPLL_USB_OFFSET			0x007c
+#define DRA7XX_CM_CLKMODE_DPLL_USB				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CKGEN_INST, 0x007c)
+#define DRA7XX_CM_IDLEST_DPLL_USB_OFFSET			0x0080
+#define DRA7XX_CM_IDLEST_DPLL_USB				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CKGEN_INST, 0x0080)
+#define DRA7XX_CM_AUTOIDLE_DPLL_USB_OFFSET			0x0084
+#define DRA7XX_CM_AUTOIDLE_DPLL_USB				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CKGEN_INST, 0x0084)
+#define DRA7XX_CM_CLKSEL_DPLL_USB_OFFSET			0x0088
+#define DRA7XX_CM_CLKSEL_DPLL_USB				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CKGEN_INST, 0x0088)
+#define DRA7XX_CM_DIV_M2_DPLL_USB_OFFSET			0x008c
+#define DRA7XX_CM_DIV_M2_DPLL_USB				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CKGEN_INST, 0x008c)
+#define DRA7XX_CM_SSC_DELTAMSTEP_DPLL_USB_OFFSET		0x00a4
+#define DRA7XX_CM_SSC_MODFREQDIV_DPLL_USB_OFFSET		0x00a8
+#define DRA7XX_CM_CLKDCOLDO_DPLL_USB_OFFSET			0x00b0
+#define DRA7XX_CM_CLKDCOLDO_DPLL_USB				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CKGEN_INST, 0x00b0)
+#define DRA7XX_CM_CLKMODE_DPLL_PCIE_REF_OFFSET			0x00fc
+#define DRA7XX_CM_CLKMODE_DPLL_PCIE_REF				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CKGEN_INST, 0x00fc)
+#define DRA7XX_CM_IDLEST_DPLL_PCIE_REF_OFFSET			0x0100
+#define DRA7XX_CM_IDLEST_DPLL_PCIE_REF				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CKGEN_INST, 0x0100)
+#define DRA7XX_CM_AUTOIDLE_DPLL_PCIE_REF_OFFSET			0x0104
+#define DRA7XX_CM_AUTOIDLE_DPLL_PCIE_REF			DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CKGEN_INST, 0x0104)
+#define DRA7XX_CM_CLKSEL_DPLL_PCIE_REF_OFFSET			0x0108
+#define DRA7XX_CM_CLKSEL_DPLL_PCIE_REF				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CKGEN_INST, 0x0108)
+#define DRA7XX_CM_DIV_M2_DPLL_PCIE_REF_OFFSET			0x010c
+#define DRA7XX_CM_DIV_M2_DPLL_PCIE_REF				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CKGEN_INST, 0x010c)
+#define DRA7XX_CM_SSC_DELTAMSTEP_DPLL_PCIE_REF_OFFSET		0x0110
+#define DRA7XX_CM_SSC_MODFREQDIV_DPLL_PCIE_REF_OFFSET		0x0114
+#define DRA7XX_CM_CLKMODE_APLL_PCIE_OFFSET			0x0118
+#define DRA7XX_CM_CLKMODE_APLL_PCIE				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CKGEN_INST, 0x0118)
+#define DRA7XX_CM_IDLEST_APLL_PCIE_OFFSET			0x011c
+#define DRA7XX_CM_IDLEST_APLL_PCIE				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CKGEN_INST, 0x011c)
+#define DRA7XX_CM_DIV_M2_APLL_PCIE_OFFSET			0x0120
+#define DRA7XX_CM_DIV_M2_APLL_PCIE				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CKGEN_INST, 0x0120)
+#define DRA7XX_CM_CLKVCOLDO_APLL_PCIE_OFFSET			0x0124
+#define DRA7XX_CM_CLKVCOLDO_APLL_PCIE				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CKGEN_INST, 0x0124)
+
+/* CM_CORE.COREAON_CM_CORE register offsets */
+#define DRA7XX_CM_COREAON_CLKSTCTRL_OFFSET			0x0000
+#define DRA7XX_CM_COREAON_SMARTREFLEX_MPU_CLKCTRL_OFFSET	0x0028
+#define DRA7XX_CM_COREAON_SMARTREFLEX_MPU_CLKCTRL		DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_COREAON_INST, 0x0028)
+#define DRA7XX_CM_COREAON_SMARTREFLEX_CORE_CLKCTRL_OFFSET	0x0038
+#define DRA7XX_CM_COREAON_SMARTREFLEX_CORE_CLKCTRL		DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_COREAON_INST, 0x0038)
+#define DRA7XX_CM_COREAON_USB_PHY1_CORE_CLKCTRL_OFFSET		0x0040
+#define DRA7XX_CM_COREAON_USB_PHY1_CORE_CLKCTRL			DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_COREAON_INST, 0x0040)
+#define DRA7XX_CM_COREAON_IO_SRCOMP_CLKCTRL_OFFSET		0x0050
+#define DRA7XX_CM_COREAON_IO_SRCOMP_CLKCTRL			DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_COREAON_INST, 0x0050)
+#define DRA7XX_CM_COREAON_SMARTREFLEX_GPU_CLKCTRL_OFFSET	0x0058
+#define DRA7XX_CM_COREAON_SMARTREFLEX_GPU_CLKCTRL		DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_COREAON_INST, 0x0058)
+#define DRA7XX_CM_COREAON_SMARTREFLEX_DSPEVE_CLKCTRL_OFFSET	0x0068
+#define DRA7XX_CM_COREAON_SMARTREFLEX_DSPEVE_CLKCTRL		DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_COREAON_INST, 0x0068)
+#define DRA7XX_CM_COREAON_SMARTREFLEX_IVAHD_CLKCTRL_OFFSET	0x0078
+#define DRA7XX_CM_COREAON_SMARTREFLEX_IVAHD_CLKCTRL		DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_COREAON_INST, 0x0078)
+#define DRA7XX_CM_COREAON_USB_PHY2_CORE_CLKCTRL_OFFSET		0x0088
+#define DRA7XX_CM_COREAON_USB_PHY2_CORE_CLKCTRL			DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_COREAON_INST, 0x0088)
+#define DRA7XX_CM_COREAON_USB_PHY3_CORE_CLKCTRL_OFFSET		0x0098
+#define DRA7XX_CM_COREAON_USB_PHY3_CORE_CLKCTRL			DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_COREAON_INST, 0x0098)
+#define DRA7XX_CM_COREAON_DUMMY_MODULE1_CLKCTRL_OFFSET		0x00a0
+#define DRA7XX_CM_COREAON_DUMMY_MODULE1_CLKCTRL			DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_COREAON_INST, 0x00a0)
+#define DRA7XX_CM_COREAON_DUMMY_MODULE2_CLKCTRL_OFFSET		0x00b0
+#define DRA7XX_CM_COREAON_DUMMY_MODULE2_CLKCTRL			DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_COREAON_INST, 0x00b0)
+#define DRA7XX_CM_COREAON_DUMMY_MODULE3_CLKCTRL_OFFSET		0x00c0
+#define DRA7XX_CM_COREAON_DUMMY_MODULE3_CLKCTRL			DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_COREAON_INST, 0x00c0)
+#define DRA7XX_CM_COREAON_DUMMY_MODULE4_CLKCTRL_OFFSET		0x00d0
+#define DRA7XX_CM_COREAON_DUMMY_MODULE4_CLKCTRL			DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_COREAON_INST, 0x00d0)
+
+/* CM_CORE.CORE_CM_CORE register offsets */
+#define DRA7XX_CM_L3MAIN1_CLKSTCTRL_OFFSET			0x0000
+#define DRA7XX_CM_L3MAIN1_DYNAMICDEP_OFFSET			0x0008
+#define DRA7XX_CM_L3MAIN1_L3_MAIN_1_CLKCTRL_OFFSET		0x0020
+#define DRA7XX_CM_L3MAIN1_L3_MAIN_1_CLKCTRL			DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CORE_INST, 0x0020)
+#define DRA7XX_CM_L3MAIN1_GPMC_CLKCTRL_OFFSET			0x0028
+#define DRA7XX_CM_L3MAIN1_GPMC_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CORE_INST, 0x0028)
+#define DRA7XX_CM_L3MAIN1_MMU_EDMA_CLKCTRL_OFFSET		0x0030
+#define DRA7XX_CM_L3MAIN1_MMU_EDMA_CLKCTRL			DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CORE_INST, 0x0030)
+#define DRA7XX_CM_L3MAIN1_OCMC_RAM1_CLKCTRL_OFFSET		0x0050
+#define DRA7XX_CM_L3MAIN1_OCMC_RAM1_CLKCTRL			DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CORE_INST, 0x0050)
+#define DRA7XX_CM_L3MAIN1_OCMC_RAM2_CLKCTRL_OFFSET		0x0058
+#define DRA7XX_CM_L3MAIN1_OCMC_RAM2_CLKCTRL			DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CORE_INST, 0x0058)
+#define DRA7XX_CM_L3MAIN1_OCMC_RAM3_CLKCTRL_OFFSET		0x0060
+#define DRA7XX_CM_L3MAIN1_OCMC_RAM3_CLKCTRL			DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CORE_INST, 0x0060)
+#define DRA7XX_CM_L3MAIN1_OCMC_ROM_CLKCTRL_OFFSET		0x0068
+#define DRA7XX_CM_L3MAIN1_OCMC_ROM_CLKCTRL			DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CORE_INST, 0x0068)
+#define DRA7XX_CM_L3MAIN1_TPCC_CLKCTRL_OFFSET			0x0070
+#define DRA7XX_CM_L3MAIN1_TPCC_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CORE_INST, 0x0070)
+#define DRA7XX_CM_L3MAIN1_TPTC1_CLKCTRL_OFFSET			0x0078
+#define DRA7XX_CM_L3MAIN1_TPTC1_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CORE_INST, 0x0078)
+#define DRA7XX_CM_L3MAIN1_TPTC2_CLKCTRL_OFFSET			0x0080
+#define DRA7XX_CM_L3MAIN1_TPTC2_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CORE_INST, 0x0080)
+#define DRA7XX_CM_L3MAIN1_VCP1_CLKCTRL_OFFSET			0x0088
+#define DRA7XX_CM_L3MAIN1_VCP1_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CORE_INST, 0x0088)
+#define DRA7XX_CM_L3MAIN1_VCP2_CLKCTRL_OFFSET			0x0090
+#define DRA7XX_CM_L3MAIN1_VCP2_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CORE_INST, 0x0090)
+#define DRA7XX_CM_L3MAIN1_SPARE_CME_CLKCTRL_OFFSET		0x0098
+#define DRA7XX_CM_L3MAIN1_SPARE_CME_CLKCTRL			DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CORE_INST, 0x0098)
+#define DRA7XX_CM_L3MAIN1_SPARE_HDMI_CLKCTRL_OFFSET		0x00a0
+#define DRA7XX_CM_L3MAIN1_SPARE_HDMI_CLKCTRL			DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CORE_INST, 0x00a0)
+#define DRA7XX_CM_L3MAIN1_SPARE_ICM_CLKCTRL_OFFSET		0x00a8
+#define DRA7XX_CM_L3MAIN1_SPARE_ICM_CLKCTRL			DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CORE_INST, 0x00a8)
+#define DRA7XX_CM_L3MAIN1_SPARE_IVA2_CLKCTRL_OFFSET		0x00b0
+#define DRA7XX_CM_L3MAIN1_SPARE_IVA2_CLKCTRL			DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CORE_INST, 0x00b0)
+#define DRA7XX_CM_L3MAIN1_SPARE_SATA2_CLKCTRL_OFFSET		0x00b8
+#define DRA7XX_CM_L3MAIN1_SPARE_SATA2_CLKCTRL			DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CORE_INST, 0x00b8)
+#define DRA7XX_CM_L3MAIN1_SPARE_UNKNOWN4_CLKCTRL_OFFSET		0x00c0
+#define DRA7XX_CM_L3MAIN1_SPARE_UNKNOWN4_CLKCTRL		DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CORE_INST, 0x00c0)
+#define DRA7XX_CM_L3MAIN1_SPARE_UNKNOWN5_CLKCTRL_OFFSET		0x00c8
+#define DRA7XX_CM_L3MAIN1_SPARE_UNKNOWN5_CLKCTRL		DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CORE_INST, 0x00c8)
+#define DRA7XX_CM_L3MAIN1_SPARE_UNKNOWN6_CLKCTRL_OFFSET		0x00d0
+#define DRA7XX_CM_L3MAIN1_SPARE_UNKNOWN6_CLKCTRL		DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CORE_INST, 0x00d0)
+#define DRA7XX_CM_L3MAIN1_SPARE_VIDEOPLL1_CLKCTRL_OFFSET	0x00d8
+#define DRA7XX_CM_L3MAIN1_SPARE_VIDEOPLL1_CLKCTRL		DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CORE_INST, 0x00d8)
+#define DRA7XX_CM_L3MAIN1_SPARE_VIDEOPLL2_CLKCTRL_OFFSET	0x00f0
+#define DRA7XX_CM_L3MAIN1_SPARE_VIDEOPLL2_CLKCTRL		DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CORE_INST, 0x00f0)
+#define DRA7XX_CM_L3MAIN1_SPARE_VIDEOPLL3_CLKCTRL_OFFSET	0x00f8
+#define DRA7XX_CM_L3MAIN1_SPARE_VIDEOPLL3_CLKCTRL		DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CORE_INST, 0x00f8)
+#define DRA7XX_CM_IPU2_CLKSTCTRL_OFFSET				0x0200
+#define DRA7XX_CM_IPU2_STATICDEP_OFFSET				0x0204
+#define DRA7XX_CM_IPU2_DYNAMICDEP_OFFSET			0x0208
+#define DRA7XX_CM_IPU2_IPU2_CLKCTRL_OFFSET			0x0220
+#define DRA7XX_CM_IPU2_IPU2_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CORE_INST, 0x0220)
+#define DRA7XX_CM_DMA_CLKSTCTRL_OFFSET				0x0300
+#define DRA7XX_CM_DMA_STATICDEP_OFFSET				0x0304
+#define DRA7XX_CM_DMA_DYNAMICDEP_OFFSET				0x0308
+#define DRA7XX_CM_DMA_DMA_SYSTEM_CLKCTRL_OFFSET			0x0320
+#define DRA7XX_CM_DMA_DMA_SYSTEM_CLKCTRL			DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CORE_INST, 0x0320)
+#define DRA7XX_CM_EMIF_CLKSTCTRL_OFFSET				0x0400
+#define DRA7XX_CM_EMIF_DMM_CLKCTRL_OFFSET			0x0420
+#define DRA7XX_CM_EMIF_DMM_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CORE_INST, 0x0420)
+#define DRA7XX_CM_EMIF_EMIF_OCP_FW_CLKCTRL_OFFSET		0x0428
+#define DRA7XX_CM_EMIF_EMIF_OCP_FW_CLKCTRL			DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CORE_INST, 0x0428)
+#define DRA7XX_CM_EMIF_EMIF1_CLKCTRL_OFFSET			0x0430
+#define DRA7XX_CM_EMIF_EMIF1_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CORE_INST, 0x0430)
+#define DRA7XX_CM_EMIF_EMIF2_CLKCTRL_OFFSET			0x0438
+#define DRA7XX_CM_EMIF_EMIF2_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CORE_INST, 0x0438)
+#define DRA7XX_CM_EMIF_EMIF_DLL_CLKCTRL_OFFSET			0x0440
+#define DRA7XX_CM_EMIF_EMIF_DLL_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CORE_INST, 0x0440)
+#define DRA7XX_CM_ATL_ATL_CLKCTRL_OFFSET			0x0500
+#define DRA7XX_CM_ATL_ATL_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CORE_INST, 0x0500)
+#define DRA7XX_CM_ATL_CLKSTCTRL_OFFSET				0x0520
+#define DRA7XX_CM_L4CFG_CLKSTCTRL_OFFSET			0x0600
+#define DRA7XX_CM_L4CFG_DYNAMICDEP_OFFSET			0x0608
+#define DRA7XX_CM_L4CFG_L4_CFG_CLKCTRL_OFFSET			0x0620
+#define DRA7XX_CM_L4CFG_L4_CFG_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CORE_INST, 0x0620)
+#define DRA7XX_CM_L4CFG_SPINLOCK_CLKCTRL_OFFSET			0x0628
+#define DRA7XX_CM_L4CFG_SPINLOCK_CLKCTRL			DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CORE_INST, 0x0628)
+#define DRA7XX_CM_L4CFG_MAILBOX1_CLKCTRL_OFFSET			0x0630
+#define DRA7XX_CM_L4CFG_MAILBOX1_CLKCTRL			DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CORE_INST, 0x0630)
+#define DRA7XX_CM_L4CFG_SAR_ROM_CLKCTRL_OFFSET			0x0638
+#define DRA7XX_CM_L4CFG_SAR_ROM_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CORE_INST, 0x0638)
+#define DRA7XX_CM_L4CFG_OCP2SCP2_CLKCTRL_OFFSET			0x0640
+#define DRA7XX_CM_L4CFG_OCP2SCP2_CLKCTRL			DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CORE_INST, 0x0640)
+#define DRA7XX_CM_L4CFG_MAILBOX2_CLKCTRL_OFFSET			0x0648
+#define DRA7XX_CM_L4CFG_MAILBOX2_CLKCTRL			DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CORE_INST, 0x0648)
+#define DRA7XX_CM_L4CFG_MAILBOX3_CLKCTRL_OFFSET			0x0650
+#define DRA7XX_CM_L4CFG_MAILBOX3_CLKCTRL			DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CORE_INST, 0x0650)
+#define DRA7XX_CM_L4CFG_MAILBOX4_CLKCTRL_OFFSET			0x0658
+#define DRA7XX_CM_L4CFG_MAILBOX4_CLKCTRL			DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CORE_INST, 0x0658)
+#define DRA7XX_CM_L4CFG_MAILBOX5_CLKCTRL_OFFSET			0x0660
+#define DRA7XX_CM_L4CFG_MAILBOX5_CLKCTRL			DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CORE_INST, 0x0660)
+#define DRA7XX_CM_L4CFG_MAILBOX6_CLKCTRL_OFFSET			0x0668
+#define DRA7XX_CM_L4CFG_MAILBOX6_CLKCTRL			DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CORE_INST, 0x0668)
+#define DRA7XX_CM_L4CFG_MAILBOX7_CLKCTRL_OFFSET			0x0670
+#define DRA7XX_CM_L4CFG_MAILBOX7_CLKCTRL			DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CORE_INST, 0x0670)
+#define DRA7XX_CM_L4CFG_MAILBOX8_CLKCTRL_OFFSET			0x0678
+#define DRA7XX_CM_L4CFG_MAILBOX8_CLKCTRL			DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CORE_INST, 0x0678)
+#define DRA7XX_CM_L4CFG_MAILBOX9_CLKCTRL_OFFSET			0x0680
+#define DRA7XX_CM_L4CFG_MAILBOX9_CLKCTRL			DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CORE_INST, 0x0680)
+#define DRA7XX_CM_L4CFG_MAILBOX10_CLKCTRL_OFFSET		0x0688
+#define DRA7XX_CM_L4CFG_MAILBOX10_CLKCTRL			DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CORE_INST, 0x0688)
+#define DRA7XX_CM_L4CFG_MAILBOX11_CLKCTRL_OFFSET		0x0690
+#define DRA7XX_CM_L4CFG_MAILBOX11_CLKCTRL			DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CORE_INST, 0x0690)
+#define DRA7XX_CM_L4CFG_MAILBOX12_CLKCTRL_OFFSET		0x0698
+#define DRA7XX_CM_L4CFG_MAILBOX12_CLKCTRL			DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CORE_INST, 0x0698)
+#define DRA7XX_CM_L4CFG_MAILBOX13_CLKCTRL_OFFSET		0x06a0
+#define DRA7XX_CM_L4CFG_MAILBOX13_CLKCTRL			DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CORE_INST, 0x06a0)
+#define DRA7XX_CM_L4CFG_SPARE_SMARTREFLEX_RTC_CLKCTRL_OFFSET	0x06a8
+#define DRA7XX_CM_L4CFG_SPARE_SMARTREFLEX_RTC_CLKCTRL		DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CORE_INST, 0x06a8)
+#define DRA7XX_CM_L4CFG_SPARE_SMARTREFLEX_SDRAM_CLKCTRL_OFFSET	0x06b0
+#define DRA7XX_CM_L4CFG_SPARE_SMARTREFLEX_SDRAM_CLKCTRL		DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CORE_INST, 0x06b0)
+#define DRA7XX_CM_L4CFG_SPARE_SMARTREFLEX_WKUP_CLKCTRL_OFFSET	0x06b8
+#define DRA7XX_CM_L4CFG_SPARE_SMARTREFLEX_WKUP_CLKCTRL		DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CORE_INST, 0x06b8)
+#define DRA7XX_CM_L4CFG_IO_DELAY_BLOCK_CLKCTRL_OFFSET		0x06c0
+#define DRA7XX_CM_L4CFG_IO_DELAY_BLOCK_CLKCTRL			DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CORE_INST, 0x06c0)
+#define DRA7XX_CM_L3INSTR_CLKSTCTRL_OFFSET			0x0700
+#define DRA7XX_CM_L3INSTR_L3_MAIN_2_CLKCTRL_OFFSET		0x0720
+#define DRA7XX_CM_L3INSTR_L3_MAIN_2_CLKCTRL			DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CORE_INST, 0x0720)
+#define DRA7XX_CM_L3INSTR_L3_INSTR_CLKCTRL_OFFSET		0x0728
+#define DRA7XX_CM_L3INSTR_L3_INSTR_CLKCTRL			DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CORE_INST, 0x0728)
+#define DRA7XX_CM_L3INSTR_OCP_WP_NOC_CLKCTRL_OFFSET		0x0740
+#define DRA7XX_CM_L3INSTR_OCP_WP_NOC_CLKCTRL			DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CORE_INST, 0x0740)
+#define DRA7XX_CM_L3INSTR_DLL_AGING_CLKCTRL_OFFSET		0x0748
+#define DRA7XX_CM_L3INSTR_DLL_AGING_CLKCTRL			DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CORE_INST, 0x0748)
+#define DRA7XX_CM_L3INSTR_CTRL_MODULE_BANDGAP_CLKCTRL_OFFSET	0x0750
+#define DRA7XX_CM_L3INSTR_CTRL_MODULE_BANDGAP_CLKCTRL		DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CORE_INST, 0x0750)
+
+/* CM_CORE.IVA_CM_CORE register offsets */
+#define DRA7XX_CM_IVA_CLKSTCTRL_OFFSET				0x0000
+#define DRA7XX_CM_IVA_STATICDEP_OFFSET				0x0004
+#define DRA7XX_CM_IVA_DYNAMICDEP_OFFSET				0x0008
+#define DRA7XX_CM_IVA_IVA_CLKCTRL_OFFSET			0x0020
+#define DRA7XX_CM_IVA_IVA_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_IVA_INST, 0x0020)
+#define DRA7XX_CM_IVA_SL2_CLKCTRL_OFFSET			0x0028
+#define DRA7XX_CM_IVA_SL2_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_IVA_INST, 0x0028)
+
+/* CM_CORE.CAM_CM_CORE register offsets */
+#define DRA7XX_CM_CAM_CLKSTCTRL_OFFSET				0x0000
+#define DRA7XX_CM_CAM_STATICDEP_OFFSET				0x0004
+#define DRA7XX_CM_CAM_VIP1_CLKCTRL_OFFSET			0x0020
+#define DRA7XX_CM_CAM_VIP1_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CAM_INST, 0x0020)
+#define DRA7XX_CM_CAM_VIP2_CLKCTRL_OFFSET			0x0028
+#define DRA7XX_CM_CAM_VIP2_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CAM_INST, 0x0028)
+#define DRA7XX_CM_CAM_VIP3_CLKCTRL_OFFSET			0x0030
+#define DRA7XX_CM_CAM_VIP3_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CAM_INST, 0x0030)
+#define DRA7XX_CM_CAM_LVDSRX_CLKCTRL_OFFSET			0x0038
+#define DRA7XX_CM_CAM_LVDSRX_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CAM_INST, 0x0038)
+#define DRA7XX_CM_CAM_CSI1_CLKCTRL_OFFSET			0x0040
+#define DRA7XX_CM_CAM_CSI1_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CAM_INST, 0x0040)
+#define DRA7XX_CM_CAM_CSI2_CLKCTRL_OFFSET			0x0048
+#define DRA7XX_CM_CAM_CSI2_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CAM_INST, 0x0048)
+
+/* CM_CORE.DSS_CM_CORE register offsets */
+#define DRA7XX_CM_DSS_CLKSTCTRL_OFFSET				0x0000
+#define DRA7XX_CM_DSS_STATICDEP_OFFSET				0x0004
+#define DRA7XX_CM_DSS_DYNAMICDEP_OFFSET				0x0008
+#define DRA7XX_CM_DSS_DSS_CLKCTRL_OFFSET			0x0020
+#define DRA7XX_CM_DSS_DSS_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_DSS_INST, 0x0020)
+#define DRA7XX_CM_DSS_BB2D_CLKCTRL_OFFSET			0x0030
+#define DRA7XX_CM_DSS_BB2D_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_DSS_INST, 0x0030)
+#define DRA7XX_CM_DSS_SDVENC_CLKCTRL_OFFSET			0x003c
+#define DRA7XX_CM_DSS_SDVENC_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_DSS_INST, 0x003c)
+
+/* CM_CORE.GPU_CM_CORE register offsets */
+#define DRA7XX_CM_GPU_CLKSTCTRL_OFFSET				0x0000
+#define DRA7XX_CM_GPU_STATICDEP_OFFSET				0x0004
+#define DRA7XX_CM_GPU_DYNAMICDEP_OFFSET				0x0008
+#define DRA7XX_CM_GPU_GPU_CLKCTRL_OFFSET			0x0020
+#define DRA7XX_CM_GPU_GPU_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_GPU_INST, 0x0020)
+
+/* CM_CORE.L3INIT_CM_CORE register offsets */
+#define DRA7XX_CM_L3INIT_CLKSTCTRL_OFFSET			0x0000
+#define DRA7XX_CM_L3INIT_STATICDEP_OFFSET			0x0004
+#define DRA7XX_CM_L3INIT_DYNAMICDEP_OFFSET			0x0008
+#define DRA7XX_CM_L3INIT_MMC1_CLKCTRL_OFFSET			0x0028
+#define DRA7XX_CM_L3INIT_MMC1_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L3INIT_INST, 0x0028)
+#define DRA7XX_CM_L3INIT_MMC2_CLKCTRL_OFFSET			0x0030
+#define DRA7XX_CM_L3INIT_MMC2_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L3INIT_INST, 0x0030)
+#define DRA7XX_CM_L3INIT_USB_OTG_SS2_CLKCTRL_OFFSET		0x0040
+#define DRA7XX_CM_L3INIT_USB_OTG_SS2_CLKCTRL			DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L3INIT_INST, 0x0040)
+#define DRA7XX_CM_L3INIT_USB_OTG_SS3_CLKCTRL_OFFSET		0x0048
+#define DRA7XX_CM_L3INIT_USB_OTG_SS3_CLKCTRL			DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L3INIT_INST, 0x0048)
+#define DRA7XX_CM_L3INIT_USB_OTG_SS4_CLKCTRL_OFFSET		0x0050
+#define DRA7XX_CM_L3INIT_USB_OTG_SS4_CLKCTRL			DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L3INIT_INST, 0x0050)
+#define DRA7XX_CM_L3INIT_MLB_SS_CLKCTRL_OFFSET			0x0058
+#define DRA7XX_CM_L3INIT_MLB_SS_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L3INIT_INST, 0x0058)
+#define DRA7XX_CM_L3INIT_IEEE1500_2_OCP_CLKCTRL_OFFSET		0x0078
+#define DRA7XX_CM_L3INIT_IEEE1500_2_OCP_CLKCTRL			DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L3INIT_INST, 0x0078)
+#define DRA7XX_CM_L3INIT_SATA_CLKCTRL_OFFSET			0x0088
+#define DRA7XX_CM_L3INIT_SATA_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L3INIT_INST, 0x0088)
+#define DRA7XX_CM_PCIE_CLKSTCTRL_OFFSET				0x00a0
+#define DRA7XX_CM_PCIE_STATICDEP_OFFSET				0x00a4
+#define DRA7XX_CM_GMAC_CLKSTCTRL_OFFSET				0x00c0
+#define DRA7XX_CM_GMAC_STATICDEP_OFFSET				0x00c4
+#define DRA7XX_CM_GMAC_DYNAMICDEP_OFFSET			0x00c8
+#define DRA7XX_CM_GMAC_GMAC_CLKCTRL_OFFSET			0x00d0
+#define DRA7XX_CM_GMAC_GMAC_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L3INIT_INST, 0x00d0)
+#define DRA7XX_CM_L3INIT_OCP2SCP1_CLKCTRL_OFFSET		0x00e0
+#define DRA7XX_CM_L3INIT_OCP2SCP1_CLKCTRL			DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L3INIT_INST, 0x00e0)
+#define DRA7XX_CM_L3INIT_OCP2SCP3_CLKCTRL_OFFSET		0x00e8
+#define DRA7XX_CM_L3INIT_OCP2SCP3_CLKCTRL			DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L3INIT_INST, 0x00e8)
+#define DRA7XX_CM_L3INIT_USB_OTG_SS1_CLKCTRL_OFFSET		0x00f0
+#define DRA7XX_CM_L3INIT_USB_OTG_SS1_CLKCTRL			DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L3INIT_INST, 0x00f0)
+
+/* CM_CORE.CUSTEFUSE_CM_CORE register offsets */
+#define DRA7XX_CM_CUSTEFUSE_CLKSTCTRL_OFFSET			0x0000
+#define DRA7XX_CM_CUSTEFUSE_EFUSE_CTRL_CUST_CLKCTRL_OFFSET	0x0020
+#define DRA7XX_CM_CUSTEFUSE_EFUSE_CTRL_CUST_CLKCTRL		DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_CUSTEFUSE_INST, 0x0020)
+
+/* CM_CORE.L4PER_CM_CORE register offsets */
+#define DRA7XX_CM_L4PER_CLKSTCTRL_OFFSET			0x0000
+#define DRA7XX_CM_L4PER_DYNAMICDEP_OFFSET			0x0008
+#define DRA7XX_CM_L4PER2_L4_PER2_CLKCTRL_OFFSET			0x000c
+#define DRA7XX_CM_L4PER2_L4_PER2_CLKCTRL			DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x000c)
+#define DRA7XX_CM_L4PER3_L4_PER3_CLKCTRL_OFFSET			0x0014
+#define DRA7XX_CM_L4PER3_L4_PER3_CLKCTRL			DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x0014)
+#define DRA7XX_CM_L4PER2_PRUSS1_CLKCTRL_OFFSET			0x0018
+#define DRA7XX_CM_L4PER2_PRUSS1_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x0018)
+#define DRA7XX_CM_L4PER2_PRUSS2_CLKCTRL_OFFSET			0x0020
+#define DRA7XX_CM_L4PER2_PRUSS2_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x0020)
+#define DRA7XX_CM_L4PER_TIMER10_CLKCTRL_OFFSET			0x0028
+#define DRA7XX_CM_L4PER_TIMER10_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x0028)
+#define DRA7XX_CM_L4PER_TIMER11_CLKCTRL_OFFSET			0x0030
+#define DRA7XX_CM_L4PER_TIMER11_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x0030)
+#define DRA7XX_CM_L4PER_TIMER2_CLKCTRL_OFFSET			0x0038
+#define DRA7XX_CM_L4PER_TIMER2_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x0038)
+#define DRA7XX_CM_L4PER_TIMER3_CLKCTRL_OFFSET			0x0040
+#define DRA7XX_CM_L4PER_TIMER3_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x0040)
+#define DRA7XX_CM_L4PER_TIMER4_CLKCTRL_OFFSET			0x0048
+#define DRA7XX_CM_L4PER_TIMER4_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x0048)
+#define DRA7XX_CM_L4PER_TIMER9_CLKCTRL_OFFSET			0x0050
+#define DRA7XX_CM_L4PER_TIMER9_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x0050)
+#define DRA7XX_CM_L4PER_ELM_CLKCTRL_OFFSET			0x0058
+#define DRA7XX_CM_L4PER_ELM_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x0058)
+#define DRA7XX_CM_L4PER_GPIO2_CLKCTRL_OFFSET			0x0060
+#define DRA7XX_CM_L4PER_GPIO2_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x0060)
+#define DRA7XX_CM_L4PER_GPIO3_CLKCTRL_OFFSET			0x0068
+#define DRA7XX_CM_L4PER_GPIO3_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x0068)
+#define DRA7XX_CM_L4PER_GPIO4_CLKCTRL_OFFSET			0x0070
+#define DRA7XX_CM_L4PER_GPIO4_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x0070)
+#define DRA7XX_CM_L4PER_GPIO5_CLKCTRL_OFFSET			0x0078
+#define DRA7XX_CM_L4PER_GPIO5_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x0078)
+#define DRA7XX_CM_L4PER_GPIO6_CLKCTRL_OFFSET			0x0080
+#define DRA7XX_CM_L4PER_GPIO6_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x0080)
+#define DRA7XX_CM_L4PER_HDQ1W_CLKCTRL_OFFSET			0x0088
+#define DRA7XX_CM_L4PER_HDQ1W_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x0088)
+#define DRA7XX_CM_L4PER2_PWMSS2_CLKCTRL_OFFSET			0x0090
+#define DRA7XX_CM_L4PER2_PWMSS2_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x0090)
+#define DRA7XX_CM_L4PER2_PWMSS3_CLKCTRL_OFFSET			0x0098
+#define DRA7XX_CM_L4PER2_PWMSS3_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x0098)
+#define DRA7XX_CM_L4PER_I2C1_CLKCTRL_OFFSET			0x00a0
+#define DRA7XX_CM_L4PER_I2C1_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x00a0)
+#define DRA7XX_CM_L4PER_I2C2_CLKCTRL_OFFSET			0x00a8
+#define DRA7XX_CM_L4PER_I2C2_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x00a8)
+#define DRA7XX_CM_L4PER_I2C3_CLKCTRL_OFFSET			0x00b0
+#define DRA7XX_CM_L4PER_I2C3_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x00b0)
+#define DRA7XX_CM_L4PER_I2C4_CLKCTRL_OFFSET			0x00b8
+#define DRA7XX_CM_L4PER_I2C4_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x00b8)
+#define DRA7XX_CM_L4PER_L4_PER1_CLKCTRL_OFFSET			0x00c0
+#define DRA7XX_CM_L4PER_L4_PER1_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x00c0)
+#define DRA7XX_CM_L4PER2_PWMSS1_CLKCTRL_OFFSET			0x00c4
+#define DRA7XX_CM_L4PER2_PWMSS1_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x00c4)
+#define DRA7XX_CM_L4PER3_TIMER13_CLKCTRL_OFFSET			0x00c8
+#define DRA7XX_CM_L4PER3_TIMER13_CLKCTRL			DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x00c8)
+#define DRA7XX_CM_L4PER3_TIMER14_CLKCTRL_OFFSET			0x00d0
+#define DRA7XX_CM_L4PER3_TIMER14_CLKCTRL			DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x00d0)
+#define DRA7XX_CM_L4PER3_TIMER15_CLKCTRL_OFFSET			0x00d8
+#define DRA7XX_CM_L4PER3_TIMER15_CLKCTRL			DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x00d8)
+#define DRA7XX_CM_L4PER_MCSPI1_CLKCTRL_OFFSET			0x00f0
+#define DRA7XX_CM_L4PER_MCSPI1_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x00f0)
+#define DRA7XX_CM_L4PER_MCSPI2_CLKCTRL_OFFSET			0x00f8
+#define DRA7XX_CM_L4PER_MCSPI2_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x00f8)
+#define DRA7XX_CM_L4PER_MCSPI3_CLKCTRL_OFFSET			0x0100
+#define DRA7XX_CM_L4PER_MCSPI3_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x0100)
+#define DRA7XX_CM_L4PER_MCSPI4_CLKCTRL_OFFSET			0x0108
+#define DRA7XX_CM_L4PER_MCSPI4_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x0108)
+#define DRA7XX_CM_L4PER_GPIO7_CLKCTRL_OFFSET			0x0110
+#define DRA7XX_CM_L4PER_GPIO7_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x0110)
+#define DRA7XX_CM_L4PER_GPIO8_CLKCTRL_OFFSET			0x0118
+#define DRA7XX_CM_L4PER_GPIO8_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x0118)
+#define DRA7XX_CM_L4PER_MMC3_CLKCTRL_OFFSET			0x0120
+#define DRA7XX_CM_L4PER_MMC3_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x0120)
+#define DRA7XX_CM_L4PER_MMC4_CLKCTRL_OFFSET			0x0128
+#define DRA7XX_CM_L4PER_MMC4_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x0128)
+#define DRA7XX_CM_L4PER3_TIMER16_CLKCTRL_OFFSET			0x0130
+#define DRA7XX_CM_L4PER3_TIMER16_CLKCTRL			DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x0130)
+#define DRA7XX_CM_L4PER2_QSPI_CLKCTRL_OFFSET			0x0138
+#define DRA7XX_CM_L4PER2_QSPI_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x0138)
+#define DRA7XX_CM_L4PER_UART1_CLKCTRL_OFFSET			0x0140
+#define DRA7XX_CM_L4PER_UART1_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x0140)
+#define DRA7XX_CM_L4PER_UART2_CLKCTRL_OFFSET			0x0148
+#define DRA7XX_CM_L4PER_UART2_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x0148)
+#define DRA7XX_CM_L4PER_UART3_CLKCTRL_OFFSET			0x0150
+#define DRA7XX_CM_L4PER_UART3_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x0150)
+#define DRA7XX_CM_L4PER_UART4_CLKCTRL_OFFSET			0x0158
+#define DRA7XX_CM_L4PER_UART4_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x0158)
+#define DRA7XX_CM_L4PER2_MCASP2_CLKCTRL_OFFSET			0x0160
+#define DRA7XX_CM_L4PER2_MCASP2_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x0160)
+#define DRA7XX_CM_L4PER2_MCASP3_CLKCTRL_OFFSET			0x0168
+#define DRA7XX_CM_L4PER2_MCASP3_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x0168)
+#define DRA7XX_CM_L4PER_UART5_CLKCTRL_OFFSET			0x0170
+#define DRA7XX_CM_L4PER_UART5_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x0170)
+#define DRA7XX_CM_L4PER2_MCASP5_CLKCTRL_OFFSET			0x0178
+#define DRA7XX_CM_L4PER2_MCASP5_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x0178)
+#define DRA7XX_CM_L4SEC_CLKSTCTRL_OFFSET			0x0180
+#define DRA7XX_CM_L4SEC_STATICDEP_OFFSET			0x0184
+#define DRA7XX_CM_L4SEC_DYNAMICDEP_OFFSET			0x0188
+#define DRA7XX_CM_L4PER2_MCASP8_CLKCTRL_OFFSET			0x0190
+#define DRA7XX_CM_L4PER2_MCASP8_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x0190)
+#define DRA7XX_CM_L4PER2_MCASP4_CLKCTRL_OFFSET			0x0198
+#define DRA7XX_CM_L4PER2_MCASP4_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x0198)
+#define DRA7XX_CM_L4SEC_AES1_CLKCTRL_OFFSET			0x01a0
+#define DRA7XX_CM_L4SEC_AES1_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x01a0)
+#define DRA7XX_CM_L4SEC_AES2_CLKCTRL_OFFSET			0x01a8
+#define DRA7XX_CM_L4SEC_AES2_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x01a8)
+#define DRA7XX_CM_L4SEC_DES3DES_CLKCTRL_OFFSET			0x01b0
+#define DRA7XX_CM_L4SEC_DES3DES_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x01b0)
+#define DRA7XX_CM_L4SEC_FPKA_CLKCTRL_OFFSET			0x01b8
+#define DRA7XX_CM_L4SEC_FPKA_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x01b8)
+#define DRA7XX_CM_L4SEC_RNG_CLKCTRL_OFFSET			0x01c0
+#define DRA7XX_CM_L4SEC_RNG_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x01c0)
+#define DRA7XX_CM_L4SEC_SHA2MD51_CLKCTRL_OFFSET			0x01c8
+#define DRA7XX_CM_L4SEC_SHA2MD51_CLKCTRL			DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x01c8)
+#define DRA7XX_CM_L4PER2_UART7_CLKCTRL_OFFSET			0x01d0
+#define DRA7XX_CM_L4PER2_UART7_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x01d0)
+#define DRA7XX_CM_L4SEC_DMA_CRYPTO_CLKCTRL_OFFSET		0x01d8
+#define DRA7XX_CM_L4SEC_DMA_CRYPTO_CLKCTRL			DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x01d8)
+#define DRA7XX_CM_L4PER2_UART8_CLKCTRL_OFFSET			0x01e0
+#define DRA7XX_CM_L4PER2_UART8_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x01e0)
+#define DRA7XX_CM_L4PER2_UART9_CLKCTRL_OFFSET			0x01e8
+#define DRA7XX_CM_L4PER2_UART9_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x01e8)
+#define DRA7XX_CM_L4PER2_DCAN2_CLKCTRL_OFFSET			0x01f0
+#define DRA7XX_CM_L4PER2_DCAN2_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x01f0)
+#define DRA7XX_CM_L4SEC_SHA2MD52_CLKCTRL_OFFSET			0x01f8
+#define DRA7XX_CM_L4SEC_SHA2MD52_CLKCTRL			DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x01f8)
+#define DRA7XX_CM_L4PER2_CLKSTCTRL_OFFSET			0x01fc
+#define DRA7XX_CM_L4PER2_DYNAMICDEP_OFFSET			0x0200
+#define DRA7XX_CM_L4PER2_MCASP6_CLKCTRL_OFFSET			0x0204
+#define DRA7XX_CM_L4PER2_MCASP6_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x0204)
+#define DRA7XX_CM_L4PER2_MCASP7_CLKCTRL_OFFSET			0x0208
+#define DRA7XX_CM_L4PER2_MCASP7_CLKCTRL				DRA7XX_CM_CORE_REGADDR(DRA7XX_CM_CORE_L4PER_INST, 0x0208)
+#define DRA7XX_CM_L4PER2_STATICDEP_OFFSET			0x020c
+#define DRA7XX_CM_L4PER3_CLKSTCTRL_OFFSET			0x0210
+#define DRA7XX_CM_L4PER3_DYNAMICDEP_OFFSET			0x0214
+
+#endif
diff --git a/arch/arm/mach-omap2/io.c b/arch/arm/mach-omap2/io.c
index 3656b80..ff2113c 100644
--- a/arch/arm/mach-omap2/io.c
+++ b/arch/arm/mach-omap2/io.c
@@ -665,6 +665,11 @@
 	omap2_set_globals_prcm_mpu(OMAP2_L4_IO_ADDRESS(OMAP54XX_PRCM_MPU_BASE));
 	omap_prm_base_init();
 	omap_cm_base_init();
+	omap44xx_prm_init();
+	dra7xx_powerdomains_init();
+	dra7xx_clockdomains_init();
+	dra7xx_hwmod_init();
+	omap_hwmod_init_postsetup();
 }
 #endif
 
diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c
index b4ecd2c..d9ee0ff 100644
--- a/arch/arm/mach-omap2/omap_hwmod.c
+++ b/arch/arm/mach-omap2/omap_hwmod.c
@@ -1405,7 +1405,9 @@
 	    (sf & SYSC_HAS_CLOCKACTIVITY))
 		_set_clockactivity(oh, oh->class->sysc->clockact, &v);
 
-	_write_sysconfig(v, oh);
+	/* If the cached value is the same as the new value, skip the write */
+	if (oh->_sysc_cache != v)
+		_write_sysconfig(v, oh);
 
 	/*
 	 * Set the autoidle bit only after setting the smartidle bit
diff --git a/arch/arm/mach-omap2/omap_hwmod.h b/arch/arm/mach-omap2/omap_hwmod.h
index e1482a9..d02acf9 100644
--- a/arch/arm/mach-omap2/omap_hwmod.h
+++ b/arch/arm/mach-omap2/omap_hwmod.h
@@ -751,6 +751,7 @@
 extern int omap44xx_hwmod_init(void);
 extern int omap54xx_hwmod_init(void);
 extern int am33xx_hwmod_init(void);
+extern int dra7xx_hwmod_init(void);
 
 extern int __init omap_hwmod_register_links(struct omap_hwmod_ocp_if **ois);
 
diff --git a/arch/arm/mach-omap2/omap_hwmod_33xx_data.c b/arch/arm/mach-omap2/omap_hwmod_33xx_data.c
index eb2f3b9..215894f 100644
--- a/arch/arm/mach-omap2/omap_hwmod_33xx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_33xx_data.c
@@ -325,7 +325,6 @@
  *
  *    - cEFUSE (doesn't fall under any ocp_if)
  *    - clkdiv32k
- *    - debugss
  *    - ocp watch point
  */
 #if 0
@@ -369,27 +368,6 @@
 	},
 };
 
-/*
- * 'debugss' class
- * debug sub system
- */
-static struct omap_hwmod_class am33xx_debugss_hwmod_class = {
-	.name		= "debugss",
-};
-
-static struct omap_hwmod am33xx_debugss_hwmod = {
-	.name		= "debugss",
-	.class		= &am33xx_debugss_hwmod_class,
-	.clkdm_name	= "l3_aon_clkdm",
-	.main_clk	= "debugss_ick",
-	.prcm		= {
-		.omap4	= {
-			.clkctrl_offs	= AM33XX_CM_WKUP_DEBUGSS_CLKCTRL_OFFSET,
-			.modulemode	= MODULEMODE_SWCTRL,
-		},
-	},
-};
-
 /* ocpwp */
 static struct omap_hwmod_class am33xx_ocpwp_hwmod_class = {
 	.name		= "ocpwp",
@@ -482,6 +460,34 @@
 	},
 };
 
+/*
+ * 'debugss' class
+ * debug sub system
+ */
+static struct omap_hwmod_opt_clk debugss_opt_clks[] = {
+	{ .role = "dbg_sysclk", .clk = "dbg_sysclk_ck" },
+	{ .role = "dbg_clka", .clk = "dbg_clka_ck" },
+};
+
+static struct omap_hwmod_class am33xx_debugss_hwmod_class = {
+	.name		= "debugss",
+};
+
+static struct omap_hwmod am33xx_debugss_hwmod = {
+	.name		= "debugss",
+	.class		= &am33xx_debugss_hwmod_class,
+	.clkdm_name	= "l3_aon_clkdm",
+	.main_clk	= "trace_clk_div_ck",
+	.prcm		= {
+		.omap4	= {
+			.clkctrl_offs	= AM33XX_CM_WKUP_DEBUGSS_CLKCTRL_OFFSET,
+			.modulemode	= MODULEMODE_SWCTRL,
+		},
+	},
+	.opt_clks	= debugss_opt_clks,
+	.opt_clks_cnt	= ARRAY_SIZE(debugss_opt_clks),
+};
+
 /* 'smartreflex' class */
 static struct omap_hwmod_class am33xx_smartreflex_hwmod_class = {
 	.name		= "smartreflex",
@@ -1796,6 +1802,24 @@
 	.user		= OCP_USER_MPU | OCP_USER_SDMA,
 };
 
+/* l3_main -> debugss */
+static struct omap_hwmod_addr_space am33xx_debugss_addrs[] = {
+	{
+		.pa_start	= 0x4b000000,
+		.pa_end		= 0x4b000000 + SZ_16M - 1,
+		.flags		= ADDR_TYPE_RT
+	},
+	{ }
+};
+
+static struct omap_hwmod_ocp_if am33xx_l3_main__debugss = {
+	.master		= &am33xx_l3_main_hwmod,
+	.slave		= &am33xx_debugss_hwmod,
+	.clk		= "dpll_core_m4_ck",
+	.addr		= am33xx_debugss_addrs,
+	.user		= OCP_USER_MPU,
+};
+
 /* l4 wkup -> smartreflex0 */
 static struct omap_hwmod_ocp_if am33xx_l4_wkup__smartreflex0 = {
 	.master		= &am33xx_l4_wkup_hwmod,
@@ -2470,6 +2494,7 @@
 	&am33xx_pruss__l3_main,
 	&am33xx_wkup_m3__l4_wkup,
 	&am33xx_gfx__l3_main,
+	&am33xx_l3_main__debugss,
 	&am33xx_l4_wkup__wkup_m3,
 	&am33xx_l4_wkup__control,
 	&am33xx_l4_wkup__smartreflex0,
diff --git a/arch/arm/mach-omap2/omap_hwmod_54xx_data.c b/arch/arm/mach-omap2/omap_hwmod_54xx_data.c
index b4d0474..cde4155 100644
--- a/arch/arm/mach-omap2/omap_hwmod_54xx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_54xx_data.c
@@ -740,6 +740,39 @@
 };
 
 /*
+ * 'mailbox' class
+ * mailbox module allowing communication between the on-chip processors using a
+ * queued mailbox-interrupt mechanism.
+ */
+
+static struct omap_hwmod_class_sysconfig omap54xx_mailbox_sysc = {
+	.rev_offs	= 0x0000,
+	.sysc_offs	= 0x0010,
+	.sysc_flags	= (SYSC_HAS_RESET_STATUS | SYSC_HAS_SIDLEMODE |
+			   SYSC_HAS_SOFTRESET),
+	.idlemodes	= (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART),
+	.sysc_fields	= &omap_hwmod_sysc_type2,
+};
+
+static struct omap_hwmod_class omap54xx_mailbox_hwmod_class = {
+	.name	= "mailbox",
+	.sysc	= &omap54xx_mailbox_sysc,
+};
+
+/* mailbox */
+static struct omap_hwmod omap54xx_mailbox_hwmod = {
+	.name		= "mailbox",
+	.class		= &omap54xx_mailbox_hwmod_class,
+	.clkdm_name	= "l4cfg_clkdm",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = OMAP54XX_CM_L4CFG_MAILBOX_CLKCTRL_OFFSET,
+			.context_offs = OMAP54XX_RM_L4CFG_MAILBOX_CONTEXT_OFFSET,
+		},
+	},
+};
+
+/*
  * 'mcbsp' class
  * multi channel buffered serial port controller
  */
@@ -1807,6 +1840,14 @@
 	.user		= OCP_USER_MPU | OCP_USER_SDMA,
 };
 
+/* l4_cfg -> mailbox */
+static struct omap_hwmod_ocp_if omap54xx_l4_cfg__mailbox = {
+	.master		= &omap54xx_l4_cfg_hwmod,
+	.slave		= &omap54xx_mailbox_hwmod,
+	.clk		= "l4_root_clk_div",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
 /* l4_abe -> mcbsp1 */
 static struct omap_hwmod_ocp_if omap54xx_l4_abe__mcbsp1 = {
 	.master		= &omap54xx_l4_abe_hwmod,
@@ -2107,6 +2148,7 @@
 	&omap54xx_l4_per__i2c4,
 	&omap54xx_l4_per__i2c5,
 	&omap54xx_l4_wkup__kbd,
+	&omap54xx_l4_cfg__mailbox,
 	&omap54xx_l4_abe__mcbsp1,
 	&omap54xx_l4_abe__mcbsp2,
 	&omap54xx_l4_abe__mcbsp3,
diff --git a/arch/arm/mach-omap2/omap_hwmod_7xx_data.c b/arch/arm/mach-omap2/omap_hwmod_7xx_data.c
new file mode 100644
index 0000000..db32d53
--- /dev/null
+++ b/arch/arm/mach-omap2/omap_hwmod_7xx_data.c
@@ -0,0 +1,2724 @@
+/*
+ * Hardware modules present on the DRA7xx chips
+ *
+ * Copyright (C) 2013 Texas Instruments Incorporated - http://www.ti.com
+ *
+ * Paul Walmsley
+ * Benoit Cousson
+ *
+ * This file is automatically generated from the OMAP hardware databases.
+ * We respectfully ask that any modifications to this file be coordinated
+ * with the public linux-omap@vger.kernel.org mailing list and the
+ * authors above to ensure that the autogeneration scripts are kept
+ * up-to-date with the file contents.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/io.h>
+#include <linux/platform_data/gpio-omap.h>
+#include <linux/power/smartreflex.h>
+#include <linux/i2c-omap.h>
+
+#include <linux/omap-dma.h>
+#include <linux/platform_data/spi-omap2-mcspi.h>
+#include <linux/platform_data/asoc-ti-mcbsp.h>
+#include <plat/dmtimer.h>
+
+#include "omap_hwmod.h"
+#include "omap_hwmod_common_data.h"
+#include "cm1_7xx.h"
+#include "cm2_7xx.h"
+#include "prm7xx.h"
+#include "i2c.h"
+#include "mmc.h"
+#include "wd_timer.h"
+
+/* Base offset for all DRA7XX interrupts external to MPUSS */
+#define DRA7XX_IRQ_GIC_START	32
+
+/* Base offset for all DRA7XX dma requests */
+#define DRA7XX_DMA_REQ_START	1
+
+
+/*
+ * IP blocks
+ */
+
+/*
+ * 'l3' class
+ * instance(s): l3_instr, l3_main_1, l3_main_2
+ */
+static struct omap_hwmod_class dra7xx_l3_hwmod_class = {
+	.name	= "l3",
+};
+
+/* l3_instr */
+static struct omap_hwmod dra7xx_l3_instr_hwmod = {
+	.name		= "l3_instr",
+	.class		= &dra7xx_l3_hwmod_class,
+	.clkdm_name	= "l3instr_clkdm",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_L3INSTR_L3_INSTR_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_L3INSTR_L3_INSTR_CONTEXT_OFFSET,
+			.modulemode   = MODULEMODE_HWCTRL,
+		},
+	},
+};
+
+/* l3_main_1 */
+static struct omap_hwmod dra7xx_l3_main_1_hwmod = {
+	.name		= "l3_main_1",
+	.class		= &dra7xx_l3_hwmod_class,
+	.clkdm_name	= "l3main1_clkdm",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_L3MAIN1_L3_MAIN_1_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_L3MAIN1_L3_MAIN_1_CONTEXT_OFFSET,
+		},
+	},
+};
+
+/* l3_main_2 */
+static struct omap_hwmod dra7xx_l3_main_2_hwmod = {
+	.name		= "l3_main_2",
+	.class		= &dra7xx_l3_hwmod_class,
+	.clkdm_name	= "l3instr_clkdm",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_L3INSTR_L3_MAIN_2_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_L3INSTR_L3_MAIN_2_CONTEXT_OFFSET,
+			.modulemode   = MODULEMODE_HWCTRL,
+		},
+	},
+};
+
+/*
+ * 'l4' class
+ * instance(s): l4_cfg, l4_per1, l4_per2, l4_per3, l4_wkup
+ */
+static struct omap_hwmod_class dra7xx_l4_hwmod_class = {
+	.name	= "l4",
+};
+
+/* l4_cfg */
+static struct omap_hwmod dra7xx_l4_cfg_hwmod = {
+	.name		= "l4_cfg",
+	.class		= &dra7xx_l4_hwmod_class,
+	.clkdm_name	= "l4cfg_clkdm",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_L4CFG_L4_CFG_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_L4CFG_L4_CFG_CONTEXT_OFFSET,
+		},
+	},
+};
+
+/* l4_per1 */
+static struct omap_hwmod dra7xx_l4_per1_hwmod = {
+	.name		= "l4_per1",
+	.class		= &dra7xx_l4_hwmod_class,
+	.clkdm_name	= "l4per_clkdm",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_L4PER_L4_PER1_CLKCTRL_OFFSET,
+			.flags = HWMOD_OMAP4_NO_CONTEXT_LOSS_BIT,
+		},
+	},
+};
+
+/* l4_per2 */
+static struct omap_hwmod dra7xx_l4_per2_hwmod = {
+	.name		= "l4_per2",
+	.class		= &dra7xx_l4_hwmod_class,
+	.clkdm_name	= "l4per2_clkdm",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_L4PER2_L4_PER2_CLKCTRL_OFFSET,
+			.flags = HWMOD_OMAP4_NO_CONTEXT_LOSS_BIT,
+		},
+	},
+};
+
+/* l4_per3 */
+static struct omap_hwmod dra7xx_l4_per3_hwmod = {
+	.name		= "l4_per3",
+	.class		= &dra7xx_l4_hwmod_class,
+	.clkdm_name	= "l4per3_clkdm",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_L4PER3_L4_PER3_CLKCTRL_OFFSET,
+			.flags = HWMOD_OMAP4_NO_CONTEXT_LOSS_BIT,
+		},
+	},
+};
+
+/* l4_wkup */
+static struct omap_hwmod dra7xx_l4_wkup_hwmod = {
+	.name		= "l4_wkup",
+	.class		= &dra7xx_l4_hwmod_class,
+	.clkdm_name	= "wkupaon_clkdm",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_WKUPAON_L4_WKUP_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_WKUPAON_L4_WKUP_CONTEXT_OFFSET,
+		},
+	},
+};
+
+/*
+ * 'atl' class
+ *
+ */
+
+static struct omap_hwmod_class dra7xx_atl_hwmod_class = {
+	.name	= "atl",
+};
+
+/* atl */
+static struct omap_hwmod dra7xx_atl_hwmod = {
+	.name		= "atl",
+	.class		= &dra7xx_atl_hwmod_class,
+	.clkdm_name	= "atl_clkdm",
+	.main_clk	= "atl_gfclk_mux",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_ATL_ATL_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_ATL_ATL_CONTEXT_OFFSET,
+			.modulemode   = MODULEMODE_SWCTRL,
+		},
+	},
+};
+
+/*
+ * 'bb2d' class
+ *
+ */
+
+static struct omap_hwmod_class dra7xx_bb2d_hwmod_class = {
+	.name	= "bb2d",
+};
+
+/* bb2d */
+static struct omap_hwmod dra7xx_bb2d_hwmod = {
+	.name		= "bb2d",
+	.class		= &dra7xx_bb2d_hwmod_class,
+	.clkdm_name	= "dss_clkdm",
+	.main_clk	= "dpll_core_h24x2_ck",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_DSS_BB2D_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_DSS_BB2D_CONTEXT_OFFSET,
+			.modulemode   = MODULEMODE_SWCTRL,
+		},
+	},
+};
+
+/*
+ * 'counter' class
+ *
+ */
+
+static struct omap_hwmod_class_sysconfig dra7xx_counter_sysc = {
+	.rev_offs	= 0x0000,
+	.sysc_offs	= 0x0010,
+	.sysc_flags	= SYSC_HAS_SIDLEMODE,
+	.idlemodes	= (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART |
+			   SIDLE_SMART_WKUP),
+	.sysc_fields	= &omap_hwmod_sysc_type1,
+};
+
+static struct omap_hwmod_class dra7xx_counter_hwmod_class = {
+	.name	= "counter",
+	.sysc	= &dra7xx_counter_sysc,
+};
+
+/* counter_32k */
+static struct omap_hwmod dra7xx_counter_32k_hwmod = {
+	.name		= "counter_32k",
+	.class		= &dra7xx_counter_hwmod_class,
+	.clkdm_name	= "wkupaon_clkdm",
+	.flags		= HWMOD_SWSUP_SIDLE,
+	.main_clk	= "wkupaon_iclk_mux",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_WKUPAON_COUNTER_32K_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_WKUPAON_COUNTER_32K_CONTEXT_OFFSET,
+		},
+	},
+};
+
+/*
+ * 'ctrl_module' class
+ *
+ */
+
+static struct omap_hwmod_class dra7xx_ctrl_module_hwmod_class = {
+	.name	= "ctrl_module",
+};
+
+/* ctrl_module_wkup */
+static struct omap_hwmod dra7xx_ctrl_module_wkup_hwmod = {
+	.name		= "ctrl_module_wkup",
+	.class		= &dra7xx_ctrl_module_hwmod_class,
+	.clkdm_name	= "wkupaon_clkdm",
+	.prcm = {
+		.omap4 = {
+			.flags = HWMOD_OMAP4_NO_CONTEXT_LOSS_BIT,
+		},
+	},
+};
+
+/*
+ * 'dcan' class
+ *
+ */
+
+static struct omap_hwmod_class dra7xx_dcan_hwmod_class = {
+	.name	= "dcan",
+};
+
+/* dcan1 */
+static struct omap_hwmod dra7xx_dcan1_hwmod = {
+	.name		= "dcan1",
+	.class		= &dra7xx_dcan_hwmod_class,
+	.clkdm_name	= "wkupaon_clkdm",
+	.main_clk	= "dcan1_sys_clk_mux",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_WKUPAON_DCAN1_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_WKUPAON_DCAN1_CONTEXT_OFFSET,
+			.modulemode   = MODULEMODE_SWCTRL,
+		},
+	},
+};
+
+/* dcan2 */
+static struct omap_hwmod dra7xx_dcan2_hwmod = {
+	.name		= "dcan2",
+	.class		= &dra7xx_dcan_hwmod_class,
+	.clkdm_name	= "l4per2_clkdm",
+	.main_clk	= "sys_clkin1",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_L4PER2_DCAN2_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_L4PER2_DCAN2_CONTEXT_OFFSET,
+			.modulemode   = MODULEMODE_SWCTRL,
+		},
+	},
+};
+
+/*
+ * 'dma' class
+ *
+ */
+
+static struct omap_hwmod_class_sysconfig dra7xx_dma_sysc = {
+	.rev_offs	= 0x0000,
+	.sysc_offs	= 0x002c,
+	.syss_offs	= 0x0028,
+	.sysc_flags	= (SYSC_HAS_AUTOIDLE | SYSC_HAS_CLOCKACTIVITY |
+			   SYSC_HAS_EMUFREE | SYSC_HAS_MIDLEMODE |
+			   SYSC_HAS_SIDLEMODE | SYSC_HAS_SOFTRESET |
+			   SYSS_HAS_RESET_STATUS),
+	.idlemodes	= (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART |
+			   SIDLE_SMART_WKUP | MSTANDBY_FORCE | MSTANDBY_NO |
+			   MSTANDBY_SMART | MSTANDBY_SMART_WKUP),
+	.sysc_fields	= &omap_hwmod_sysc_type1,
+};
+
+static struct omap_hwmod_class dra7xx_dma_hwmod_class = {
+	.name	= "dma",
+	.sysc	= &dra7xx_dma_sysc,
+};
+
+/* dma dev_attr */
+static struct omap_dma_dev_attr dma_dev_attr = {
+	.dev_caps	= RESERVE_CHANNEL | DMA_LINKED_LCH | GLOBAL_PRIORITY |
+			  IS_CSSA_32 | IS_CDSA_32 | IS_RW_PRIORITY,
+	.lch_count	= 32,
+};
+
+/* dma_system */
+static struct omap_hwmod_irq_info dra7xx_dma_system_irqs[] = {
+	{ .name = "0", .irq = 12 + DRA7XX_IRQ_GIC_START },
+	{ .name = "1", .irq = 13 + DRA7XX_IRQ_GIC_START },
+	{ .name = "2", .irq = 14 + DRA7XX_IRQ_GIC_START },
+	{ .name = "3", .irq = 15 + DRA7XX_IRQ_GIC_START },
+	{ .irq = -1 }
+};
+
+static struct omap_hwmod dra7xx_dma_system_hwmod = {
+	.name		= "dma_system",
+	.class		= &dra7xx_dma_hwmod_class,
+	.clkdm_name	= "dma_clkdm",
+	.mpu_irqs	= dra7xx_dma_system_irqs,
+	.main_clk	= "l3_iclk_div",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_DMA_DMA_SYSTEM_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_DMA_DMA_SYSTEM_CONTEXT_OFFSET,
+		},
+	},
+	.dev_attr	= &dma_dev_attr,
+};
+
+/*
+ * 'dss' class
+ *
+ */
+
+static struct omap_hwmod_class_sysconfig dra7xx_dss_sysc = {
+	.rev_offs	= 0x0000,
+	.syss_offs	= 0x0014,
+	.sysc_flags	= SYSS_HAS_RESET_STATUS,
+};
+
+static struct omap_hwmod_class dra7xx_dss_hwmod_class = {
+	.name	= "dss",
+	.sysc	= &dra7xx_dss_sysc,
+	.reset	= omap_dss_reset,
+};
+
+/* dss */
+static struct omap_hwmod_dma_info dra7xx_dss_sdma_reqs[] = {
+	{ .dma_req = 75 + DRA7XX_DMA_REQ_START },
+	{ .dma_req = -1 }
+};
+
+static struct omap_hwmod_opt_clk dss_opt_clks[] = {
+	{ .role = "dss_clk", .clk = "dss_dss_clk" },
+	{ .role = "hdmi_phy_clk", .clk = "dss_48mhz_clk" },
+	{ .role = "32khz_clk", .clk = "dss_32khz_clk" },
+	{ .role = "video2_clk", .clk = "dss_video2_clk" },
+	{ .role = "video1_clk", .clk = "dss_video1_clk" },
+	{ .role = "hdmi_clk", .clk = "dss_hdmi_clk" },
+};
+
+static struct omap_hwmod dra7xx_dss_hwmod = {
+	.name		= "dss_core",
+	.class		= &dra7xx_dss_hwmod_class,
+	.clkdm_name	= "dss_clkdm",
+	.flags		= HWMOD_CONTROL_OPT_CLKS_IN_RESET,
+	.sdma_reqs	= dra7xx_dss_sdma_reqs,
+	.main_clk	= "dss_dss_clk",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_DSS_DSS_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_DSS_DSS_CONTEXT_OFFSET,
+			.modulemode   = MODULEMODE_SWCTRL,
+		},
+	},
+	.opt_clks	= dss_opt_clks,
+	.opt_clks_cnt	= ARRAY_SIZE(dss_opt_clks),
+};
+
+/*
+ * 'dispc' class
+ * display controller
+ */
+
+static struct omap_hwmod_class_sysconfig dra7xx_dispc_sysc = {
+	.rev_offs	= 0x0000,
+	.sysc_offs	= 0x0010,
+	.syss_offs	= 0x0014,
+	.sysc_flags	= (SYSC_HAS_AUTOIDLE | SYSC_HAS_CLOCKACTIVITY |
+			   SYSC_HAS_ENAWAKEUP | SYSC_HAS_MIDLEMODE |
+			   SYSC_HAS_SIDLEMODE | SYSC_HAS_SOFTRESET |
+			   SYSS_HAS_RESET_STATUS),
+	.idlemodes	= (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART |
+			   MSTANDBY_FORCE | MSTANDBY_NO | MSTANDBY_SMART),
+	.sysc_fields	= &omap_hwmod_sysc_type1,
+};
+
+static struct omap_hwmod_class dra7xx_dispc_hwmod_class = {
+	.name	= "dispc",
+	.sysc	= &dra7xx_dispc_sysc,
+};
+
+/* dss_dispc */
+/* dss_dispc dev_attr */
+static struct omap_dss_dispc_dev_attr dss_dispc_dev_attr = {
+	.has_framedonetv_irq	= 1,
+	.manager_count		= 4,
+};
+
+static struct omap_hwmod dra7xx_dss_dispc_hwmod = {
+	.name		= "dss_dispc",
+	.class		= &dra7xx_dispc_hwmod_class,
+	.clkdm_name	= "dss_clkdm",
+	.main_clk	= "dss_dss_clk",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_DSS_DSS_CLKCTRL_OFFSET,
+			.flags = HWMOD_OMAP4_NO_CONTEXT_LOSS_BIT,
+		},
+	},
+	.dev_attr	= &dss_dispc_dev_attr,
+};
+
+/*
+ * 'hdmi' class
+ * hdmi controller
+ */
+
+static struct omap_hwmod_class_sysconfig dra7xx_hdmi_sysc = {
+	.rev_offs	= 0x0000,
+	.sysc_offs	= 0x0010,
+	.sysc_flags	= (SYSC_HAS_RESET_STATUS | SYSC_HAS_SIDLEMODE |
+			   SYSC_HAS_SOFTRESET),
+	.idlemodes	= (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART |
+			   SIDLE_SMART_WKUP),
+	.sysc_fields	= &omap_hwmod_sysc_type2,
+};
+
+static struct omap_hwmod_class dra7xx_hdmi_hwmod_class = {
+	.name	= "hdmi",
+	.sysc	= &dra7xx_hdmi_sysc,
+};
+
+/* dss_hdmi */
+
+static struct omap_hwmod_opt_clk dss_hdmi_opt_clks[] = {
+	{ .role = "sys_clk", .clk = "dss_hdmi_clk" },
+};
+
+static struct omap_hwmod dra7xx_dss_hdmi_hwmod = {
+	.name		= "dss_hdmi",
+	.class		= &dra7xx_hdmi_hwmod_class,
+	.clkdm_name	= "dss_clkdm",
+	.main_clk	= "dss_48mhz_clk",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_DSS_DSS_CLKCTRL_OFFSET,
+			.flags = HWMOD_OMAP4_NO_CONTEXT_LOSS_BIT,
+		},
+	},
+	.opt_clks	= dss_hdmi_opt_clks,
+	.opt_clks_cnt	= ARRAY_SIZE(dss_hdmi_opt_clks),
+};
+
+/*
+ * 'elm' class
+ *
+ */
+
+static struct omap_hwmod_class_sysconfig dra7xx_elm_sysc = {
+	.rev_offs	= 0x0000,
+	.sysc_offs	= 0x0010,
+	.syss_offs	= 0x0014,
+	.sysc_flags	= (SYSC_HAS_AUTOIDLE | SYSC_HAS_CLOCKACTIVITY |
+			   SYSC_HAS_SIDLEMODE | SYSC_HAS_SOFTRESET |
+			   SYSS_HAS_RESET_STATUS),
+	.idlemodes	= (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART |
+			   SIDLE_SMART_WKUP),
+	.sysc_fields	= &omap_hwmod_sysc_type1,
+};
+
+static struct omap_hwmod_class dra7xx_elm_hwmod_class = {
+	.name	= "elm",
+	.sysc	= &dra7xx_elm_sysc,
+};
+
+/* elm */
+
+static struct omap_hwmod dra7xx_elm_hwmod = {
+	.name		= "elm",
+	.class		= &dra7xx_elm_hwmod_class,
+	.clkdm_name	= "l4per_clkdm",
+	.main_clk	= "l3_iclk_div",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_L4PER_ELM_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_L4PER_ELM_CONTEXT_OFFSET,
+		},
+	},
+};
+
+/*
+ * 'gpio' class
+ *
+ */
+
+static struct omap_hwmod_class_sysconfig dra7xx_gpio_sysc = {
+	.rev_offs	= 0x0000,
+	.sysc_offs	= 0x0010,
+	.syss_offs	= 0x0114,
+	.sysc_flags	= (SYSC_HAS_AUTOIDLE | SYSC_HAS_ENAWAKEUP |
+			   SYSC_HAS_SIDLEMODE | SYSC_HAS_SOFTRESET |
+			   SYSS_HAS_RESET_STATUS),
+	.idlemodes	= (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART |
+			   SIDLE_SMART_WKUP),
+	.sysc_fields	= &omap_hwmod_sysc_type1,
+};
+
+static struct omap_hwmod_class dra7xx_gpio_hwmod_class = {
+	.name	= "gpio",
+	.sysc	= &dra7xx_gpio_sysc,
+	.rev	= 2,
+};
+
+/* gpio dev_attr */
+static struct omap_gpio_dev_attr gpio_dev_attr = {
+	.bank_width	= 32,
+	.dbck_flag	= true,
+};
+
+/* gpio1 */
+static struct omap_hwmod_opt_clk gpio1_opt_clks[] = {
+	{ .role = "dbclk", .clk = "gpio1_dbclk" },
+};
+
+static struct omap_hwmod dra7xx_gpio1_hwmod = {
+	.name		= "gpio1",
+	.class		= &dra7xx_gpio_hwmod_class,
+	.clkdm_name	= "wkupaon_clkdm",
+	.main_clk	= "wkupaon_iclk_mux",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_WKUPAON_GPIO1_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_WKUPAON_GPIO1_CONTEXT_OFFSET,
+			.modulemode   = MODULEMODE_HWCTRL,
+		},
+	},
+	.opt_clks	= gpio1_opt_clks,
+	.opt_clks_cnt	= ARRAY_SIZE(gpio1_opt_clks),
+	.dev_attr	= &gpio_dev_attr,
+};
+
+/* gpio2 */
+static struct omap_hwmod_opt_clk gpio2_opt_clks[] = {
+	{ .role = "dbclk", .clk = "gpio2_dbclk" },
+};
+
+static struct omap_hwmod dra7xx_gpio2_hwmod = {
+	.name		= "gpio2",
+	.class		= &dra7xx_gpio_hwmod_class,
+	.clkdm_name	= "l4per_clkdm",
+	.flags		= HWMOD_CONTROL_OPT_CLKS_IN_RESET,
+	.main_clk	= "l3_iclk_div",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_L4PER_GPIO2_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_L4PER_GPIO2_CONTEXT_OFFSET,
+			.modulemode   = MODULEMODE_HWCTRL,
+		},
+	},
+	.opt_clks	= gpio2_opt_clks,
+	.opt_clks_cnt	= ARRAY_SIZE(gpio2_opt_clks),
+	.dev_attr	= &gpio_dev_attr,
+};
+
+/* gpio3 */
+static struct omap_hwmod_opt_clk gpio3_opt_clks[] = {
+	{ .role = "dbclk", .clk = "gpio3_dbclk" },
+};
+
+static struct omap_hwmod dra7xx_gpio3_hwmod = {
+	.name		= "gpio3",
+	.class		= &dra7xx_gpio_hwmod_class,
+	.clkdm_name	= "l4per_clkdm",
+	.flags		= HWMOD_CONTROL_OPT_CLKS_IN_RESET,
+	.main_clk	= "l3_iclk_div",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_L4PER_GPIO3_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_L4PER_GPIO3_CONTEXT_OFFSET,
+			.modulemode   = MODULEMODE_HWCTRL,
+		},
+	},
+	.opt_clks	= gpio3_opt_clks,
+	.opt_clks_cnt	= ARRAY_SIZE(gpio3_opt_clks),
+	.dev_attr	= &gpio_dev_attr,
+};
+
+/* gpio4 */
+static struct omap_hwmod_opt_clk gpio4_opt_clks[] = {
+	{ .role = "dbclk", .clk = "gpio4_dbclk" },
+};
+
+static struct omap_hwmod dra7xx_gpio4_hwmod = {
+	.name		= "gpio4",
+	.class		= &dra7xx_gpio_hwmod_class,
+	.clkdm_name	= "l4per_clkdm",
+	.flags		= HWMOD_CONTROL_OPT_CLKS_IN_RESET,
+	.main_clk	= "l3_iclk_div",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_L4PER_GPIO4_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_L4PER_GPIO4_CONTEXT_OFFSET,
+			.modulemode   = MODULEMODE_HWCTRL,
+		},
+	},
+	.opt_clks	= gpio4_opt_clks,
+	.opt_clks_cnt	= ARRAY_SIZE(gpio4_opt_clks),
+	.dev_attr	= &gpio_dev_attr,
+};
+
+/* gpio5 */
+static struct omap_hwmod_opt_clk gpio5_opt_clks[] = {
+	{ .role = "dbclk", .clk = "gpio5_dbclk" },
+};
+
+static struct omap_hwmod dra7xx_gpio5_hwmod = {
+	.name		= "gpio5",
+	.class		= &dra7xx_gpio_hwmod_class,
+	.clkdm_name	= "l4per_clkdm",
+	.flags		= HWMOD_CONTROL_OPT_CLKS_IN_RESET,
+	.main_clk	= "l3_iclk_div",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_L4PER_GPIO5_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_L4PER_GPIO5_CONTEXT_OFFSET,
+			.modulemode   = MODULEMODE_HWCTRL,
+		},
+	},
+	.opt_clks	= gpio5_opt_clks,
+	.opt_clks_cnt	= ARRAY_SIZE(gpio5_opt_clks),
+	.dev_attr	= &gpio_dev_attr,
+};
+
+/* gpio6 */
+static struct omap_hwmod_opt_clk gpio6_opt_clks[] = {
+	{ .role = "dbclk", .clk = "gpio6_dbclk" },
+};
+
+static struct omap_hwmod dra7xx_gpio6_hwmod = {
+	.name		= "gpio6",
+	.class		= &dra7xx_gpio_hwmod_class,
+	.clkdm_name	= "l4per_clkdm",
+	.flags		= HWMOD_CONTROL_OPT_CLKS_IN_RESET,
+	.main_clk	= "l3_iclk_div",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_L4PER_GPIO6_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_L4PER_GPIO6_CONTEXT_OFFSET,
+			.modulemode   = MODULEMODE_HWCTRL,
+		},
+	},
+	.opt_clks	= gpio6_opt_clks,
+	.opt_clks_cnt	= ARRAY_SIZE(gpio6_opt_clks),
+	.dev_attr	= &gpio_dev_attr,
+};
+
+/* gpio7 */
+static struct omap_hwmod_opt_clk gpio7_opt_clks[] = {
+	{ .role = "dbclk", .clk = "gpio7_dbclk" },
+};
+
+static struct omap_hwmod dra7xx_gpio7_hwmod = {
+	.name		= "gpio7",
+	.class		= &dra7xx_gpio_hwmod_class,
+	.clkdm_name	= "l4per_clkdm",
+	.flags		= HWMOD_CONTROL_OPT_CLKS_IN_RESET,
+	.main_clk	= "l3_iclk_div",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_L4PER_GPIO7_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_L4PER_GPIO7_CONTEXT_OFFSET,
+			.modulemode   = MODULEMODE_HWCTRL,
+		},
+	},
+	.opt_clks	= gpio7_opt_clks,
+	.opt_clks_cnt	= ARRAY_SIZE(gpio7_opt_clks),
+	.dev_attr	= &gpio_dev_attr,
+};
+
+/* gpio8 */
+static struct omap_hwmod_opt_clk gpio8_opt_clks[] = {
+	{ .role = "dbclk", .clk = "gpio8_dbclk" },
+};
+
+static struct omap_hwmod dra7xx_gpio8_hwmod = {
+	.name		= "gpio8",
+	.class		= &dra7xx_gpio_hwmod_class,
+	.clkdm_name	= "l4per_clkdm",
+	.flags		= HWMOD_CONTROL_OPT_CLKS_IN_RESET,
+	.main_clk	= "l3_iclk_div",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_L4PER_GPIO8_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_L4PER_GPIO8_CONTEXT_OFFSET,
+			.modulemode   = MODULEMODE_HWCTRL,
+		},
+	},
+	.opt_clks	= gpio8_opt_clks,
+	.opt_clks_cnt	= ARRAY_SIZE(gpio8_opt_clks),
+	.dev_attr	= &gpio_dev_attr,
+};
+
+/*
+ * 'gpmc' class
+ *
+ */
+
+static struct omap_hwmod_class_sysconfig dra7xx_gpmc_sysc = {
+	.rev_offs	= 0x0000,
+	.sysc_offs	= 0x0010,
+	.syss_offs	= 0x0014,
+	.sysc_flags	= (SYSC_HAS_AUTOIDLE | SYSC_HAS_SIDLEMODE |
+			   SYSC_HAS_SOFTRESET | SYSS_HAS_RESET_STATUS),
+	.idlemodes	= (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART |
+			   SIDLE_SMART_WKUP),
+	.sysc_fields	= &omap_hwmod_sysc_type1,
+};
+
+static struct omap_hwmod_class dra7xx_gpmc_hwmod_class = {
+	.name	= "gpmc",
+	.sysc	= &dra7xx_gpmc_sysc,
+};
+
+/* gpmc */
+
+static struct omap_hwmod dra7xx_gpmc_hwmod = {
+	.name		= "gpmc",
+	.class		= &dra7xx_gpmc_hwmod_class,
+	.clkdm_name	= "l3main1_clkdm",
+	.flags		= HWMOD_INIT_NO_IDLE | HWMOD_INIT_NO_RESET,
+	.main_clk	= "l3_iclk_div",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_L3MAIN1_GPMC_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_L3MAIN1_GPMC_CONTEXT_OFFSET,
+			.modulemode   = MODULEMODE_HWCTRL,
+		},
+	},
+};
+
+/*
+ * 'hdq1w' class
+ *
+ */
+
+static struct omap_hwmod_class_sysconfig dra7xx_hdq1w_sysc = {
+	.rev_offs	= 0x0000,
+	.sysc_offs	= 0x0014,
+	.syss_offs	= 0x0018,
+	.sysc_flags	= (SYSC_HAS_AUTOIDLE | SYSC_HAS_SOFTRESET |
+			   SYSS_HAS_RESET_STATUS),
+	.sysc_fields	= &omap_hwmod_sysc_type1,
+};
+
+static struct omap_hwmod_class dra7xx_hdq1w_hwmod_class = {
+	.name	= "hdq1w",
+	.sysc	= &dra7xx_hdq1w_sysc,
+};
+
+/* hdq1w */
+
+static struct omap_hwmod dra7xx_hdq1w_hwmod = {
+	.name		= "hdq1w",
+	.class		= &dra7xx_hdq1w_hwmod_class,
+	.clkdm_name	= "l4per_clkdm",
+	.flags		= HWMOD_INIT_NO_RESET,
+	.main_clk	= "func_12m_fclk",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_L4PER_HDQ1W_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_L4PER_HDQ1W_CONTEXT_OFFSET,
+			.modulemode   = MODULEMODE_SWCTRL,
+		},
+	},
+};
+
+/*
+ * 'i2c' class
+ *
+ */
+
+static struct omap_hwmod_class_sysconfig dra7xx_i2c_sysc = {
+	.sysc_offs	= 0x0010,
+	.syss_offs	= 0x0090,
+	.sysc_flags	= (SYSC_HAS_AUTOIDLE | SYSC_HAS_CLOCKACTIVITY |
+			   SYSC_HAS_ENAWAKEUP | SYSC_HAS_SIDLEMODE |
+			   SYSC_HAS_SOFTRESET | SYSS_HAS_RESET_STATUS),
+	.idlemodes	= (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART |
+			   SIDLE_SMART_WKUP),
+	.clockact	= CLOCKACT_TEST_ICLK,
+	.sysc_fields	= &omap_hwmod_sysc_type1,
+};
+
+static struct omap_hwmod_class dra7xx_i2c_hwmod_class = {
+	.name	= "i2c",
+	.sysc	= &dra7xx_i2c_sysc,
+	.reset	= &omap_i2c_reset,
+	.rev	= OMAP_I2C_IP_VERSION_2,
+};
+
+/* i2c dev_attr */
+static struct omap_i2c_dev_attr i2c_dev_attr = {
+	.flags	= OMAP_I2C_FLAG_BUS_SHIFT_NONE,
+};
+
+/* i2c1 */
+static struct omap_hwmod dra7xx_i2c1_hwmod = {
+	.name		= "i2c1",
+	.class		= &dra7xx_i2c_hwmod_class,
+	.clkdm_name	= "l4per_clkdm",
+	.flags		= HWMOD_16BIT_REG | HWMOD_SET_DEFAULT_CLOCKACT,
+	.main_clk	= "func_96m_fclk",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_L4PER_I2C1_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_L4PER_I2C1_CONTEXT_OFFSET,
+			.modulemode   = MODULEMODE_SWCTRL,
+		},
+	},
+	.dev_attr	= &i2c_dev_attr,
+};
+
+/* i2c2 */
+static struct omap_hwmod dra7xx_i2c2_hwmod = {
+	.name		= "i2c2",
+	.class		= &dra7xx_i2c_hwmod_class,
+	.clkdm_name	= "l4per_clkdm",
+	.flags		= HWMOD_16BIT_REG | HWMOD_SET_DEFAULT_CLOCKACT,
+	.main_clk	= "func_96m_fclk",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_L4PER_I2C2_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_L4PER_I2C2_CONTEXT_OFFSET,
+			.modulemode   = MODULEMODE_SWCTRL,
+		},
+	},
+	.dev_attr	= &i2c_dev_attr,
+};
+
+/* i2c3 */
+static struct omap_hwmod dra7xx_i2c3_hwmod = {
+	.name		= "i2c3",
+	.class		= &dra7xx_i2c_hwmod_class,
+	.clkdm_name	= "l4per_clkdm",
+	.flags		= HWMOD_16BIT_REG | HWMOD_SET_DEFAULT_CLOCKACT,
+	.main_clk	= "func_96m_fclk",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_L4PER_I2C3_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_L4PER_I2C3_CONTEXT_OFFSET,
+			.modulemode   = MODULEMODE_SWCTRL,
+		},
+	},
+	.dev_attr	= &i2c_dev_attr,
+};
+
+/* i2c4 */
+static struct omap_hwmod dra7xx_i2c4_hwmod = {
+	.name		= "i2c4",
+	.class		= &dra7xx_i2c_hwmod_class,
+	.clkdm_name	= "l4per_clkdm",
+	.flags		= HWMOD_16BIT_REG | HWMOD_SET_DEFAULT_CLOCKACT,
+	.main_clk	= "func_96m_fclk",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_L4PER_I2C4_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_L4PER_I2C4_CONTEXT_OFFSET,
+			.modulemode   = MODULEMODE_SWCTRL,
+		},
+	},
+	.dev_attr	= &i2c_dev_attr,
+};
+
+/* i2c5 */
+static struct omap_hwmod dra7xx_i2c5_hwmod = {
+	.name		= "i2c5",
+	.class		= &dra7xx_i2c_hwmod_class,
+	.clkdm_name	= "ipu_clkdm",
+	.flags		= HWMOD_16BIT_REG | HWMOD_SET_DEFAULT_CLOCKACT,
+	.main_clk	= "func_96m_fclk",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_IPU_I2C5_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_IPU_I2C5_CONTEXT_OFFSET,
+			.modulemode   = MODULEMODE_SWCTRL,
+		},
+	},
+	.dev_attr	= &i2c_dev_attr,
+};
+
+/*
+ * 'mcspi' class
+ *
+ */
+
+static struct omap_hwmod_class_sysconfig dra7xx_mcspi_sysc = {
+	.rev_offs	= 0x0000,
+	.sysc_offs	= 0x0010,
+	.sysc_flags	= (SYSC_HAS_EMUFREE | SYSC_HAS_RESET_STATUS |
+			   SYSC_HAS_SIDLEMODE | SYSC_HAS_SOFTRESET),
+	.idlemodes	= (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART |
+			   SIDLE_SMART_WKUP),
+	.sysc_fields	= &omap_hwmod_sysc_type2,
+};
+
+static struct omap_hwmod_class dra7xx_mcspi_hwmod_class = {
+	.name	= "mcspi",
+	.sysc	= &dra7xx_mcspi_sysc,
+	.rev	= OMAP4_MCSPI_REV,
+};
+
+/* mcspi1 */
+/* mcspi1 dev_attr */
+static struct omap2_mcspi_dev_attr mcspi1_dev_attr = {
+	.num_chipselect	= 4,
+};
+
+static struct omap_hwmod dra7xx_mcspi1_hwmod = {
+	.name		= "mcspi1",
+	.class		= &dra7xx_mcspi_hwmod_class,
+	.clkdm_name	= "l4per_clkdm",
+	.main_clk	= "func_48m_fclk",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_L4PER_MCSPI1_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_L4PER_MCSPI1_CONTEXT_OFFSET,
+			.modulemode   = MODULEMODE_SWCTRL,
+		},
+	},
+	.dev_attr	= &mcspi1_dev_attr,
+};
+
+/* mcspi2 */
+/* mcspi2 dev_attr */
+static struct omap2_mcspi_dev_attr mcspi2_dev_attr = {
+	.num_chipselect	= 2,
+};
+
+static struct omap_hwmod dra7xx_mcspi2_hwmod = {
+	.name		= "mcspi2",
+	.class		= &dra7xx_mcspi_hwmod_class,
+	.clkdm_name	= "l4per_clkdm",
+	.main_clk	= "func_48m_fclk",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_L4PER_MCSPI2_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_L4PER_MCSPI2_CONTEXT_OFFSET,
+			.modulemode   = MODULEMODE_SWCTRL,
+		},
+	},
+	.dev_attr	= &mcspi2_dev_attr,
+};
+
+/* mcspi3 */
+/* mcspi3 dev_attr */
+static struct omap2_mcspi_dev_attr mcspi3_dev_attr = {
+	.num_chipselect	= 2,
+};
+
+static struct omap_hwmod dra7xx_mcspi3_hwmod = {
+	.name		= "mcspi3",
+	.class		= &dra7xx_mcspi_hwmod_class,
+	.clkdm_name	= "l4per_clkdm",
+	.main_clk	= "func_48m_fclk",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_L4PER_MCSPI3_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_L4PER_MCSPI3_CONTEXT_OFFSET,
+			.modulemode   = MODULEMODE_SWCTRL,
+		},
+	},
+	.dev_attr	= &mcspi3_dev_attr,
+};
+
+/* mcspi4 */
+/* mcspi4 dev_attr */
+static struct omap2_mcspi_dev_attr mcspi4_dev_attr = {
+	.num_chipselect	= 1,
+};
+
+static struct omap_hwmod dra7xx_mcspi4_hwmod = {
+	.name		= "mcspi4",
+	.class		= &dra7xx_mcspi_hwmod_class,
+	.clkdm_name	= "l4per_clkdm",
+	.main_clk	= "func_48m_fclk",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_L4PER_MCSPI4_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_L4PER_MCSPI4_CONTEXT_OFFSET,
+			.modulemode   = MODULEMODE_SWCTRL,
+		},
+	},
+	.dev_attr	= &mcspi4_dev_attr,
+};
+
+/*
+ * 'mmc' class
+ *
+ */
+
+static struct omap_hwmod_class_sysconfig dra7xx_mmc_sysc = {
+	.rev_offs	= 0x0000,
+	.sysc_offs	= 0x0010,
+	.sysc_flags	= (SYSC_HAS_EMUFREE | SYSC_HAS_MIDLEMODE |
+			   SYSC_HAS_RESET_STATUS | SYSC_HAS_SIDLEMODE |
+			   SYSC_HAS_SOFTRESET),
+	.idlemodes	= (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART |
+			   SIDLE_SMART_WKUP | MSTANDBY_FORCE | MSTANDBY_NO |
+			   MSTANDBY_SMART | MSTANDBY_SMART_WKUP),
+	.sysc_fields	= &omap_hwmod_sysc_type2,
+};
+
+static struct omap_hwmod_class dra7xx_mmc_hwmod_class = {
+	.name	= "mmc",
+	.sysc	= &dra7xx_mmc_sysc,
+};
+
+/* mmc1 */
+static struct omap_hwmod_opt_clk mmc1_opt_clks[] = {
+	{ .role = "clk32k", .clk = "mmc1_clk32k" },
+};
+
+/* mmc1 dev_attr */
+static struct omap_mmc_dev_attr mmc1_dev_attr = {
+	.flags	= OMAP_HSMMC_SUPPORTS_DUAL_VOLT,
+};
+
+static struct omap_hwmod dra7xx_mmc1_hwmod = {
+	.name		= "mmc1",
+	.class		= &dra7xx_mmc_hwmod_class,
+	.clkdm_name	= "l3init_clkdm",
+	.main_clk	= "mmc1_fclk_div",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_L3INIT_MMC1_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_L3INIT_MMC1_CONTEXT_OFFSET,
+			.modulemode   = MODULEMODE_SWCTRL,
+		},
+	},
+	.opt_clks	= mmc1_opt_clks,
+	.opt_clks_cnt	= ARRAY_SIZE(mmc1_opt_clks),
+	.dev_attr	= &mmc1_dev_attr,
+};
+
+/* mmc2 */
+static struct omap_hwmod_opt_clk mmc2_opt_clks[] = {
+	{ .role = "clk32k", .clk = "mmc2_clk32k" },
+};
+
+static struct omap_hwmod dra7xx_mmc2_hwmod = {
+	.name		= "mmc2",
+	.class		= &dra7xx_mmc_hwmod_class,
+	.clkdm_name	= "l3init_clkdm",
+	.main_clk	= "mmc2_fclk_div",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_L3INIT_MMC2_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_L3INIT_MMC2_CONTEXT_OFFSET,
+			.modulemode   = MODULEMODE_SWCTRL,
+		},
+	},
+	.opt_clks	= mmc2_opt_clks,
+	.opt_clks_cnt	= ARRAY_SIZE(mmc2_opt_clks),
+};
+
+/* mmc3 */
+static struct omap_hwmod_opt_clk mmc3_opt_clks[] = {
+	{ .role = "clk32k", .clk = "mmc3_clk32k" },
+};
+
+static struct omap_hwmod dra7xx_mmc3_hwmod = {
+	.name		= "mmc3",
+	.class		= &dra7xx_mmc_hwmod_class,
+	.clkdm_name	= "l4per_clkdm",
+	.main_clk	= "mmc3_gfclk_div",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_L4PER_MMC3_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_L4PER_MMC3_CONTEXT_OFFSET,
+			.modulemode   = MODULEMODE_SWCTRL,
+		},
+	},
+	.opt_clks	= mmc3_opt_clks,
+	.opt_clks_cnt	= ARRAY_SIZE(mmc3_opt_clks),
+};
+
+/* mmc4 */
+static struct omap_hwmod_opt_clk mmc4_opt_clks[] = {
+	{ .role = "clk32k", .clk = "mmc4_clk32k" },
+};
+
+static struct omap_hwmod dra7xx_mmc4_hwmod = {
+	.name		= "mmc4",
+	.class		= &dra7xx_mmc_hwmod_class,
+	.clkdm_name	= "l4per_clkdm",
+	.main_clk	= "mmc4_gfclk_div",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_L4PER_MMC4_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_L4PER_MMC4_CONTEXT_OFFSET,
+			.modulemode   = MODULEMODE_SWCTRL,
+		},
+	},
+	.opt_clks	= mmc4_opt_clks,
+	.opt_clks_cnt	= ARRAY_SIZE(mmc4_opt_clks),
+};
+
+/*
+ * 'mpu' class
+ *
+ */
+
+static struct omap_hwmod_class dra7xx_mpu_hwmod_class = {
+	.name	= "mpu",
+};
+
+/* mpu */
+static struct omap_hwmod dra7xx_mpu_hwmod = {
+	.name		= "mpu",
+	.class		= &dra7xx_mpu_hwmod_class,
+	.clkdm_name	= "mpu_clkdm",
+	.flags		= HWMOD_INIT_NO_IDLE | HWMOD_INIT_NO_RESET,
+	.main_clk	= "dpll_mpu_m2_ck",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_MPU_MPU_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_MPU_MPU_CONTEXT_OFFSET,
+		},
+	},
+};
+
+/*
+ * 'ocp2scp' class
+ *
+ */
+
+static struct omap_hwmod_class_sysconfig dra7xx_ocp2scp_sysc = {
+	.rev_offs	= 0x0000,
+	.sysc_offs	= 0x0010,
+	.syss_offs	= 0x0014,
+	.sysc_flags	= (SYSC_HAS_AUTOIDLE | SYSC_HAS_SIDLEMODE |
+			   SYSC_HAS_SOFTRESET | SYSS_HAS_RESET_STATUS),
+	.idlemodes	= (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART |
+			   SIDLE_SMART_WKUP),
+	.sysc_fields	= &omap_hwmod_sysc_type1,
+};
+
+static struct omap_hwmod_class dra7xx_ocp2scp_hwmod_class = {
+	.name	= "ocp2scp",
+	.sysc	= &dra7xx_ocp2scp_sysc,
+};
+
+/* ocp2scp1 */
+static struct omap_hwmod dra7xx_ocp2scp1_hwmod = {
+	.name		= "ocp2scp1",
+	.class		= &dra7xx_ocp2scp_hwmod_class,
+	.clkdm_name	= "l3init_clkdm",
+	.main_clk	= "l4_root_clk_div",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_L3INIT_OCP2SCP1_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_L3INIT_OCP2SCP1_CONTEXT_OFFSET,
+			.modulemode   = MODULEMODE_HWCTRL,
+		},
+	},
+};
+
+/*
+ * 'qspi' class
+ *
+ */
+
+static struct omap_hwmod_class_sysconfig dra7xx_qspi_sysc = {
+	.sysc_offs	= 0x0010,
+	.sysc_flags	= SYSC_HAS_SIDLEMODE,
+	.idlemodes	= (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART |
+			   SIDLE_SMART_WKUP),
+	.sysc_fields	= &omap_hwmod_sysc_type2,
+};
+
+static struct omap_hwmod_class dra7xx_qspi_hwmod_class = {
+	.name	= "qspi",
+	.sysc	= &dra7xx_qspi_sysc,
+};
+
+/* qspi */
+static struct omap_hwmod dra7xx_qspi_hwmod = {
+	.name		= "qspi",
+	.class		= &dra7xx_qspi_hwmod_class,
+	.clkdm_name	= "l4per2_clkdm",
+	.main_clk	= "qspi_gfclk_div",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_L4PER2_QSPI_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_L4PER2_QSPI_CONTEXT_OFFSET,
+			.modulemode   = MODULEMODE_SWCTRL,
+		},
+	},
+};
+
+/*
+ * 'sata' class
+ *
+ */
+
+static struct omap_hwmod_class_sysconfig dra7xx_sata_sysc = {
+	.sysc_offs	= 0x0000,
+	.sysc_flags	= (SYSC_HAS_MIDLEMODE | SYSC_HAS_SIDLEMODE),
+	.idlemodes	= (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART |
+			   SIDLE_SMART_WKUP | MSTANDBY_FORCE | MSTANDBY_NO |
+			   MSTANDBY_SMART | MSTANDBY_SMART_WKUP),
+	.sysc_fields	= &omap_hwmod_sysc_type2,
+};
+
+static struct omap_hwmod_class dra7xx_sata_hwmod_class = {
+	.name	= "sata",
+	.sysc	= &dra7xx_sata_sysc,
+};
+
+/* sata */
+static struct omap_hwmod_opt_clk sata_opt_clks[] = {
+	{ .role = "ref_clk", .clk = "sata_ref_clk" },
+};
+
+static struct omap_hwmod dra7xx_sata_hwmod = {
+	.name		= "sata",
+	.class		= &dra7xx_sata_hwmod_class,
+	.clkdm_name	= "l3init_clkdm",
+	.flags		= HWMOD_SWSUP_SIDLE | HWMOD_SWSUP_MSTANDBY,
+	.main_clk	= "func_48m_fclk",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_L3INIT_SATA_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_L3INIT_SATA_CONTEXT_OFFSET,
+			.modulemode   = MODULEMODE_SWCTRL,
+		},
+	},
+	.opt_clks	= sata_opt_clks,
+	.opt_clks_cnt	= ARRAY_SIZE(sata_opt_clks),
+};
+
+/*
+ * 'smartreflex' class
+ *
+ */
+
+/* The IP is not compliant to type1 / type2 scheme */
+static struct omap_hwmod_sysc_fields omap_hwmod_sysc_type_smartreflex = {
+	.sidle_shift	= 24,
+	.enwkup_shift	= 26,
+};
+
+static struct omap_hwmod_class_sysconfig dra7xx_smartreflex_sysc = {
+	.sysc_offs	= 0x0038,
+	.sysc_flags	= (SYSC_HAS_ENAWAKEUP | SYSC_HAS_SIDLEMODE),
+	.idlemodes	= (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART |
+			   SIDLE_SMART_WKUP),
+	.sysc_fields	= &omap_hwmod_sysc_type_smartreflex,
+};
+
+static struct omap_hwmod_class dra7xx_smartreflex_hwmod_class = {
+	.name	= "smartreflex",
+	.sysc	= &dra7xx_smartreflex_sysc,
+	.rev	= 2,
+};
+
+/* smartreflex_core */
+/* smartreflex_core dev_attr */
+static struct omap_smartreflex_dev_attr smartreflex_core_dev_attr = {
+	.sensor_voltdm_name	= "core",
+};
+
+static struct omap_hwmod dra7xx_smartreflex_core_hwmod = {
+	.name		= "smartreflex_core",
+	.class		= &dra7xx_smartreflex_hwmod_class,
+	.clkdm_name	= "coreaon_clkdm",
+	.main_clk	= "wkupaon_iclk_mux",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_COREAON_SMARTREFLEX_CORE_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_COREAON_SMARTREFLEX_CORE_CONTEXT_OFFSET,
+			.modulemode   = MODULEMODE_SWCTRL,
+		},
+	},
+	.dev_attr	= &smartreflex_core_dev_attr,
+};
+
+/* smartreflex_mpu */
+/* smartreflex_mpu dev_attr */
+static struct omap_smartreflex_dev_attr smartreflex_mpu_dev_attr = {
+	.sensor_voltdm_name	= "mpu",
+};
+
+static struct omap_hwmod dra7xx_smartreflex_mpu_hwmod = {
+	.name		= "smartreflex_mpu",
+	.class		= &dra7xx_smartreflex_hwmod_class,
+	.clkdm_name	= "coreaon_clkdm",
+	.main_clk	= "wkupaon_iclk_mux",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_COREAON_SMARTREFLEX_MPU_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_COREAON_SMARTREFLEX_MPU_CONTEXT_OFFSET,
+			.modulemode   = MODULEMODE_SWCTRL,
+		},
+	},
+	.dev_attr	= &smartreflex_mpu_dev_attr,
+};
+
+/*
+ * 'spinlock' class
+ *
+ */
+
+static struct omap_hwmod_class_sysconfig dra7xx_spinlock_sysc = {
+	.rev_offs	= 0x0000,
+	.sysc_offs	= 0x0010,
+	.syss_offs	= 0x0014,
+	.sysc_flags	= (SYSC_HAS_AUTOIDLE | SYSC_HAS_CLOCKACTIVITY |
+			   SYSC_HAS_ENAWAKEUP | SYSC_HAS_SIDLEMODE |
+			   SYSC_HAS_SOFTRESET | SYSS_HAS_RESET_STATUS),
+	.idlemodes	= (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART |
+			   SIDLE_SMART_WKUP),
+	.sysc_fields	= &omap_hwmod_sysc_type1,
+};
+
+static struct omap_hwmod_class dra7xx_spinlock_hwmod_class = {
+	.name	= "spinlock",
+	.sysc	= &dra7xx_spinlock_sysc,
+};
+
+/* spinlock */
+static struct omap_hwmod dra7xx_spinlock_hwmod = {
+	.name		= "spinlock",
+	.class		= &dra7xx_spinlock_hwmod_class,
+	.clkdm_name	= "l4cfg_clkdm",
+	.main_clk	= "l3_iclk_div",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_L4CFG_SPINLOCK_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_L4CFG_SPINLOCK_CONTEXT_OFFSET,
+		},
+	},
+};
+
+/*
+ * 'timer' class
+ *
+ * This class contains several variants: ['timer_1ms', 'timer_secure',
+ * 'timer']
+ */
+
+static struct omap_hwmod_class_sysconfig dra7xx_timer_1ms_sysc = {
+	.rev_offs	= 0x0000,
+	.sysc_offs	= 0x0010,
+	.sysc_flags	= (SYSC_HAS_EMUFREE | SYSC_HAS_RESET_STATUS |
+			   SYSC_HAS_SIDLEMODE | SYSC_HAS_SOFTRESET),
+	.idlemodes	= (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART |
+			   SIDLE_SMART_WKUP),
+	.sysc_fields	= &omap_hwmod_sysc_type2,
+};
+
+static struct omap_hwmod_class dra7xx_timer_1ms_hwmod_class = {
+	.name	= "timer",
+	.sysc	= &dra7xx_timer_1ms_sysc,
+};
+
+static struct omap_hwmod_class_sysconfig dra7xx_timer_secure_sysc = {
+	.rev_offs	= 0x0000,
+	.sysc_offs	= 0x0010,
+	.sysc_flags	= (SYSC_HAS_EMUFREE | SYSC_HAS_RESET_STATUS |
+			   SYSC_HAS_SIDLEMODE | SYSC_HAS_SOFTRESET),
+	.idlemodes	= (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART |
+			   SIDLE_SMART_WKUP),
+	.sysc_fields	= &omap_hwmod_sysc_type2,
+};
+
+static struct omap_hwmod_class dra7xx_timer_secure_hwmod_class = {
+	.name	= "timer",
+	.sysc	= &dra7xx_timer_secure_sysc,
+};
+
+static struct omap_hwmod_class_sysconfig dra7xx_timer_sysc = {
+	.rev_offs	= 0x0000,
+	.sysc_offs	= 0x0010,
+	.sysc_flags	= (SYSC_HAS_EMUFREE | SYSC_HAS_RESET_STATUS |
+			   SYSC_HAS_SIDLEMODE | SYSC_HAS_SOFTRESET),
+	.idlemodes	= (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART |
+			   SIDLE_SMART_WKUP),
+	.sysc_fields	= &omap_hwmod_sysc_type2,
+};
+
+static struct omap_hwmod_class dra7xx_timer_hwmod_class = {
+	.name	= "timer",
+	.sysc	= &dra7xx_timer_sysc,
+};
+
+/* timer1 */
+static struct omap_hwmod dra7xx_timer1_hwmod = {
+	.name		= "timer1",
+	.class		= &dra7xx_timer_1ms_hwmod_class,
+	.clkdm_name	= "wkupaon_clkdm",
+	.main_clk	= "timer1_gfclk_mux",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_WKUPAON_TIMER1_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_WKUPAON_TIMER1_CONTEXT_OFFSET,
+			.modulemode   = MODULEMODE_SWCTRL,
+		},
+	},
+};
+
+/* timer2 */
+static struct omap_hwmod dra7xx_timer2_hwmod = {
+	.name		= "timer2",
+	.class		= &dra7xx_timer_1ms_hwmod_class,
+	.clkdm_name	= "l4per_clkdm",
+	.main_clk	= "timer2_gfclk_mux",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_L4PER_TIMER2_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_L4PER_TIMER2_CONTEXT_OFFSET,
+			.modulemode   = MODULEMODE_SWCTRL,
+		},
+	},
+};
+
+/* timer3 */
+static struct omap_hwmod dra7xx_timer3_hwmod = {
+	.name		= "timer3",
+	.class		= &dra7xx_timer_hwmod_class,
+	.clkdm_name	= "l4per_clkdm",
+	.main_clk	= "timer3_gfclk_mux",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_L4PER_TIMER3_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_L4PER_TIMER3_CONTEXT_OFFSET,
+			.modulemode   = MODULEMODE_SWCTRL,
+		},
+	},
+};
+
+/* timer4 */
+static struct omap_hwmod dra7xx_timer4_hwmod = {
+	.name		= "timer4",
+	.class		= &dra7xx_timer_secure_hwmod_class,
+	.clkdm_name	= "l4per_clkdm",
+	.main_clk	= "timer4_gfclk_mux",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_L4PER_TIMER4_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_L4PER_TIMER4_CONTEXT_OFFSET,
+			.modulemode   = MODULEMODE_SWCTRL,
+		},
+	},
+};
+
+/* timer5 */
+static struct omap_hwmod dra7xx_timer5_hwmod = {
+	.name		= "timer5",
+	.class		= &dra7xx_timer_hwmod_class,
+	.clkdm_name	= "ipu_clkdm",
+	.main_clk	= "timer5_gfclk_mux",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_IPU_TIMER5_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_IPU_TIMER5_CONTEXT_OFFSET,
+			.modulemode   = MODULEMODE_SWCTRL,
+		},
+	},
+};
+
+/* timer6 */
+static struct omap_hwmod dra7xx_timer6_hwmod = {
+	.name		= "timer6",
+	.class		= &dra7xx_timer_hwmod_class,
+	.clkdm_name	= "ipu_clkdm",
+	.main_clk	= "timer6_gfclk_mux",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_IPU_TIMER6_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_IPU_TIMER6_CONTEXT_OFFSET,
+			.modulemode   = MODULEMODE_SWCTRL,
+		},
+	},
+};
+
+/* timer7 */
+static struct omap_hwmod dra7xx_timer7_hwmod = {
+	.name		= "timer7",
+	.class		= &dra7xx_timer_hwmod_class,
+	.clkdm_name	= "ipu_clkdm",
+	.main_clk	= "timer7_gfclk_mux",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_IPU_TIMER7_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_IPU_TIMER7_CONTEXT_OFFSET,
+			.modulemode   = MODULEMODE_SWCTRL,
+		},
+	},
+};
+
+/* timer8 */
+static struct omap_hwmod dra7xx_timer8_hwmod = {
+	.name		= "timer8",
+	.class		= &dra7xx_timer_hwmod_class,
+	.clkdm_name	= "ipu_clkdm",
+	.main_clk	= "timer8_gfclk_mux",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_IPU_TIMER8_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_IPU_TIMER8_CONTEXT_OFFSET,
+			.modulemode   = MODULEMODE_SWCTRL,
+		},
+	},
+};
+
+/* timer9 */
+static struct omap_hwmod dra7xx_timer9_hwmod = {
+	.name		= "timer9",
+	.class		= &dra7xx_timer_hwmod_class,
+	.clkdm_name	= "l4per_clkdm",
+	.main_clk	= "timer9_gfclk_mux",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_L4PER_TIMER9_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_L4PER_TIMER9_CONTEXT_OFFSET,
+			.modulemode   = MODULEMODE_SWCTRL,
+		},
+	},
+};
+
+/* timer10 */
+static struct omap_hwmod dra7xx_timer10_hwmod = {
+	.name		= "timer10",
+	.class		= &dra7xx_timer_1ms_hwmod_class,
+	.clkdm_name	= "l4per_clkdm",
+	.main_clk	= "timer10_gfclk_mux",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_L4PER_TIMER10_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_L4PER_TIMER10_CONTEXT_OFFSET,
+			.modulemode   = MODULEMODE_SWCTRL,
+		},
+	},
+};
+
+/* timer11 */
+static struct omap_hwmod dra7xx_timer11_hwmod = {
+	.name		= "timer11",
+	.class		= &dra7xx_timer_hwmod_class,
+	.clkdm_name	= "l4per_clkdm",
+	.main_clk	= "timer11_gfclk_mux",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_L4PER_TIMER11_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_L4PER_TIMER11_CONTEXT_OFFSET,
+			.modulemode   = MODULEMODE_SWCTRL,
+		},
+	},
+};
+
+/*
+ * 'uart' class
+ *
+ */
+
+static struct omap_hwmod_class_sysconfig dra7xx_uart_sysc = {
+	.rev_offs	= 0x0050,
+	.sysc_offs	= 0x0054,
+	.syss_offs	= 0x0058,
+	.sysc_flags	= (SYSC_HAS_AUTOIDLE | SYSC_HAS_ENAWAKEUP |
+			   SYSC_HAS_SIDLEMODE | SYSC_HAS_SOFTRESET |
+			   SYSS_HAS_RESET_STATUS),
+	.idlemodes	= (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART |
+			   SIDLE_SMART_WKUP),
+	.sysc_fields	= &omap_hwmod_sysc_type1,
+};
+
+static struct omap_hwmod_class dra7xx_uart_hwmod_class = {
+	.name	= "uart",
+	.sysc	= &dra7xx_uart_sysc,
+};
+
+/* uart1 */
+static struct omap_hwmod dra7xx_uart1_hwmod = {
+	.name		= "uart1",
+	.class		= &dra7xx_uart_hwmod_class,
+	.clkdm_name	= "l4per_clkdm",
+	.main_clk	= "uart1_gfclk_mux",
+	.flags		= HWMOD_SWSUP_SIDLE_ACT,
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_L4PER_UART1_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_L4PER_UART1_CONTEXT_OFFSET,
+			.modulemode   = MODULEMODE_SWCTRL,
+		},
+	},
+};
+
+/* uart2 */
+static struct omap_hwmod dra7xx_uart2_hwmod = {
+	.name		= "uart2",
+	.class		= &dra7xx_uart_hwmod_class,
+	.clkdm_name	= "l4per_clkdm",
+	.main_clk	= "uart2_gfclk_mux",
+	.flags		= HWMOD_SWSUP_SIDLE_ACT,
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_L4PER_UART2_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_L4PER_UART2_CONTEXT_OFFSET,
+			.modulemode   = MODULEMODE_SWCTRL,
+		},
+	},
+};
+
+/* uart3 */
+static struct omap_hwmod dra7xx_uart3_hwmod = {
+	.name		= "uart3",
+	.class		= &dra7xx_uart_hwmod_class,
+	.clkdm_name	= "l4per_clkdm",
+	.main_clk	= "uart3_gfclk_mux",
+	.flags		= HWMOD_SWSUP_SIDLE_ACT,
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_L4PER_UART3_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_L4PER_UART3_CONTEXT_OFFSET,
+			.modulemode   = MODULEMODE_SWCTRL,
+		},
+	},
+};
+
+/* uart4 */
+static struct omap_hwmod dra7xx_uart4_hwmod = {
+	.name		= "uart4",
+	.class		= &dra7xx_uart_hwmod_class,
+	.clkdm_name	= "l4per_clkdm",
+	.main_clk	= "uart4_gfclk_mux",
+	.flags		= HWMOD_SWSUP_SIDLE_ACT,
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_L4PER_UART4_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_L4PER_UART4_CONTEXT_OFFSET,
+			.modulemode   = MODULEMODE_SWCTRL,
+		},
+	},
+};
+
+/* uart5 */
+static struct omap_hwmod dra7xx_uart5_hwmod = {
+	.name		= "uart5",
+	.class		= &dra7xx_uart_hwmod_class,
+	.clkdm_name	= "l4per_clkdm",
+	.main_clk	= "uart5_gfclk_mux",
+	.flags		= HWMOD_SWSUP_SIDLE_ACT,
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_L4PER_UART5_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_L4PER_UART5_CONTEXT_OFFSET,
+			.modulemode   = MODULEMODE_SWCTRL,
+		},
+	},
+};
+
+/* uart6 */
+static struct omap_hwmod dra7xx_uart6_hwmod = {
+	.name		= "uart6",
+	.class		= &dra7xx_uart_hwmod_class,
+	.clkdm_name	= "ipu_clkdm",
+	.main_clk	= "uart6_gfclk_mux",
+	.flags		= HWMOD_SWSUP_SIDLE_ACT,
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_IPU_UART6_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_IPU_UART6_CONTEXT_OFFSET,
+			.modulemode   = MODULEMODE_SWCTRL,
+		},
+	},
+};
+
+/*
+ * 'usb_otg_ss' class
+ *
+ */
+
+static struct omap_hwmod_class dra7xx_usb_otg_ss_hwmod_class = {
+	.name	= "usb_otg_ss",
+};
+
+/* usb_otg_ss1 */
+static struct omap_hwmod_opt_clk usb_otg_ss1_opt_clks[] = {
+	{ .role = "refclk960m", .clk = "usb_otg_ss1_refclk960m" },
+};
+
+static struct omap_hwmod dra7xx_usb_otg_ss1_hwmod = {
+	.name		= "usb_otg_ss1",
+	.class		= &dra7xx_usb_otg_ss_hwmod_class,
+	.clkdm_name	= "l3init_clkdm",
+	.main_clk	= "dpll_core_h13x2_ck",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_L3INIT_USB_OTG_SS1_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_L3INIT_USB_OTG_SS1_CONTEXT_OFFSET,
+			.modulemode   = MODULEMODE_HWCTRL,
+		},
+	},
+	.opt_clks	= usb_otg_ss1_opt_clks,
+	.opt_clks_cnt	= ARRAY_SIZE(usb_otg_ss1_opt_clks),
+};
+
+/* usb_otg_ss2 */
+static struct omap_hwmod_opt_clk usb_otg_ss2_opt_clks[] = {
+	{ .role = "refclk960m", .clk = "usb_otg_ss2_refclk960m" },
+};
+
+static struct omap_hwmod dra7xx_usb_otg_ss2_hwmod = {
+	.name		= "usb_otg_ss2",
+	.class		= &dra7xx_usb_otg_ss_hwmod_class,
+	.clkdm_name	= "l3init_clkdm",
+	.main_clk	= "dpll_core_h13x2_ck",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_L3INIT_USB_OTG_SS2_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_L3INIT_USB_OTG_SS2_CONTEXT_OFFSET,
+			.modulemode   = MODULEMODE_HWCTRL,
+		},
+	},
+	.opt_clks	= usb_otg_ss2_opt_clks,
+	.opt_clks_cnt	= ARRAY_SIZE(usb_otg_ss2_opt_clks),
+};
+
+/* usb_otg_ss3 */
+static struct omap_hwmod dra7xx_usb_otg_ss3_hwmod = {
+	.name		= "usb_otg_ss3",
+	.class		= &dra7xx_usb_otg_ss_hwmod_class,
+	.clkdm_name	= "l3init_clkdm",
+	.main_clk	= "dpll_core_h13x2_ck",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_L3INIT_USB_OTG_SS3_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_L3INIT_USB_OTG_SS3_CONTEXT_OFFSET,
+			.modulemode   = MODULEMODE_HWCTRL,
+		},
+	},
+};
+
+/* usb_otg_ss4 */
+static struct omap_hwmod dra7xx_usb_otg_ss4_hwmod = {
+	.name		= "usb_otg_ss4",
+	.class		= &dra7xx_usb_otg_ss_hwmod_class,
+	.clkdm_name	= "l3init_clkdm",
+	.main_clk	= "dpll_core_h13x2_ck",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_L3INIT_USB_OTG_SS4_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_L3INIT_USB_OTG_SS4_CONTEXT_OFFSET,
+			.modulemode   = MODULEMODE_HWCTRL,
+		},
+	},
+};
+
+/*
+ * 'vcp' class
+ *
+ */
+
+static struct omap_hwmod_class dra7xx_vcp_hwmod_class = {
+	.name	= "vcp",
+};
+
+/* vcp1 */
+static struct omap_hwmod dra7xx_vcp1_hwmod = {
+	.name		= "vcp1",
+	.class		= &dra7xx_vcp_hwmod_class,
+	.clkdm_name	= "l3main1_clkdm",
+	.main_clk	= "l3_iclk_div",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_L3MAIN1_VCP1_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_L3MAIN1_VCP1_CONTEXT_OFFSET,
+		},
+	},
+};
+
+/* vcp2 */
+static struct omap_hwmod dra7xx_vcp2_hwmod = {
+	.name		= "vcp2",
+	.class		= &dra7xx_vcp_hwmod_class,
+	.clkdm_name	= "l3main1_clkdm",
+	.main_clk	= "l3_iclk_div",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_L3MAIN1_VCP2_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_L3MAIN1_VCP2_CONTEXT_OFFSET,
+		},
+	},
+};
+
+/*
+ * 'wd_timer' class
+ *
+ */
+
+static struct omap_hwmod_class_sysconfig dra7xx_wd_timer_sysc = {
+	.rev_offs	= 0x0000,
+	.sysc_offs	= 0x0010,
+	.syss_offs	= 0x0014,
+	.sysc_flags	= (SYSC_HAS_EMUFREE | SYSC_HAS_SIDLEMODE |
+			   SYSC_HAS_SOFTRESET | SYSS_HAS_RESET_STATUS),
+	.idlemodes	= (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART |
+			   SIDLE_SMART_WKUP),
+	.sysc_fields	= &omap_hwmod_sysc_type1,
+};
+
+static struct omap_hwmod_class dra7xx_wd_timer_hwmod_class = {
+	.name		= "wd_timer",
+	.sysc		= &dra7xx_wd_timer_sysc,
+	.pre_shutdown	= &omap2_wd_timer_disable,
+	.reset		= &omap2_wd_timer_reset,
+};
+
+/* wd_timer2 */
+static struct omap_hwmod dra7xx_wd_timer2_hwmod = {
+	.name		= "wd_timer2",
+	.class		= &dra7xx_wd_timer_hwmod_class,
+	.clkdm_name	= "wkupaon_clkdm",
+	.main_clk	= "sys_32k_ck",
+	.prcm = {
+		.omap4 = {
+			.clkctrl_offs = DRA7XX_CM_WKUPAON_WD_TIMER2_CLKCTRL_OFFSET,
+			.context_offs = DRA7XX_RM_WKUPAON_WD_TIMER2_CONTEXT_OFFSET,
+			.modulemode   = MODULEMODE_SWCTRL,
+		},
+	},
+};
+
+
+/*
+ * Interfaces
+ */
+
+/* l3_main_2 -> l3_instr */
+static struct omap_hwmod_ocp_if dra7xx_l3_main_2__l3_instr = {
+	.master		= &dra7xx_l3_main_2_hwmod,
+	.slave		= &dra7xx_l3_instr_hwmod,
+	.clk		= "l3_iclk_div",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l4_cfg -> l3_main_1 */
+static struct omap_hwmod_ocp_if dra7xx_l4_cfg__l3_main_1 = {
+	.master		= &dra7xx_l4_cfg_hwmod,
+	.slave		= &dra7xx_l3_main_1_hwmod,
+	.clk		= "l3_iclk_div",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* mpu -> l3_main_1 */
+static struct omap_hwmod_ocp_if dra7xx_mpu__l3_main_1 = {
+	.master		= &dra7xx_mpu_hwmod,
+	.slave		= &dra7xx_l3_main_1_hwmod,
+	.clk		= "l3_iclk_div",
+	.user		= OCP_USER_MPU,
+};
+
+/* l3_main_1 -> l3_main_2 */
+static struct omap_hwmod_ocp_if dra7xx_l3_main_1__l3_main_2 = {
+	.master		= &dra7xx_l3_main_1_hwmod,
+	.slave		= &dra7xx_l3_main_2_hwmod,
+	.clk		= "l3_iclk_div",
+	.user		= OCP_USER_MPU,
+};
+
+/* l4_cfg -> l3_main_2 */
+static struct omap_hwmod_ocp_if dra7xx_l4_cfg__l3_main_2 = {
+	.master		= &dra7xx_l4_cfg_hwmod,
+	.slave		= &dra7xx_l3_main_2_hwmod,
+	.clk		= "l3_iclk_div",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l3_main_1 -> l4_cfg */
+static struct omap_hwmod_ocp_if dra7xx_l3_main_1__l4_cfg = {
+	.master		= &dra7xx_l3_main_1_hwmod,
+	.slave		= &dra7xx_l4_cfg_hwmod,
+	.clk		= "l3_iclk_div",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l3_main_1 -> l4_per1 */
+static struct omap_hwmod_ocp_if dra7xx_l3_main_1__l4_per1 = {
+	.master		= &dra7xx_l3_main_1_hwmod,
+	.slave		= &dra7xx_l4_per1_hwmod,
+	.clk		= "l3_iclk_div",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l3_main_1 -> l4_per2 */
+static struct omap_hwmod_ocp_if dra7xx_l3_main_1__l4_per2 = {
+	.master		= &dra7xx_l3_main_1_hwmod,
+	.slave		= &dra7xx_l4_per2_hwmod,
+	.clk		= "l3_iclk_div",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l3_main_1 -> l4_per3 */
+static struct omap_hwmod_ocp_if dra7xx_l3_main_1__l4_per3 = {
+	.master		= &dra7xx_l3_main_1_hwmod,
+	.slave		= &dra7xx_l4_per3_hwmod,
+	.clk		= "l3_iclk_div",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l3_main_1 -> l4_wkup */
+static struct omap_hwmod_ocp_if dra7xx_l3_main_1__l4_wkup = {
+	.master		= &dra7xx_l3_main_1_hwmod,
+	.slave		= &dra7xx_l4_wkup_hwmod,
+	.clk		= "wkupaon_iclk_mux",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l4_per2 -> atl */
+static struct omap_hwmod_ocp_if dra7xx_l4_per2__atl = {
+	.master		= &dra7xx_l4_per2_hwmod,
+	.slave		= &dra7xx_atl_hwmod,
+	.clk		= "l3_iclk_div",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l3_main_1 -> bb2d */
+static struct omap_hwmod_ocp_if dra7xx_l3_main_1__bb2d = {
+	.master		= &dra7xx_l3_main_1_hwmod,
+	.slave		= &dra7xx_bb2d_hwmod,
+	.clk		= "l3_iclk_div",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l4_wkup -> counter_32k */
+static struct omap_hwmod_ocp_if dra7xx_l4_wkup__counter_32k = {
+	.master		= &dra7xx_l4_wkup_hwmod,
+	.slave		= &dra7xx_counter_32k_hwmod,
+	.clk		= "wkupaon_iclk_mux",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l4_wkup -> ctrl_module_wkup */
+static struct omap_hwmod_ocp_if dra7xx_l4_wkup__ctrl_module_wkup = {
+	.master		= &dra7xx_l4_wkup_hwmod,
+	.slave		= &dra7xx_ctrl_module_wkup_hwmod,
+	.clk		= "wkupaon_iclk_mux",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l4_wkup -> dcan1 */
+static struct omap_hwmod_ocp_if dra7xx_l4_wkup__dcan1 = {
+	.master		= &dra7xx_l4_wkup_hwmod,
+	.slave		= &dra7xx_dcan1_hwmod,
+	.clk		= "wkupaon_iclk_mux",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l4_per2 -> dcan2 */
+static struct omap_hwmod_ocp_if dra7xx_l4_per2__dcan2 = {
+	.master		= &dra7xx_l4_per2_hwmod,
+	.slave		= &dra7xx_dcan2_hwmod,
+	.clk		= "l3_iclk_div",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+static struct omap_hwmod_addr_space dra7xx_dma_system_addrs[] = {
+	{
+		.pa_start	= 0x4a056000,
+		.pa_end		= 0x4a056fff,
+		.flags		= ADDR_TYPE_RT
+	},
+	{ }
+};
+
+/* l4_cfg -> dma_system */
+static struct omap_hwmod_ocp_if dra7xx_l4_cfg__dma_system = {
+	.master		= &dra7xx_l4_cfg_hwmod,
+	.slave		= &dra7xx_dma_system_hwmod,
+	.clk		= "l3_iclk_div",
+	.addr		= dra7xx_dma_system_addrs,
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+static struct omap_hwmod_addr_space dra7xx_dss_addrs[] = {
+	{
+		.name		= "family",
+		.pa_start	= 0x58000000,
+		.pa_end		= 0x5800007f,
+		.flags		= ADDR_TYPE_RT
+	},
+};
+
+/* l3_main_1 -> dss */
+static struct omap_hwmod_ocp_if dra7xx_l3_main_1__dss = {
+	.master		= &dra7xx_l3_main_1_hwmod,
+	.slave		= &dra7xx_dss_hwmod,
+	.clk		= "l3_iclk_div",
+	.addr		= dra7xx_dss_addrs,
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+static struct omap_hwmod_addr_space dra7xx_dss_dispc_addrs[] = {
+	{
+		.name		= "dispc",
+		.pa_start	= 0x58001000,
+		.pa_end		= 0x58001fff,
+		.flags		= ADDR_TYPE_RT
+	},
+};
+
+/* l3_main_1 -> dispc */
+static struct omap_hwmod_ocp_if dra7xx_l3_main_1__dispc = {
+	.master		= &dra7xx_l3_main_1_hwmod,
+	.slave		= &dra7xx_dss_dispc_hwmod,
+	.clk		= "l3_iclk_div",
+	.addr		= dra7xx_dss_dispc_addrs,
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+static struct omap_hwmod_addr_space dra7xx_dss_hdmi_addrs[] = {
+	{
+		.name		= "hdmi_wp",
+		.pa_start	= 0x58040000,
+		.pa_end		= 0x580400ff,
+		.flags		= ADDR_TYPE_RT
+	},
+	{ }
+};
+
+/* l3_main_1 -> dispc */
+static struct omap_hwmod_ocp_if dra7xx_l3_main_1__hdmi = {
+	.master		= &dra7xx_l3_main_1_hwmod,
+	.slave		= &dra7xx_dss_hdmi_hwmod,
+	.clk		= "l3_iclk_div",
+	.addr		= dra7xx_dss_hdmi_addrs,
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+static struct omap_hwmod_addr_space dra7xx_elm_addrs[] = {
+	{
+		.pa_start	= 0x48078000,
+		.pa_end		= 0x48078fff,
+		.flags		= ADDR_TYPE_RT
+	},
+	{ }
+};
+
+/* l4_per1 -> elm */
+static struct omap_hwmod_ocp_if dra7xx_l4_per1__elm = {
+	.master		= &dra7xx_l4_per1_hwmod,
+	.slave		= &dra7xx_elm_hwmod,
+	.clk		= "l3_iclk_div",
+	.addr		= dra7xx_elm_addrs,
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l4_wkup -> gpio1 */
+static struct omap_hwmod_ocp_if dra7xx_l4_wkup__gpio1 = {
+	.master		= &dra7xx_l4_wkup_hwmod,
+	.slave		= &dra7xx_gpio1_hwmod,
+	.clk		= "wkupaon_iclk_mux",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l4_per1 -> gpio2 */
+static struct omap_hwmod_ocp_if dra7xx_l4_per1__gpio2 = {
+	.master		= &dra7xx_l4_per1_hwmod,
+	.slave		= &dra7xx_gpio2_hwmod,
+	.clk		= "l3_iclk_div",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l4_per1 -> gpio3 */
+static struct omap_hwmod_ocp_if dra7xx_l4_per1__gpio3 = {
+	.master		= &dra7xx_l4_per1_hwmod,
+	.slave		= &dra7xx_gpio3_hwmod,
+	.clk		= "l3_iclk_div",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l4_per1 -> gpio4 */
+static struct omap_hwmod_ocp_if dra7xx_l4_per1__gpio4 = {
+	.master		= &dra7xx_l4_per1_hwmod,
+	.slave		= &dra7xx_gpio4_hwmod,
+	.clk		= "l3_iclk_div",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l4_per1 -> gpio5 */
+static struct omap_hwmod_ocp_if dra7xx_l4_per1__gpio5 = {
+	.master		= &dra7xx_l4_per1_hwmod,
+	.slave		= &dra7xx_gpio5_hwmod,
+	.clk		= "l3_iclk_div",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l4_per1 -> gpio6 */
+static struct omap_hwmod_ocp_if dra7xx_l4_per1__gpio6 = {
+	.master		= &dra7xx_l4_per1_hwmod,
+	.slave		= &dra7xx_gpio6_hwmod,
+	.clk		= "l3_iclk_div",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l4_per1 -> gpio7 */
+static struct omap_hwmod_ocp_if dra7xx_l4_per1__gpio7 = {
+	.master		= &dra7xx_l4_per1_hwmod,
+	.slave		= &dra7xx_gpio7_hwmod,
+	.clk		= "l3_iclk_div",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l4_per1 -> gpio8 */
+static struct omap_hwmod_ocp_if dra7xx_l4_per1__gpio8 = {
+	.master		= &dra7xx_l4_per1_hwmod,
+	.slave		= &dra7xx_gpio8_hwmod,
+	.clk		= "l3_iclk_div",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+static struct omap_hwmod_addr_space dra7xx_gpmc_addrs[] = {
+	{
+		.pa_start	= 0x50000000,
+		.pa_end		= 0x500003ff,
+		.flags		= ADDR_TYPE_RT
+	},
+	{ }
+};
+
+/* l3_main_1 -> gpmc */
+static struct omap_hwmod_ocp_if dra7xx_l3_main_1__gpmc = {
+	.master		= &dra7xx_l3_main_1_hwmod,
+	.slave		= &dra7xx_gpmc_hwmod,
+	.clk		= "l3_iclk_div",
+	.addr		= dra7xx_gpmc_addrs,
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+static struct omap_hwmod_addr_space dra7xx_hdq1w_addrs[] = {
+	{
+		.pa_start	= 0x480b2000,
+		.pa_end		= 0x480b201f,
+		.flags		= ADDR_TYPE_RT
+	},
+	{ }
+};
+
+/* l4_per1 -> hdq1w */
+static struct omap_hwmod_ocp_if dra7xx_l4_per1__hdq1w = {
+	.master		= &dra7xx_l4_per1_hwmod,
+	.slave		= &dra7xx_hdq1w_hwmod,
+	.clk		= "l3_iclk_div",
+	.addr		= dra7xx_hdq1w_addrs,
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l4_per1 -> i2c1 */
+static struct omap_hwmod_ocp_if dra7xx_l4_per1__i2c1 = {
+	.master		= &dra7xx_l4_per1_hwmod,
+	.slave		= &dra7xx_i2c1_hwmod,
+	.clk		= "l3_iclk_div",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l4_per1 -> i2c2 */
+static struct omap_hwmod_ocp_if dra7xx_l4_per1__i2c2 = {
+	.master		= &dra7xx_l4_per1_hwmod,
+	.slave		= &dra7xx_i2c2_hwmod,
+	.clk		= "l3_iclk_div",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l4_per1 -> i2c3 */
+static struct omap_hwmod_ocp_if dra7xx_l4_per1__i2c3 = {
+	.master		= &dra7xx_l4_per1_hwmod,
+	.slave		= &dra7xx_i2c3_hwmod,
+	.clk		= "l3_iclk_div",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l4_per1 -> i2c4 */
+static struct omap_hwmod_ocp_if dra7xx_l4_per1__i2c4 = {
+	.master		= &dra7xx_l4_per1_hwmod,
+	.slave		= &dra7xx_i2c4_hwmod,
+	.clk		= "l3_iclk_div",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l4_per1 -> i2c5 */
+static struct omap_hwmod_ocp_if dra7xx_l4_per1__i2c5 = {
+	.master		= &dra7xx_l4_per1_hwmod,
+	.slave		= &dra7xx_i2c5_hwmod,
+	.clk		= "l3_iclk_div",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l4_per1 -> mcspi1 */
+static struct omap_hwmod_ocp_if dra7xx_l4_per1__mcspi1 = {
+	.master		= &dra7xx_l4_per1_hwmod,
+	.slave		= &dra7xx_mcspi1_hwmod,
+	.clk		= "l3_iclk_div",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l4_per1 -> mcspi2 */
+static struct omap_hwmod_ocp_if dra7xx_l4_per1__mcspi2 = {
+	.master		= &dra7xx_l4_per1_hwmod,
+	.slave		= &dra7xx_mcspi2_hwmod,
+	.clk		= "l3_iclk_div",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l4_per1 -> mcspi3 */
+static struct omap_hwmod_ocp_if dra7xx_l4_per1__mcspi3 = {
+	.master		= &dra7xx_l4_per1_hwmod,
+	.slave		= &dra7xx_mcspi3_hwmod,
+	.clk		= "l3_iclk_div",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l4_per1 -> mcspi4 */
+static struct omap_hwmod_ocp_if dra7xx_l4_per1__mcspi4 = {
+	.master		= &dra7xx_l4_per1_hwmod,
+	.slave		= &dra7xx_mcspi4_hwmod,
+	.clk		= "l3_iclk_div",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l4_per1 -> mmc1 */
+static struct omap_hwmod_ocp_if dra7xx_l4_per1__mmc1 = {
+	.master		= &dra7xx_l4_per1_hwmod,
+	.slave		= &dra7xx_mmc1_hwmod,
+	.clk		= "l3_iclk_div",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l4_per1 -> mmc2 */
+static struct omap_hwmod_ocp_if dra7xx_l4_per1__mmc2 = {
+	.master		= &dra7xx_l4_per1_hwmod,
+	.slave		= &dra7xx_mmc2_hwmod,
+	.clk		= "l3_iclk_div",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l4_per1 -> mmc3 */
+static struct omap_hwmod_ocp_if dra7xx_l4_per1__mmc3 = {
+	.master		= &dra7xx_l4_per1_hwmod,
+	.slave		= &dra7xx_mmc3_hwmod,
+	.clk		= "l3_iclk_div",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l4_per1 -> mmc4 */
+static struct omap_hwmod_ocp_if dra7xx_l4_per1__mmc4 = {
+	.master		= &dra7xx_l4_per1_hwmod,
+	.slave		= &dra7xx_mmc4_hwmod,
+	.clk		= "l3_iclk_div",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l4_cfg -> mpu */
+static struct omap_hwmod_ocp_if dra7xx_l4_cfg__mpu = {
+	.master		= &dra7xx_l4_cfg_hwmod,
+	.slave		= &dra7xx_mpu_hwmod,
+	.clk		= "l3_iclk_div",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+static struct omap_hwmod_addr_space dra7xx_ocp2scp1_addrs[] = {
+	{
+		.pa_start	= 0x4a080000,
+		.pa_end		= 0x4a08001f,
+		.flags		= ADDR_TYPE_RT
+	},
+	{ }
+};
+
+/* l4_cfg -> ocp2scp1 */
+static struct omap_hwmod_ocp_if dra7xx_l4_cfg__ocp2scp1 = {
+	.master		= &dra7xx_l4_cfg_hwmod,
+	.slave		= &dra7xx_ocp2scp1_hwmod,
+	.clk		= "l4_root_clk_div",
+	.addr		= dra7xx_ocp2scp1_addrs,
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+static struct omap_hwmod_addr_space dra7xx_qspi_addrs[] = {
+	{
+		.pa_start	= 0x4b300000,
+		.pa_end		= 0x4b30007f,
+		.flags		= ADDR_TYPE_RT
+	},
+	{ }
+};
+
+/* l3_main_1 -> qspi */
+static struct omap_hwmod_ocp_if dra7xx_l3_main_1__qspi = {
+	.master		= &dra7xx_l3_main_1_hwmod,
+	.slave		= &dra7xx_qspi_hwmod,
+	.clk		= "l3_iclk_div",
+	.addr		= dra7xx_qspi_addrs,
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+static struct omap_hwmod_addr_space dra7xx_sata_addrs[] = {
+	{
+		.name		= "sysc",
+		.pa_start	= 0x4a141100,
+		.pa_end		= 0x4a141107,
+		.flags		= ADDR_TYPE_RT
+	},
+	{ }
+};
+
+/* l4_cfg -> sata */
+static struct omap_hwmod_ocp_if dra7xx_l4_cfg__sata = {
+	.master		= &dra7xx_l4_cfg_hwmod,
+	.slave		= &dra7xx_sata_hwmod,
+	.clk		= "l3_iclk_div",
+	.addr		= dra7xx_sata_addrs,
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+static struct omap_hwmod_addr_space dra7xx_smartreflex_core_addrs[] = {
+	{
+		.pa_start	= 0x4a0dd000,
+		.pa_end		= 0x4a0dd07f,
+		.flags		= ADDR_TYPE_RT
+	},
+	{ }
+};
+
+/* l4_cfg -> smartreflex_core */
+static struct omap_hwmod_ocp_if dra7xx_l4_cfg__smartreflex_core = {
+	.master		= &dra7xx_l4_cfg_hwmod,
+	.slave		= &dra7xx_smartreflex_core_hwmod,
+	.clk		= "l4_root_clk_div",
+	.addr		= dra7xx_smartreflex_core_addrs,
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+static struct omap_hwmod_addr_space dra7xx_smartreflex_mpu_addrs[] = {
+	{
+		.pa_start	= 0x4a0d9000,
+		.pa_end		= 0x4a0d907f,
+		.flags		= ADDR_TYPE_RT
+	},
+	{ }
+};
+
+/* l4_cfg -> smartreflex_mpu */
+static struct omap_hwmod_ocp_if dra7xx_l4_cfg__smartreflex_mpu = {
+	.master		= &dra7xx_l4_cfg_hwmod,
+	.slave		= &dra7xx_smartreflex_mpu_hwmod,
+	.clk		= "l4_root_clk_div",
+	.addr		= dra7xx_smartreflex_mpu_addrs,
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+static struct omap_hwmod_addr_space dra7xx_spinlock_addrs[] = {
+	{
+		.pa_start	= 0x4a0f6000,
+		.pa_end		= 0x4a0f6fff,
+		.flags		= ADDR_TYPE_RT
+	},
+	{ }
+};
+
+/* l4_cfg -> spinlock */
+static struct omap_hwmod_ocp_if dra7xx_l4_cfg__spinlock = {
+	.master		= &dra7xx_l4_cfg_hwmod,
+	.slave		= &dra7xx_spinlock_hwmod,
+	.clk		= "l3_iclk_div",
+	.addr		= dra7xx_spinlock_addrs,
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l4_wkup -> timer1 */
+static struct omap_hwmod_ocp_if dra7xx_l4_wkup__timer1 = {
+	.master		= &dra7xx_l4_wkup_hwmod,
+	.slave		= &dra7xx_timer1_hwmod,
+	.clk		= "wkupaon_iclk_mux",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l4_per1 -> timer2 */
+static struct omap_hwmod_ocp_if dra7xx_l4_per1__timer2 = {
+	.master		= &dra7xx_l4_per1_hwmod,
+	.slave		= &dra7xx_timer2_hwmod,
+	.clk		= "l3_iclk_div",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l4_per1 -> timer3 */
+static struct omap_hwmod_ocp_if dra7xx_l4_per1__timer3 = {
+	.master		= &dra7xx_l4_per1_hwmod,
+	.slave		= &dra7xx_timer3_hwmod,
+	.clk		= "l3_iclk_div",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l4_per1 -> timer4 */
+static struct omap_hwmod_ocp_if dra7xx_l4_per1__timer4 = {
+	.master		= &dra7xx_l4_per1_hwmod,
+	.slave		= &dra7xx_timer4_hwmod,
+	.clk		= "l3_iclk_div",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l4_per3 -> timer5 */
+static struct omap_hwmod_ocp_if dra7xx_l4_per3__timer5 = {
+	.master		= &dra7xx_l4_per3_hwmod,
+	.slave		= &dra7xx_timer5_hwmod,
+	.clk		= "l3_iclk_div",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l4_per3 -> timer6 */
+static struct omap_hwmod_ocp_if dra7xx_l4_per3__timer6 = {
+	.master		= &dra7xx_l4_per3_hwmod,
+	.slave		= &dra7xx_timer6_hwmod,
+	.clk		= "l3_iclk_div",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l4_per3 -> timer7 */
+static struct omap_hwmod_ocp_if dra7xx_l4_per3__timer7 = {
+	.master		= &dra7xx_l4_per3_hwmod,
+	.slave		= &dra7xx_timer7_hwmod,
+	.clk		= "l3_iclk_div",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l4_per3 -> timer8 */
+static struct omap_hwmod_ocp_if dra7xx_l4_per3__timer8 = {
+	.master		= &dra7xx_l4_per3_hwmod,
+	.slave		= &dra7xx_timer8_hwmod,
+	.clk		= "l3_iclk_div",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l4_per1 -> timer9 */
+static struct omap_hwmod_ocp_if dra7xx_l4_per1__timer9 = {
+	.master		= &dra7xx_l4_per1_hwmod,
+	.slave		= &dra7xx_timer9_hwmod,
+	.clk		= "l3_iclk_div",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l4_per1 -> timer10 */
+static struct omap_hwmod_ocp_if dra7xx_l4_per1__timer10 = {
+	.master		= &dra7xx_l4_per1_hwmod,
+	.slave		= &dra7xx_timer10_hwmod,
+	.clk		= "l3_iclk_div",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l4_per1 -> timer11 */
+static struct omap_hwmod_ocp_if dra7xx_l4_per1__timer11 = {
+	.master		= &dra7xx_l4_per1_hwmod,
+	.slave		= &dra7xx_timer11_hwmod,
+	.clk		= "l3_iclk_div",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l4_per1 -> uart1 */
+static struct omap_hwmod_ocp_if dra7xx_l4_per1__uart1 = {
+	.master		= &dra7xx_l4_per1_hwmod,
+	.slave		= &dra7xx_uart1_hwmod,
+	.clk		= "l3_iclk_div",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l4_per1 -> uart2 */
+static struct omap_hwmod_ocp_if dra7xx_l4_per1__uart2 = {
+	.master		= &dra7xx_l4_per1_hwmod,
+	.slave		= &dra7xx_uart2_hwmod,
+	.clk		= "l3_iclk_div",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l4_per1 -> uart3 */
+static struct omap_hwmod_ocp_if dra7xx_l4_per1__uart3 = {
+	.master		= &dra7xx_l4_per1_hwmod,
+	.slave		= &dra7xx_uart3_hwmod,
+	.clk		= "l3_iclk_div",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l4_per1 -> uart4 */
+static struct omap_hwmod_ocp_if dra7xx_l4_per1__uart4 = {
+	.master		= &dra7xx_l4_per1_hwmod,
+	.slave		= &dra7xx_uart4_hwmod,
+	.clk		= "l3_iclk_div",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l4_per1 -> uart5 */
+static struct omap_hwmod_ocp_if dra7xx_l4_per1__uart5 = {
+	.master		= &dra7xx_l4_per1_hwmod,
+	.slave		= &dra7xx_uart5_hwmod,
+	.clk		= "l3_iclk_div",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l4_per1 -> uart6 */
+static struct omap_hwmod_ocp_if dra7xx_l4_per1__uart6 = {
+	.master		= &dra7xx_l4_per1_hwmod,
+	.slave		= &dra7xx_uart6_hwmod,
+	.clk		= "l3_iclk_div",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l4_per3 -> usb_otg_ss1 */
+static struct omap_hwmod_ocp_if dra7xx_l4_per3__usb_otg_ss1 = {
+	.master		= &dra7xx_l4_per3_hwmod,
+	.slave		= &dra7xx_usb_otg_ss1_hwmod,
+	.clk		= "dpll_core_h13x2_ck",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l4_per3 -> usb_otg_ss2 */
+static struct omap_hwmod_ocp_if dra7xx_l4_per3__usb_otg_ss2 = {
+	.master		= &dra7xx_l4_per3_hwmod,
+	.slave		= &dra7xx_usb_otg_ss2_hwmod,
+	.clk		= "dpll_core_h13x2_ck",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l4_per3 -> usb_otg_ss3 */
+static struct omap_hwmod_ocp_if dra7xx_l4_per3__usb_otg_ss3 = {
+	.master		= &dra7xx_l4_per3_hwmod,
+	.slave		= &dra7xx_usb_otg_ss3_hwmod,
+	.clk		= "dpll_core_h13x2_ck",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l4_per3 -> usb_otg_ss4 */
+static struct omap_hwmod_ocp_if dra7xx_l4_per3__usb_otg_ss4 = {
+	.master		= &dra7xx_l4_per3_hwmod,
+	.slave		= &dra7xx_usb_otg_ss4_hwmod,
+	.clk		= "dpll_core_h13x2_ck",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l3_main_1 -> vcp1 */
+static struct omap_hwmod_ocp_if dra7xx_l3_main_1__vcp1 = {
+	.master		= &dra7xx_l3_main_1_hwmod,
+	.slave		= &dra7xx_vcp1_hwmod,
+	.clk		= "l3_iclk_div",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l4_per2 -> vcp1 */
+static struct omap_hwmod_ocp_if dra7xx_l4_per2__vcp1 = {
+	.master		= &dra7xx_l4_per2_hwmod,
+	.slave		= &dra7xx_vcp1_hwmod,
+	.clk		= "l3_iclk_div",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l3_main_1 -> vcp2 */
+static struct omap_hwmod_ocp_if dra7xx_l3_main_1__vcp2 = {
+	.master		= &dra7xx_l3_main_1_hwmod,
+	.slave		= &dra7xx_vcp2_hwmod,
+	.clk		= "l3_iclk_div",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l4_per2 -> vcp2 */
+static struct omap_hwmod_ocp_if dra7xx_l4_per2__vcp2 = {
+	.master		= &dra7xx_l4_per2_hwmod,
+	.slave		= &dra7xx_vcp2_hwmod,
+	.clk		= "l3_iclk_div",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+/* l4_wkup -> wd_timer2 */
+static struct omap_hwmod_ocp_if dra7xx_l4_wkup__wd_timer2 = {
+	.master		= &dra7xx_l4_wkup_hwmod,
+	.slave		= &dra7xx_wd_timer2_hwmod,
+	.clk		= "wkupaon_iclk_mux",
+	.user		= OCP_USER_MPU | OCP_USER_SDMA,
+};
+
+static struct omap_hwmod_ocp_if *dra7xx_hwmod_ocp_ifs[] __initdata = {
+	&dra7xx_l3_main_2__l3_instr,
+	&dra7xx_l4_cfg__l3_main_1,
+	&dra7xx_mpu__l3_main_1,
+	&dra7xx_l3_main_1__l3_main_2,
+	&dra7xx_l4_cfg__l3_main_2,
+	&dra7xx_l3_main_1__l4_cfg,
+	&dra7xx_l3_main_1__l4_per1,
+	&dra7xx_l3_main_1__l4_per2,
+	&dra7xx_l3_main_1__l4_per3,
+	&dra7xx_l3_main_1__l4_wkup,
+	&dra7xx_l4_per2__atl,
+	&dra7xx_l3_main_1__bb2d,
+	&dra7xx_l4_wkup__counter_32k,
+	&dra7xx_l4_wkup__ctrl_module_wkup,
+	&dra7xx_l4_wkup__dcan1,
+	&dra7xx_l4_per2__dcan2,
+	&dra7xx_l4_cfg__dma_system,
+	&dra7xx_l3_main_1__dss,
+	&dra7xx_l3_main_1__dispc,
+	&dra7xx_l3_main_1__hdmi,
+	&dra7xx_l4_per1__elm,
+	&dra7xx_l4_wkup__gpio1,
+	&dra7xx_l4_per1__gpio2,
+	&dra7xx_l4_per1__gpio3,
+	&dra7xx_l4_per1__gpio4,
+	&dra7xx_l4_per1__gpio5,
+	&dra7xx_l4_per1__gpio6,
+	&dra7xx_l4_per1__gpio7,
+	&dra7xx_l4_per1__gpio8,
+	&dra7xx_l3_main_1__gpmc,
+	&dra7xx_l4_per1__hdq1w,
+	&dra7xx_l4_per1__i2c1,
+	&dra7xx_l4_per1__i2c2,
+	&dra7xx_l4_per1__i2c3,
+	&dra7xx_l4_per1__i2c4,
+	&dra7xx_l4_per1__i2c5,
+	&dra7xx_l4_per1__mcspi1,
+	&dra7xx_l4_per1__mcspi2,
+	&dra7xx_l4_per1__mcspi3,
+	&dra7xx_l4_per1__mcspi4,
+	&dra7xx_l4_per1__mmc1,
+	&dra7xx_l4_per1__mmc2,
+	&dra7xx_l4_per1__mmc3,
+	&dra7xx_l4_per1__mmc4,
+	&dra7xx_l4_cfg__mpu,
+	&dra7xx_l4_cfg__ocp2scp1,
+	&dra7xx_l3_main_1__qspi,
+	&dra7xx_l4_cfg__sata,
+	&dra7xx_l4_cfg__smartreflex_core,
+	&dra7xx_l4_cfg__smartreflex_mpu,
+	&dra7xx_l4_cfg__spinlock,
+	&dra7xx_l4_wkup__timer1,
+	&dra7xx_l4_per1__timer2,
+	&dra7xx_l4_per1__timer3,
+	&dra7xx_l4_per1__timer4,
+	&dra7xx_l4_per3__timer5,
+	&dra7xx_l4_per3__timer6,
+	&dra7xx_l4_per3__timer7,
+	&dra7xx_l4_per3__timer8,
+	&dra7xx_l4_per1__timer9,
+	&dra7xx_l4_per1__timer10,
+	&dra7xx_l4_per1__timer11,
+	&dra7xx_l4_per1__uart1,
+	&dra7xx_l4_per1__uart2,
+	&dra7xx_l4_per1__uart3,
+	&dra7xx_l4_per1__uart4,
+	&dra7xx_l4_per1__uart5,
+	&dra7xx_l4_per1__uart6,
+	&dra7xx_l4_per3__usb_otg_ss1,
+	&dra7xx_l4_per3__usb_otg_ss2,
+	&dra7xx_l4_per3__usb_otg_ss3,
+	&dra7xx_l4_per3__usb_otg_ss4,
+	&dra7xx_l3_main_1__vcp1,
+	&dra7xx_l4_per2__vcp1,
+	&dra7xx_l3_main_1__vcp2,
+	&dra7xx_l4_per2__vcp2,
+	&dra7xx_l4_wkup__wd_timer2,
+	NULL,
+};
+
+int __init dra7xx_hwmod_init(void)
+{
+	omap_hwmod_init();
+	return omap_hwmod_register_links(dra7xx_hwmod_ocp_ifs);
+}
diff --git a/arch/arm/mach-omap2/powerdomain.h b/arch/arm/mach-omap2/powerdomain.h
index e4d7bd6..baf3d8b 100644
--- a/arch/arm/mach-omap2/powerdomain.h
+++ b/arch/arm/mach-omap2/powerdomain.h
@@ -256,6 +256,7 @@
 extern void am33xx_powerdomains_init(void);
 extern void omap44xx_powerdomains_init(void);
 extern void omap54xx_powerdomains_init(void);
+extern void dra7xx_powerdomains_init(void);
 
 extern struct pwrdm_ops omap2_pwrdm_operations;
 extern struct pwrdm_ops omap3_pwrdm_operations;
diff --git a/arch/arm/mach-omap2/powerdomains3xxx_data.c b/arch/arm/mach-omap2/powerdomains3xxx_data.c
index e2d4bd8..328c103 100644
--- a/arch/arm/mach-omap2/powerdomains3xxx_data.c
+++ b/arch/arm/mach-omap2/powerdomains3xxx_data.c
@@ -336,6 +336,13 @@
 	.voltdm		  = { .name = "core" },
 };
 
+static struct powerdomain alwon_81xx_pwrdm = {
+	.name		  = "alwon_pwrdm",
+	.prcm_offs	  = TI81XX_PRM_ALWON_MOD,
+	.pwrsts		  = PWRSTS_OFF_ON,
+	.voltdm		  = { .name = "core" },
+};
+
 static struct powerdomain device_81xx_pwrdm = {
 	.name		  = "device_pwrdm",
 	.prcm_offs	  = TI81XX_PRM_DEVICE_MOD,
@@ -442,6 +449,7 @@
 };
 
 static struct powerdomain *powerdomains_ti81xx[] __initdata = {
+	&alwon_81xx_pwrdm,
 	&device_81xx_pwrdm,
 	&active_816x_pwrdm,
 	&default_816x_pwrdm,
diff --git a/arch/arm/mach-omap2/powerdomains7xx_data.c b/arch/arm/mach-omap2/powerdomains7xx_data.c
new file mode 100644
index 0000000..48151d1
--- /dev/null
+++ b/arch/arm/mach-omap2/powerdomains7xx_data.c
@@ -0,0 +1,454 @@
+/*
+ * DRA7xx Power domains framework
+ *
+ * Copyright (C) 2009-2013 Texas Instruments, Inc.
+ * Copyright (C) 2009-2011 Nokia Corporation
+ *
+ * Generated by code originally written by:
+ * Abhijit Pagare (abhijitpagare@ti.com)
+ * Benoit Cousson (b-cousson@ti.com)
+ * Paul Walmsley (paul@pwsan.com)
+ *
+ * This file is automatically generated from the OMAP hardware databases.
+ * We respectfully ask that any modifications to this file be coordinated
+ * with the public linux-omap@vger.kernel.org mailing list and the
+ * authors above to ensure that the autogeneration scripts are kept
+ * up-to-date with the file contents.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+
+#include "powerdomain.h"
+
+#include "prcm-common.h"
+#include "prcm44xx.h"
+#include "prm7xx.h"
+#include "prcm_mpu7xx.h"
+
+/* iva_7xx_pwrdm: IVA-HD power domain */
+static struct powerdomain iva_7xx_pwrdm = {
+	.name		  = "iva_pwrdm",
+	.prcm_offs	  = DRA7XX_PRM_IVA_INST,
+	.prcm_partition	  = DRA7XX_PRM_PARTITION,
+	.pwrsts		  = PWRSTS_OFF_RET_ON,
+	.pwrsts_logic_ret = PWRSTS_OFF,
+	.banks		  = 4,
+	.pwrsts_mem_ret	= {
+		[0] = PWRSTS_OFF_RET,	/* hwa_mem */
+		[1] = PWRSTS_OFF_RET,	/* sl2_mem */
+		[2] = PWRSTS_OFF_RET,	/* tcm1_mem */
+		[3] = PWRSTS_OFF_RET,	/* tcm2_mem */
+	},
+	.pwrsts_mem_on	= {
+		[0] = PWRSTS_OFF_RET,	/* hwa_mem */
+		[1] = PWRSTS_OFF_RET,	/* sl2_mem */
+		[2] = PWRSTS_OFF_RET,	/* tcm1_mem */
+		[3] = PWRSTS_OFF_RET,	/* tcm2_mem */
+	},
+	.flags		  = PWRDM_HAS_LOWPOWERSTATECHANGE,
+};
+
+/* rtc_7xx_pwrdm:  */
+static struct powerdomain rtc_7xx_pwrdm = {
+	.name		  = "rtc_pwrdm",
+	.prcm_offs	  = DRA7XX_PRM_RTC_INST,
+	.prcm_partition	  = DRA7XX_PRM_PARTITION,
+	.pwrsts		  = PWRSTS_ON,
+};
+
+/* custefuse_7xx_pwrdm: Customer efuse controller power domain */
+static struct powerdomain custefuse_7xx_pwrdm = {
+	.name		  = "custefuse_pwrdm",
+	.prcm_offs	  = DRA7XX_PRM_CUSTEFUSE_INST,
+	.prcm_partition	  = DRA7XX_PRM_PARTITION,
+	.pwrsts		  = PWRSTS_OFF_ON,
+	.flags		  = PWRDM_HAS_LOWPOWERSTATECHANGE,
+};
+
+/* ipu_7xx_pwrdm: Audio back end power domain */
+static struct powerdomain ipu_7xx_pwrdm = {
+	.name		  = "ipu_pwrdm",
+	.prcm_offs	  = DRA7XX_PRM_IPU_INST,
+	.prcm_partition	  = DRA7XX_PRM_PARTITION,
+	.pwrsts		  = PWRSTS_OFF_RET_ON,
+	.pwrsts_logic_ret = PWRSTS_OFF,
+	.banks		  = 2,
+	.pwrsts_mem_ret	= {
+		[0] = PWRSTS_OFF_RET,	/* aessmem */
+		[1] = PWRSTS_OFF_RET,	/* periphmem */
+	},
+	.pwrsts_mem_on	= {
+		[0] = PWRSTS_OFF_RET,	/* aessmem */
+		[1] = PWRSTS_OFF_RET,	/* periphmem */
+	},
+	.flags		  = PWRDM_HAS_LOWPOWERSTATECHANGE,
+};
+
+/* dss_7xx_pwrdm: Display subsystem power domain */
+static struct powerdomain dss_7xx_pwrdm = {
+	.name		  = "dss_pwrdm",
+	.prcm_offs	  = DRA7XX_PRM_DSS_INST,
+	.prcm_partition	  = DRA7XX_PRM_PARTITION,
+	.pwrsts		  = PWRSTS_OFF_RET_ON,
+	.pwrsts_logic_ret = PWRSTS_OFF,
+	.banks		  = 1,
+	.pwrsts_mem_ret	= {
+		[0] = PWRSTS_OFF_RET,	/* dss_mem */
+	},
+	.pwrsts_mem_on	= {
+		[0] = PWRSTS_OFF_RET,	/* dss_mem */
+	},
+	.flags		  = PWRDM_HAS_LOWPOWERSTATECHANGE,
+};
+
+/* l4per_7xx_pwrdm: Target peripherals power domain */
+static struct powerdomain l4per_7xx_pwrdm = {
+	.name		  = "l4per_pwrdm",
+	.prcm_offs	  = DRA7XX_PRM_L4PER_INST,
+	.prcm_partition	  = DRA7XX_PRM_PARTITION,
+	.pwrsts		  = PWRSTS_RET_ON,
+	.pwrsts_logic_ret = PWRSTS_OFF_RET,
+	.banks		  = 2,
+	.pwrsts_mem_ret	= {
+		[0] = PWRSTS_OFF_RET,	/* nonretained_bank */
+		[1] = PWRSTS_OFF_RET,	/* retained_bank */
+	},
+	.pwrsts_mem_on	= {
+		[0] = PWRSTS_OFF_RET,	/* nonretained_bank */
+		[1] = PWRSTS_OFF_RET,	/* retained_bank */
+	},
+	.flags		  = PWRDM_HAS_LOWPOWERSTATECHANGE,
+};
+
+/* gpu_7xx_pwrdm: 3D accelerator power domain */
+static struct powerdomain gpu_7xx_pwrdm = {
+	.name		  = "gpu_pwrdm",
+	.prcm_offs	  = DRA7XX_PRM_GPU_INST,
+	.prcm_partition	  = DRA7XX_PRM_PARTITION,
+	.pwrsts		  = PWRSTS_OFF_ON,
+	.banks		  = 1,
+	.pwrsts_mem_ret	= {
+		[0] = PWRSTS_OFF_RET,	/* gpu_mem */
+	},
+	.pwrsts_mem_on	= {
+		[0] = PWRSTS_OFF_RET,	/* gpu_mem */
+	},
+	.flags		  = PWRDM_HAS_LOWPOWERSTATECHANGE,
+};
+
+/* wkupaon_7xx_pwrdm: Wake-up power domain */
+static struct powerdomain wkupaon_7xx_pwrdm = {
+	.name		  = "wkupaon_pwrdm",
+	.prcm_offs	  = DRA7XX_PRM_WKUPAON_INST,
+	.prcm_partition	  = DRA7XX_PRM_PARTITION,
+	.pwrsts		  = PWRSTS_ON,
+	.banks		  = 1,
+	.pwrsts_mem_ret	= {
+	},
+	.pwrsts_mem_on	= {
+		[0] = PWRSTS_ON,	/* wkup_bank */
+	},
+};
+
+/* core_7xx_pwrdm: CORE power domain */
+static struct powerdomain core_7xx_pwrdm = {
+	.name		  = "core_pwrdm",
+	.prcm_offs	  = DRA7XX_PRM_CORE_INST,
+	.prcm_partition	  = DRA7XX_PRM_PARTITION,
+	.pwrsts		  = PWRSTS_RET_ON,
+	.pwrsts_logic_ret = PWRSTS_OFF_RET,
+	.banks		  = 5,
+	.pwrsts_mem_ret	= {
+		[0] = PWRSTS_OFF_RET,	/* core_nret_bank */
+		[1] = PWRSTS_OFF_RET,	/* core_ocmram */
+		[2] = PWRSTS_OFF_RET,	/* core_other_bank */
+		[3] = PWRSTS_OFF_RET,	/* ipu_l2ram */
+		[4] = PWRSTS_OFF_RET,	/* ipu_unicache */
+	},
+	.pwrsts_mem_on	= {
+		[0] = PWRSTS_OFF_RET,	/* core_nret_bank */
+		[1] = PWRSTS_OFF_RET,	/* core_ocmram */
+		[2] = PWRSTS_OFF_RET,	/* core_other_bank */
+		[3] = PWRSTS_OFF_RET,	/* ipu_l2ram */
+		[4] = PWRSTS_OFF_RET,	/* ipu_unicache */
+	},
+	.flags		  = PWRDM_HAS_LOWPOWERSTATECHANGE,
+};
+
+/* coreaon_7xx_pwrdm: Always ON logic that sits in VDD_CORE voltage domain */
+static struct powerdomain coreaon_7xx_pwrdm = {
+	.name		  = "coreaon_pwrdm",
+	.prcm_offs	  = DRA7XX_PRM_COREAON_INST,
+	.prcm_partition	  = DRA7XX_PRM_PARTITION,
+	.pwrsts		  = PWRSTS_ON,
+};
+
+/* cpu0_7xx_pwrdm: MPU0 processor and Neon coprocessor power domain */
+static struct powerdomain cpu0_7xx_pwrdm = {
+	.name		  = "cpu0_pwrdm",
+	.prcm_offs	  = DRA7XX_MPU_PRCM_PRM_C0_INST,
+	.prcm_partition	  = DRA7XX_MPU_PRCM_PARTITION,
+	.pwrsts		  = PWRSTS_OFF_RET_ON,
+	.pwrsts_logic_ret = PWRSTS_OFF_RET,
+	.banks		  = 1,
+	.pwrsts_mem_ret	= {
+		[0] = PWRSTS_OFF_RET,	/* cpu0_l1 */
+	},
+	.pwrsts_mem_on	= {
+		[0] = PWRSTS_ON,	/* cpu0_l1 */
+	},
+};
+
+/* cpu1_7xx_pwrdm: MPU1 processor and Neon coprocessor power domain */
+static struct powerdomain cpu1_7xx_pwrdm = {
+	.name		  = "cpu1_pwrdm",
+	.prcm_offs	  = DRA7XX_MPU_PRCM_PRM_C1_INST,
+	.prcm_partition	  = DRA7XX_MPU_PRCM_PARTITION,
+	.pwrsts		  = PWRSTS_OFF_RET_ON,
+	.pwrsts_logic_ret = PWRSTS_OFF_RET,
+	.banks		  = 1,
+	.pwrsts_mem_ret	= {
+		[0] = PWRSTS_OFF_RET,	/* cpu1_l1 */
+	},
+	.pwrsts_mem_on	= {
+		[0] = PWRSTS_ON,	/* cpu1_l1 */
+	},
+};
+
+/* vpe_7xx_pwrdm:  */
+static struct powerdomain vpe_7xx_pwrdm = {
+	.name		  = "vpe_pwrdm",
+	.prcm_offs	  = DRA7XX_PRM_VPE_INST,
+	.prcm_partition	  = DRA7XX_PRM_PARTITION,
+	.pwrsts		  = PWRSTS_OFF_RET_ON,
+	.pwrsts_logic_ret = PWRSTS_OFF_RET,
+	.banks		  = 1,
+	.pwrsts_mem_ret	= {
+		[0] = PWRSTS_OFF_RET,	/* vpe_bank */
+	},
+	.pwrsts_mem_on	= {
+		[0] = PWRSTS_OFF_RET,	/* vpe_bank */
+	},
+	.flags		  = PWRDM_HAS_LOWPOWERSTATECHANGE,
+};
+
+/* mpu_7xx_pwrdm: Modena processor and the Neon coprocessor power domain */
+static struct powerdomain mpu_7xx_pwrdm = {
+	.name		  = "mpu_pwrdm",
+	.prcm_offs	  = DRA7XX_PRM_MPU_INST,
+	.prcm_partition	  = DRA7XX_PRM_PARTITION,
+	.pwrsts		  = PWRSTS_RET_ON,
+	.pwrsts_logic_ret = PWRSTS_OFF_RET,
+	.banks		  = 2,
+	.pwrsts_mem_ret	= {
+		[0] = PWRSTS_OFF_RET,	/* mpu_l2 */
+		[1] = PWRSTS_RET,	/* mpu_ram */
+	},
+	.pwrsts_mem_on	= {
+		[0] = PWRSTS_OFF_RET,	/* mpu_l2 */
+		[1] = PWRSTS_OFF_RET,	/* mpu_ram */
+	},
+};
+
+/* l3init_7xx_pwrdm: L3 initators pheripherals power domain  */
+static struct powerdomain l3init_7xx_pwrdm = {
+	.name		  = "l3init_pwrdm",
+	.prcm_offs	  = DRA7XX_PRM_L3INIT_INST,
+	.prcm_partition	  = DRA7XX_PRM_PARTITION,
+	.pwrsts		  = PWRSTS_RET_ON,
+	.pwrsts_logic_ret = PWRSTS_OFF_RET,
+	.banks		  = 3,
+	.pwrsts_mem_ret	= {
+		[0] = PWRSTS_OFF_RET,	/* gmac_bank */
+		[1] = PWRSTS_OFF_RET,	/* l3init_bank1 */
+		[2] = PWRSTS_OFF_RET,	/* l3init_bank2 */
+	},
+	.pwrsts_mem_on	= {
+		[0] = PWRSTS_OFF_RET,	/* gmac_bank */
+		[1] = PWRSTS_OFF_RET,	/* l3init_bank1 */
+		[2] = PWRSTS_OFF_RET,	/* l3init_bank2 */
+	},
+	.flags		  = PWRDM_HAS_LOWPOWERSTATECHANGE,
+};
+
+/* eve3_7xx_pwrdm:  */
+static struct powerdomain eve3_7xx_pwrdm = {
+	.name		  = "eve3_pwrdm",
+	.prcm_offs	  = DRA7XX_PRM_EVE3_INST,
+	.prcm_partition	  = DRA7XX_PRM_PARTITION,
+	.pwrsts		  = PWRSTS_OFF_ON,
+	.banks		  = 1,
+	.pwrsts_mem_ret	= {
+		[0] = PWRSTS_OFF_RET,	/* eve3_bank */
+	},
+	.pwrsts_mem_on	= {
+		[0] = PWRSTS_OFF_RET,	/* eve3_bank */
+	},
+	.flags		  = PWRDM_HAS_LOWPOWERSTATECHANGE,
+};
+
+/* emu_7xx_pwrdm: Emulation power domain */
+static struct powerdomain emu_7xx_pwrdm = {
+	.name		  = "emu_pwrdm",
+	.prcm_offs	  = DRA7XX_PRM_EMU_INST,
+	.prcm_partition	  = DRA7XX_PRM_PARTITION,
+	.pwrsts		  = PWRSTS_OFF_ON,
+	.banks		  = 1,
+	.pwrsts_mem_ret	= {
+		[0] = PWRSTS_OFF_RET,	/* emu_bank */
+	},
+	.pwrsts_mem_on	= {
+		[0] = PWRSTS_OFF_RET,	/* emu_bank */
+	},
+};
+
+/* dsp2_7xx_pwrdm:  */
+static struct powerdomain dsp2_7xx_pwrdm = {
+	.name		  = "dsp2_pwrdm",
+	.prcm_offs	  = DRA7XX_PRM_DSP2_INST,
+	.prcm_partition	  = DRA7XX_PRM_PARTITION,
+	.pwrsts		  = PWRSTS_OFF_ON,
+	.banks		  = 3,
+	.pwrsts_mem_ret	= {
+		[0] = PWRSTS_OFF_RET,	/* dsp2_edma */
+		[1] = PWRSTS_OFF_RET,	/* dsp2_l1 */
+		[2] = PWRSTS_OFF_RET,	/* dsp2_l2 */
+	},
+	.pwrsts_mem_on	= {
+		[0] = PWRSTS_OFF_RET,	/* dsp2_edma */
+		[1] = PWRSTS_OFF_RET,	/* dsp2_l1 */
+		[2] = PWRSTS_OFF_RET,	/* dsp2_l2 */
+	},
+	.flags		  = PWRDM_HAS_LOWPOWERSTATECHANGE,
+};
+
+/* dsp1_7xx_pwrdm: Tesla processor power domain */
+static struct powerdomain dsp1_7xx_pwrdm = {
+	.name		  = "dsp1_pwrdm",
+	.prcm_offs	  = DRA7XX_PRM_DSP1_INST,
+	.prcm_partition	  = DRA7XX_PRM_PARTITION,
+	.pwrsts		  = PWRSTS_OFF_ON,
+	.banks		  = 3,
+	.pwrsts_mem_ret	= {
+		[0] = PWRSTS_OFF_RET,	/* dsp1_edma */
+		[1] = PWRSTS_OFF_RET,	/* dsp1_l1 */
+		[2] = PWRSTS_OFF_RET,	/* dsp1_l2 */
+	},
+	.pwrsts_mem_on	= {
+		[0] = PWRSTS_OFF_RET,	/* dsp1_edma */
+		[1] = PWRSTS_OFF_RET,	/* dsp1_l1 */
+		[2] = PWRSTS_OFF_RET,	/* dsp1_l2 */
+	},
+	.flags		  = PWRDM_HAS_LOWPOWERSTATECHANGE,
+};
+
+/* cam_7xx_pwrdm: Camera subsystem power domain */
+static struct powerdomain cam_7xx_pwrdm = {
+	.name		  = "cam_pwrdm",
+	.prcm_offs	  = DRA7XX_PRM_CAM_INST,
+	.prcm_partition	  = DRA7XX_PRM_PARTITION,
+	.pwrsts		  = PWRSTS_OFF_ON,
+	.banks		  = 1,
+	.pwrsts_mem_ret	= {
+		[0] = PWRSTS_OFF_RET,	/* vip_bank */
+	},
+	.pwrsts_mem_on	= {
+		[0] = PWRSTS_OFF_RET,	/* vip_bank */
+	},
+	.flags		  = PWRDM_HAS_LOWPOWERSTATECHANGE,
+};
+
+/* eve4_7xx_pwrdm:  */
+static struct powerdomain eve4_7xx_pwrdm = {
+	.name		  = "eve4_pwrdm",
+	.prcm_offs	  = DRA7XX_PRM_EVE4_INST,
+	.prcm_partition	  = DRA7XX_PRM_PARTITION,
+	.pwrsts		  = PWRSTS_OFF_ON,
+	.banks		  = 1,
+	.pwrsts_mem_ret	= {
+		[0] = PWRSTS_OFF_RET,	/* eve4_bank */
+	},
+	.pwrsts_mem_on	= {
+		[0] = PWRSTS_OFF_RET,	/* eve4_bank */
+	},
+	.flags		  = PWRDM_HAS_LOWPOWERSTATECHANGE,
+};
+
+/* eve2_7xx_pwrdm:  */
+static struct powerdomain eve2_7xx_pwrdm = {
+	.name		  = "eve2_pwrdm",
+	.prcm_offs	  = DRA7XX_PRM_EVE2_INST,
+	.prcm_partition	  = DRA7XX_PRM_PARTITION,
+	.pwrsts		  = PWRSTS_OFF_ON,
+	.banks		  = 1,
+	.pwrsts_mem_ret	= {
+		[0] = PWRSTS_OFF_RET,	/* eve2_bank */
+	},
+	.pwrsts_mem_on	= {
+		[0] = PWRSTS_OFF_RET,	/* eve2_bank */
+	},
+	.flags		  = PWRDM_HAS_LOWPOWERSTATECHANGE,
+};
+
+/* eve1_7xx_pwrdm:  */
+static struct powerdomain eve1_7xx_pwrdm = {
+	.name		  = "eve1_pwrdm",
+	.prcm_offs	  = DRA7XX_PRM_EVE1_INST,
+	.prcm_partition	  = DRA7XX_PRM_PARTITION,
+	.pwrsts		  = PWRSTS_OFF_ON,
+	.banks		  = 1,
+	.pwrsts_mem_ret	= {
+		[0] = PWRSTS_OFF_RET,	/* eve1_bank */
+	},
+	.pwrsts_mem_on	= {
+		[0] = PWRSTS_OFF_RET,	/* eve1_bank */
+	},
+	.flags		  = PWRDM_HAS_LOWPOWERSTATECHANGE,
+};
+
+/*
+ * The following power domains are not under SW control
+ *
+ * mpuaon
+ * mmaon
+ */
+
+/* As powerdomains are added or removed above, this list must also be changed */
+static struct powerdomain *powerdomains_dra7xx[] __initdata = {
+	&iva_7xx_pwrdm,
+	&rtc_7xx_pwrdm,
+	&custefuse_7xx_pwrdm,
+	&ipu_7xx_pwrdm,
+	&dss_7xx_pwrdm,
+	&l4per_7xx_pwrdm,
+	&gpu_7xx_pwrdm,
+	&wkupaon_7xx_pwrdm,
+	&core_7xx_pwrdm,
+	&coreaon_7xx_pwrdm,
+	&cpu0_7xx_pwrdm,
+	&cpu1_7xx_pwrdm,
+	&vpe_7xx_pwrdm,
+	&mpu_7xx_pwrdm,
+	&l3init_7xx_pwrdm,
+	&eve3_7xx_pwrdm,
+	&emu_7xx_pwrdm,
+	&dsp2_7xx_pwrdm,
+	&dsp1_7xx_pwrdm,
+	&cam_7xx_pwrdm,
+	&eve4_7xx_pwrdm,
+	&eve2_7xx_pwrdm,
+	&eve1_7xx_pwrdm,
+	NULL
+};
+
+void __init dra7xx_powerdomains_init(void)
+{
+	pwrdm_register_platform_funcs(&omap4_pwrdm_operations);
+	pwrdm_register_pwrdms(powerdomains_dra7xx);
+	pwrdm_complete_init();
+}
diff --git a/arch/arm/mach-omap2/prcm-common.h b/arch/arm/mach-omap2/prcm-common.h
index ff1ac4a..0e841fd 100644
--- a/arch/arm/mach-omap2/prcm-common.h
+++ b/arch/arm/mach-omap2/prcm-common.h
@@ -58,6 +58,7 @@
 #define TI816X_PRM_IVAHD1_MOD			0x0d00
 #define TI816X_PRM_IVAHD2_MOD			0x0e00
 #define TI816X_PRM_SGX_MOD				0x0f00
+#define TI81XX_PRM_ALWON_MOD			0x1800
 
 /* 24XX register bits shared between CM & PRM registers */
 
diff --git a/arch/arm/mach-omap2/prcm44xx.h b/arch/arm/mach-omap2/prcm44xx.h
index f429cdd..4fea2cf 100644
--- a/arch/arm/mach-omap2/prcm44xx.h
+++ b/arch/arm/mach-omap2/prcm44xx.h
@@ -38,6 +38,11 @@
 #define OMAP54XX_SCRM_PARTITION			4
 #define OMAP54XX_PRCM_MPU_PARTITION		5
 
+#define DRA7XX_PRM_PARTITION                   1
+#define DRA7XX_CM_CORE_AON_PARTITION           2
+#define DRA7XX_CM_CORE_PARTITION               3
+#define DRA7XX_MPU_PRCM_PARTITION              5
+
 /*
  * OMAP4_MAX_PRCM_PARTITIONS: set to the highest value of the PRCM partition
  * IDs, plus one
diff --git a/arch/arm/mach-omap2/prcm_mpu7xx.h b/arch/arm/mach-omap2/prcm_mpu7xx.h
new file mode 100644
index 0000000..9ebb5ce
--- /dev/null
+++ b/arch/arm/mach-omap2/prcm_mpu7xx.h
@@ -0,0 +1,78 @@
+/*
+ * DRA7xx PRCM MPU instance offset macros
+ *
+ * Copyright (C) 2013 Texas Instruments Incorporated - http://www.ti.com
+ *
+ * Generated by code originally written by:
+ * Paul Walmsley (paul@pwsan.com)
+ * Rajendra Nayak (rnayak@ti.com)
+ * Benoit Cousson (b-cousson@ti.com)
+ *
+ * This file is automatically generated from the OMAP hardware databases.
+ * We respectfully ask that any modifications to this file be coordinated
+ * with the public linux-omap@vger.kernel.org mailing list and the
+ * authors above to ensure that the autogeneration scripts are kept
+ * up-to-date with the file contents.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ARCH_ARM_MACH_OMAP2_PRCM_MPU7XX_H
+#define __ARCH_ARM_MACH_OMAP2_PRCM_MPU7XX_H
+
+#include "prcm_mpu_44xx_54xx.h"
+
+#define DRA7XX_PRCM_MPU_BASE			0x48243000
+
+#define DRA7XX_PRCM_MPU_REGADDR(inst, reg)				\
+	OMAP2_L4_IO_ADDRESS(DRA7XX_PRCM_MPU_BASE + (inst) + (reg))
+
+/* MPU_PRCM instances */
+#define DRA7XX_MPU_PRCM_OCP_SOCKET_INST	0x0000
+#define DRA7XX_MPU_PRCM_DEVICE_INST	0x0200
+#define DRA7XX_MPU_PRCM_PRM_C0_INST	0x0400
+#define DRA7XX_MPU_PRCM_CM_C0_INST	0x0600
+#define DRA7XX_MPU_PRCM_PRM_C1_INST	0x0800
+#define DRA7XX_MPU_PRCM_CM_C1_INST	0x0a00
+
+/* PRCM_MPU clockdomain register offsets (from instance start) */
+#define DRA7XX_MPU_PRCM_CM_C0_CPU0_CDOFFS	0x0000
+#define DRA7XX_MPU_PRCM_CM_C1_CPU1_CDOFFS	0x0000
+
+
+/* MPU_PRCM */
+
+/* MPU_PRCM.PRCM_MPU_OCP_SOCKET register offsets */
+#define DRA7XX_REVISION_PRCM_MPU_OFFSET				0x0000
+
+/* MPU_PRCM.PRCM_MPU_DEVICE register offsets */
+#define DRA7XX_PRM_FRAC_INCREMENTER_NUMERATOR_OFFSET		0x0010
+#define DRA7XX_PRM_FRAC_INCREMENTER_DENUMERATOR_RELOAD_OFFSET	0x0014
+
+/* MPU_PRCM.PRCM_MPU_PRM_C0 register offsets */
+#define DRA7XX_PM_CPU0_PWRSTCTRL_OFFSET				0x0000
+#define DRA7XX_PM_CPU0_PWRSTST_OFFSET				0x0004
+#define DRA7XX_RM_CPU0_CPU0_RSTCTRL_OFFSET			0x0010
+#define DRA7XX_RM_CPU0_CPU0_RSTST_OFFSET			0x0014
+#define DRA7XX_RM_CPU0_CPU0_CONTEXT_OFFSET			0x0024
+
+/* MPU_PRCM.PRCM_MPU_CM_C0 register offsets */
+#define DRA7XX_CM_CPU0_CLKSTCTRL_OFFSET				0x0000
+#define DRA7XX_CM_CPU0_CPU0_CLKCTRL_OFFSET			0x0020
+#define DRA7XX_CM_CPU0_CPU0_CLKCTRL				DRA7XX_MPU_PRCM_REGADDR(DRA7XX_MPU_PRCM_CM_C0_INST, 0x0020)
+
+/* MPU_PRCM.PRCM_MPU_PRM_C1 register offsets */
+#define DRA7XX_PM_CPU1_PWRSTCTRL_OFFSET				0x0000
+#define DRA7XX_PM_CPU1_PWRSTST_OFFSET				0x0004
+#define DRA7XX_RM_CPU1_CPU1_RSTCTRL_OFFSET			0x0010
+#define DRA7XX_RM_CPU1_CPU1_RSTST_OFFSET			0x0014
+#define DRA7XX_RM_CPU1_CPU1_CONTEXT_OFFSET			0x0024
+
+/* MPU_PRCM.PRCM_MPU_CM_C1 register offsets */
+#define DRA7XX_CM_CPU1_CLKSTCTRL_OFFSET				0x0000
+#define DRA7XX_CM_CPU1_CPU1_CLKCTRL_OFFSET			0x0020
+#define DRA7XX_CM_CPU1_CPU1_CLKCTRL				DRA7XX_MPU_PRCM_REGADDR(DRA7XX_MPU_PRCM_CM_C1_INST, 0x0020)
+
+#endif
diff --git a/arch/arm/mach-omap2/prm44xx.c b/arch/arm/mach-omap2/prm44xx.c
index 415c7e0..03a6034 100644
--- a/arch/arm/mach-omap2/prm44xx.c
+++ b/arch/arm/mach-omap2/prm44xx.c
@@ -620,6 +620,15 @@
 	return 0;
 }
 
+static int omap4_check_vcvp(void)
+{
+	/* No VC/VP on dra7xx devices */
+	if (soc_is_dra7xx())
+		return 0;
+
+	return 1;
+}
+
 struct pwrdm_ops omap4_pwrdm_operations = {
 	.pwrdm_set_next_pwrst	= omap4_pwrdm_set_next_pwrst,
 	.pwrdm_read_next_pwrst	= omap4_pwrdm_read_next_pwrst,
@@ -637,6 +646,7 @@
 	.pwrdm_set_mem_onst	= omap4_pwrdm_set_mem_onst,
 	.pwrdm_set_mem_retst	= omap4_pwrdm_set_mem_retst,
 	.pwrdm_wait_transition	= omap4_pwrdm_wait_transition,
+	.pwrdm_has_voltdm	= omap4_check_vcvp,
 };
 
 /*
@@ -650,7 +660,7 @@
 
 int __init omap44xx_prm_init(void)
 {
-	if (!cpu_is_omap44xx() && !soc_is_omap54xx())
+	if (!cpu_is_omap44xx() && !soc_is_omap54xx() && !soc_is_dra7xx())
 		return 0;
 
 	return prm_register(&omap44xx_prm_ll_data);
diff --git a/arch/arm/mach-omap2/prm7xx.h b/arch/arm/mach-omap2/prm7xx.h
new file mode 100644
index 0000000..d92a840
--- /dev/null
+++ b/arch/arm/mach-omap2/prm7xx.h
@@ -0,0 +1,678 @@
+/*
+ * DRA7xx PRM instance offset macros
+ *
+ * Copyright (C) 2013 Texas Instruments Incorporated - http://www.ti.com
+ *
+ * Generated by code originally written by:
+ * Paul Walmsley (paul@pwsan.com)
+ * Rajendra Nayak (rnayak@ti.com)
+ * Benoit Cousson (b-cousson@ti.com)
+ *
+ * This file is automatically generated from the OMAP hardware databases.
+ * We respectfully ask that any modifications to this file be coordinated
+ * with the public linux-omap@vger.kernel.org mailing list and the
+ * authors above to ensure that the autogeneration scripts are kept
+ * up-to-date with the file contents.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ARCH_ARM_MACH_OMAP2_PRM7XX_H
+#define __ARCH_ARM_MACH_OMAP2_PRM7XX_H
+
+#include "prm44xx_54xx.h"
+#include "prcm-common.h"
+#include "prm.h"
+
+#define DRA7XX_PRM_BASE		0x4ae06000
+
+#define DRA7XX_PRM_REGADDR(inst, reg)				\
+	OMAP2_L4_IO_ADDRESS(DRA7XX_PRM_BASE + (inst) + (reg))
+
+
+/* PRM instances */
+#define DRA7XX_PRM_OCP_SOCKET_INST	0x0000
+#define DRA7XX_PRM_CKGEN_INST		0x0100
+#define DRA7XX_PRM_MPU_INST		0x0300
+#define DRA7XX_PRM_DSP1_INST		0x0400
+#define DRA7XX_PRM_IPU_INST		0x0500
+#define DRA7XX_PRM_COREAON_INST		0x0628
+#define DRA7XX_PRM_CORE_INST		0x0700
+#define DRA7XX_PRM_IVA_INST		0x0f00
+#define DRA7XX_PRM_CAM_INST		0x1000
+#define DRA7XX_PRM_DSS_INST		0x1100
+#define DRA7XX_PRM_GPU_INST		0x1200
+#define DRA7XX_PRM_L3INIT_INST		0x1300
+#define DRA7XX_PRM_L4PER_INST		0x1400
+#define DRA7XX_PRM_CUSTEFUSE_INST	0x1600
+#define DRA7XX_PRM_WKUPAON_INST		0x1724
+#define DRA7XX_PRM_WKUPAON_CM_INST	0x1800
+#define DRA7XX_PRM_EMU_INST		0x1900
+#define DRA7XX_PRM_EMU_CM_INST		0x1a00
+#define DRA7XX_PRM_DSP2_INST		0x1b00
+#define DRA7XX_PRM_EVE1_INST		0x1b40
+#define DRA7XX_PRM_EVE2_INST		0x1b80
+#define DRA7XX_PRM_EVE3_INST		0x1bc0
+#define DRA7XX_PRM_EVE4_INST		0x1c00
+#define DRA7XX_PRM_RTC_INST		0x1c60
+#define DRA7XX_PRM_VPE_INST		0x1c80
+#define DRA7XX_PRM_DEVICE_INST		0x1d00
+#define DRA7XX_PRM_INSTR_INST		0x1f00
+
+/* PRM clockdomain register offsets (from instance start) */
+#define DRA7XX_PRM_WKUPAON_CM_WKUPAON_CDOFFS	0x0000
+#define DRA7XX_PRM_EMU_CM_EMU_CDOFFS		0x0000
+
+/* PRM */
+
+/* PRM.OCP_SOCKET_PRM register offsets */
+#define DRA7XX_REVISION_PRM_OFFSET				0x0000
+#define DRA7XX_PRM_IRQSTATUS_MPU_OFFSET				0x0010
+#define DRA7XX_PRM_IRQSTATUS_MPU_2_OFFSET			0x0014
+#define DRA7XX_PRM_IRQENABLE_MPU_OFFSET				0x0018
+#define DRA7XX_PRM_IRQENABLE_MPU_2_OFFSET			0x001c
+#define DRA7XX_PRM_IRQSTATUS_IPU2_OFFSET			0x0020
+#define DRA7XX_PRM_IRQENABLE_IPU2_OFFSET			0x0028
+#define DRA7XX_PRM_IRQSTATUS_DSP1_OFFSET			0x0030
+#define DRA7XX_PRM_IRQENABLE_DSP1_OFFSET			0x0038
+#define DRA7XX_CM_PRM_PROFILING_CLKCTRL_OFFSET			0x0040
+#define DRA7XX_CM_PRM_PROFILING_CLKCTRL				DRA7XX_PRM_REGADDR(DRA7XX_PRM_OCP_SOCKET_INST, 0x0040)
+#define DRA7XX_PRM_IRQENABLE_DSP2_OFFSET			0x0044
+#define DRA7XX_PRM_IRQENABLE_EVE1_OFFSET			0x0048
+#define DRA7XX_PRM_IRQENABLE_EVE2_OFFSET			0x004c
+#define DRA7XX_PRM_IRQENABLE_EVE3_OFFSET			0x0050
+#define DRA7XX_PRM_IRQENABLE_EVE4_OFFSET			0x0054
+#define DRA7XX_PRM_IRQENABLE_IPU1_OFFSET			0x0058
+#define DRA7XX_PRM_IRQSTATUS_DSP2_OFFSET			0x005c
+#define DRA7XX_PRM_IRQSTATUS_EVE1_OFFSET			0x0060
+#define DRA7XX_PRM_IRQSTATUS_EVE2_OFFSET			0x0064
+#define DRA7XX_PRM_IRQSTATUS_EVE3_OFFSET			0x0068
+#define DRA7XX_PRM_IRQSTATUS_EVE4_OFFSET			0x006c
+#define DRA7XX_PRM_IRQSTATUS_IPU1_OFFSET			0x0070
+#define DRA7XX_PRM_DEBUG_CFG1_OFFSET				0x00e4
+#define DRA7XX_PRM_DEBUG_CFG2_OFFSET				0x00e8
+#define DRA7XX_PRM_DEBUG_CFG3_OFFSET				0x00ec
+#define DRA7XX_PRM_DEBUG_OUT_OFFSET				0x00f4
+
+/* PRM.CKGEN_PRM register offsets */
+#define DRA7XX_CM_CLKSEL_SYSCLK1_OFFSET				0x0000
+#define DRA7XX_CM_CLKSEL_SYSCLK1				DRA7XX_PRM_REGADDR(DRA7XX_PRM_CKGEN_INST, 0x0000)
+#define DRA7XX_CM_CLKSEL_WKUPAON_OFFSET				0x0008
+#define DRA7XX_CM_CLKSEL_WKUPAON				DRA7XX_PRM_REGADDR(DRA7XX_PRM_CKGEN_INST, 0x0008)
+#define DRA7XX_CM_CLKSEL_ABE_PLL_REF_OFFSET			0x000c
+#define DRA7XX_CM_CLKSEL_ABE_PLL_REF				DRA7XX_PRM_REGADDR(DRA7XX_PRM_CKGEN_INST, 0x000c)
+#define DRA7XX_CM_CLKSEL_SYS_OFFSET				0x0010
+#define DRA7XX_CM_CLKSEL_SYS					DRA7XX_PRM_REGADDR(DRA7XX_PRM_CKGEN_INST, 0x0010)
+#define DRA7XX_CM_CLKSEL_ABE_PLL_BYPAS_OFFSET			0x0014
+#define DRA7XX_CM_CLKSEL_ABE_PLL_BYPAS				DRA7XX_PRM_REGADDR(DRA7XX_PRM_CKGEN_INST, 0x0014)
+#define DRA7XX_CM_CLKSEL_ABE_PLL_SYS_OFFSET			0x0018
+#define DRA7XX_CM_CLKSEL_ABE_PLL_SYS				DRA7XX_PRM_REGADDR(DRA7XX_PRM_CKGEN_INST, 0x0018)
+#define DRA7XX_CM_CLKSEL_ABE_24M_OFFSET				0x001c
+#define DRA7XX_CM_CLKSEL_ABE_24M				DRA7XX_PRM_REGADDR(DRA7XX_PRM_CKGEN_INST, 0x001c)
+#define DRA7XX_CM_CLKSEL_ABE_SYS_OFFSET				0x0020
+#define DRA7XX_CM_CLKSEL_ABE_SYS				DRA7XX_PRM_REGADDR(DRA7XX_PRM_CKGEN_INST, 0x0020)
+#define DRA7XX_CM_CLKSEL_HDMI_MCASP_AUX_OFFSET			0x0024
+#define DRA7XX_CM_CLKSEL_HDMI_MCASP_AUX				DRA7XX_PRM_REGADDR(DRA7XX_PRM_CKGEN_INST, 0x0024)
+#define DRA7XX_CM_CLKSEL_HDMI_TIMER_OFFSET			0x0028
+#define DRA7XX_CM_CLKSEL_HDMI_TIMER				DRA7XX_PRM_REGADDR(DRA7XX_PRM_CKGEN_INST, 0x0028)
+#define DRA7XX_CM_CLKSEL_MCASP_SYS_OFFSET			0x002c
+#define DRA7XX_CM_CLKSEL_MCASP_SYS				DRA7XX_PRM_REGADDR(DRA7XX_PRM_CKGEN_INST, 0x002c)
+#define DRA7XX_CM_CLKSEL_MLBP_MCASP_OFFSET			0x0030
+#define DRA7XX_CM_CLKSEL_MLBP_MCASP				DRA7XX_PRM_REGADDR(DRA7XX_PRM_CKGEN_INST, 0x0030)
+#define DRA7XX_CM_CLKSEL_MLB_MCASP_OFFSET			0x0034
+#define DRA7XX_CM_CLKSEL_MLB_MCASP				DRA7XX_PRM_REGADDR(DRA7XX_PRM_CKGEN_INST, 0x0034)
+#define DRA7XX_CM_CLKSEL_PER_ABE_X1_GFCLK_MCASP_AUX_OFFSET	0x0038
+#define DRA7XX_CM_CLKSEL_PER_ABE_X1_GFCLK_MCASP_AUX		DRA7XX_PRM_REGADDR(DRA7XX_PRM_CKGEN_INST, 0x0038)
+#define DRA7XX_CM_CLKSEL_SYS_CLK1_32K_OFFSET			0x0040
+#define DRA7XX_CM_CLKSEL_SYS_CLK1_32K				DRA7XX_PRM_REGADDR(DRA7XX_PRM_CKGEN_INST, 0x0040)
+#define DRA7XX_CM_CLKSEL_TIMER_SYS_OFFSET			0x0044
+#define DRA7XX_CM_CLKSEL_TIMER_SYS				DRA7XX_PRM_REGADDR(DRA7XX_PRM_CKGEN_INST, 0x0044)
+#define DRA7XX_CM_CLKSEL_VIDEO1_MCASP_AUX_OFFSET		0x0048
+#define DRA7XX_CM_CLKSEL_VIDEO1_MCASP_AUX			DRA7XX_PRM_REGADDR(DRA7XX_PRM_CKGEN_INST, 0x0048)
+#define DRA7XX_CM_CLKSEL_VIDEO1_TIMER_OFFSET			0x004c
+#define DRA7XX_CM_CLKSEL_VIDEO1_TIMER				DRA7XX_PRM_REGADDR(DRA7XX_PRM_CKGEN_INST, 0x004c)
+#define DRA7XX_CM_CLKSEL_VIDEO2_MCASP_AUX_OFFSET		0x0050
+#define DRA7XX_CM_CLKSEL_VIDEO2_MCASP_AUX			DRA7XX_PRM_REGADDR(DRA7XX_PRM_CKGEN_INST, 0x0050)
+#define DRA7XX_CM_CLKSEL_VIDEO2_TIMER_OFFSET			0x0054
+#define DRA7XX_CM_CLKSEL_VIDEO2_TIMER				DRA7XX_PRM_REGADDR(DRA7XX_PRM_CKGEN_INST, 0x0054)
+#define DRA7XX_CM_CLKSEL_CLKOUTMUX0_OFFSET			0x0058
+#define DRA7XX_CM_CLKSEL_CLKOUTMUX0				DRA7XX_PRM_REGADDR(DRA7XX_PRM_CKGEN_INST, 0x0058)
+#define DRA7XX_CM_CLKSEL_CLKOUTMUX1_OFFSET			0x005c
+#define DRA7XX_CM_CLKSEL_CLKOUTMUX1				DRA7XX_PRM_REGADDR(DRA7XX_PRM_CKGEN_INST, 0x005c)
+#define DRA7XX_CM_CLKSEL_CLKOUTMUX2_OFFSET			0x0060
+#define DRA7XX_CM_CLKSEL_CLKOUTMUX2				DRA7XX_PRM_REGADDR(DRA7XX_PRM_CKGEN_INST, 0x0060)
+#define DRA7XX_CM_CLKSEL_HDMI_PLL_SYS_OFFSET			0x0064
+#define DRA7XX_CM_CLKSEL_HDMI_PLL_SYS				DRA7XX_PRM_REGADDR(DRA7XX_PRM_CKGEN_INST, 0x0064)
+#define DRA7XX_CM_CLKSEL_VIDEO1_PLL_SYS_OFFSET			0x0068
+#define DRA7XX_CM_CLKSEL_VIDEO1_PLL_SYS				DRA7XX_PRM_REGADDR(DRA7XX_PRM_CKGEN_INST, 0x0068)
+#define DRA7XX_CM_CLKSEL_VIDEO2_PLL_SYS_OFFSET			0x006c
+#define DRA7XX_CM_CLKSEL_VIDEO2_PLL_SYS				DRA7XX_PRM_REGADDR(DRA7XX_PRM_CKGEN_INST, 0x006c)
+#define DRA7XX_CM_CLKSEL_ABE_CLK_DIV_OFFSET			0x0070
+#define DRA7XX_CM_CLKSEL_ABE_CLK_DIV				DRA7XX_PRM_REGADDR(DRA7XX_PRM_CKGEN_INST, 0x0070)
+#define DRA7XX_CM_CLKSEL_ABE_GICLK_DIV_OFFSET			0x0074
+#define DRA7XX_CM_CLKSEL_ABE_GICLK_DIV				DRA7XX_PRM_REGADDR(DRA7XX_PRM_CKGEN_INST, 0x0074)
+#define DRA7XX_CM_CLKSEL_AESS_FCLK_DIV_OFFSET			0x0078
+#define DRA7XX_CM_CLKSEL_AESS_FCLK_DIV				DRA7XX_PRM_REGADDR(DRA7XX_PRM_CKGEN_INST, 0x0078)
+#define DRA7XX_CM_CLKSEL_EVE_CLK_OFFSET				0x0080
+#define DRA7XX_CM_CLKSEL_EVE_CLK				DRA7XX_PRM_REGADDR(DRA7XX_PRM_CKGEN_INST, 0x0080)
+#define DRA7XX_CM_CLKSEL_USB_OTG_CLK_CLKOUTMUX_OFFSET		0x0084
+#define DRA7XX_CM_CLKSEL_USB_OTG_CLK_CLKOUTMUX			DRA7XX_PRM_REGADDR(DRA7XX_PRM_CKGEN_INST, 0x0084)
+#define DRA7XX_CM_CLKSEL_CORE_DPLL_OUT_CLK_CLKOUTMUX_OFFSET	0x0088
+#define DRA7XX_CM_CLKSEL_CORE_DPLL_OUT_CLK_CLKOUTMUX		DRA7XX_PRM_REGADDR(DRA7XX_PRM_CKGEN_INST, 0x0088)
+#define DRA7XX_CM_CLKSEL_DSP_GFCLK_CLKOUTMUX_OFFSET		0x008c
+#define DRA7XX_CM_CLKSEL_DSP_GFCLK_CLKOUTMUX			DRA7XX_PRM_REGADDR(DRA7XX_PRM_CKGEN_INST, 0x008c)
+#define DRA7XX_CM_CLKSEL_EMIF_PHY_GCLK_CLKOUTMUX_OFFSET		0x0090
+#define DRA7XX_CM_CLKSEL_EMIF_PHY_GCLK_CLKOUTMUX		DRA7XX_PRM_REGADDR(DRA7XX_PRM_CKGEN_INST, 0x0090)
+#define DRA7XX_CM_CLKSEL_EMU_CLK_CLKOUTMUX_OFFSET		0x0094
+#define DRA7XX_CM_CLKSEL_EMU_CLK_CLKOUTMUX			DRA7XX_PRM_REGADDR(DRA7XX_PRM_CKGEN_INST, 0x0094)
+#define DRA7XX_CM_CLKSEL_FUNC_96M_AON_CLK_CLKOUTMUX_OFFSET	0x0098
+#define DRA7XX_CM_CLKSEL_FUNC_96M_AON_CLK_CLKOUTMUX		DRA7XX_PRM_REGADDR(DRA7XX_PRM_CKGEN_INST, 0x0098)
+#define DRA7XX_CM_CLKSEL_GMAC_250M_CLK_CLKOUTMUX_OFFSET		0x009c
+#define DRA7XX_CM_CLKSEL_GMAC_250M_CLK_CLKOUTMUX		DRA7XX_PRM_REGADDR(DRA7XX_PRM_CKGEN_INST, 0x009c)
+#define DRA7XX_CM_CLKSEL_GPU_GCLK_CLKOUTMUX_OFFSET		0x00a0
+#define DRA7XX_CM_CLKSEL_GPU_GCLK_CLKOUTMUX			DRA7XX_PRM_REGADDR(DRA7XX_PRM_CKGEN_INST, 0x00a0)
+#define DRA7XX_CM_CLKSEL_HDMI_CLK_CLKOUTMUX_OFFSET		0x00a4
+#define DRA7XX_CM_CLKSEL_HDMI_CLK_CLKOUTMUX			DRA7XX_PRM_REGADDR(DRA7XX_PRM_CKGEN_INST, 0x00a4)
+#define DRA7XX_CM_CLKSEL_IVA_GCLK_CLKOUTMUX_OFFSET		0x00a8
+#define DRA7XX_CM_CLKSEL_IVA_GCLK_CLKOUTMUX			DRA7XX_PRM_REGADDR(DRA7XX_PRM_CKGEN_INST, 0x00a8)
+#define DRA7XX_CM_CLKSEL_L3INIT_480M_GFCLK_CLKOUTMUX_OFFSET	0x00ac
+#define DRA7XX_CM_CLKSEL_L3INIT_480M_GFCLK_CLKOUTMUX		DRA7XX_PRM_REGADDR(DRA7XX_PRM_CKGEN_INST, 0x00ac)
+#define DRA7XX_CM_CLKSEL_MPU_GCLK_CLKOUTMUX_OFFSET		0x00b0
+#define DRA7XX_CM_CLKSEL_MPU_GCLK_CLKOUTMUX			DRA7XX_PRM_REGADDR(DRA7XX_PRM_CKGEN_INST, 0x00b0)
+#define DRA7XX_CM_CLKSEL_PCIE1_CLK_CLKOUTMUX_OFFSET		0x00b4
+#define DRA7XX_CM_CLKSEL_PCIE1_CLK_CLKOUTMUX			DRA7XX_PRM_REGADDR(DRA7XX_PRM_CKGEN_INST, 0x00b4)
+#define DRA7XX_CM_CLKSEL_PCIE2_CLK_CLKOUTMUX_OFFSET		0x00b8
+#define DRA7XX_CM_CLKSEL_PCIE2_CLK_CLKOUTMUX			DRA7XX_PRM_REGADDR(DRA7XX_PRM_CKGEN_INST, 0x00b8)
+#define DRA7XX_CM_CLKSEL_PER_ABE_X1_CLK_CLKOUTMUX_OFFSET	0x00bc
+#define DRA7XX_CM_CLKSEL_PER_ABE_X1_CLK_CLKOUTMUX		DRA7XX_PRM_REGADDR(DRA7XX_PRM_CKGEN_INST, 0x00bc)
+#define DRA7XX_CM_CLKSEL_SATA_CLK_CLKOUTMUX_OFFSET		0x00c0
+#define DRA7XX_CM_CLKSEL_SATA_CLK_CLKOUTMUX			DRA7XX_PRM_REGADDR(DRA7XX_PRM_CKGEN_INST, 0x00c0)
+#define DRA7XX_CM_CLKSEL_SECURE_32K_CLK_CLKOUTMUX_OFFSET	0x00c4
+#define DRA7XX_CM_CLKSEL_SECURE_32K_CLK_CLKOUTMUX		DRA7XX_PRM_REGADDR(DRA7XX_PRM_CKGEN_INST, 0x00c4)
+#define DRA7XX_CM_CLKSEL_SYS_CLK1_CLKOUTMUX_OFFSET		0x00c8
+#define DRA7XX_CM_CLKSEL_SYS_CLK1_CLKOUTMUX			DRA7XX_PRM_REGADDR(DRA7XX_PRM_CKGEN_INST, 0x00c8)
+#define DRA7XX_CM_CLKSEL_SYS_CLK2_CLKOUTMUX_OFFSET		0x00cc
+#define DRA7XX_CM_CLKSEL_SYS_CLK2_CLKOUTMUX			DRA7XX_PRM_REGADDR(DRA7XX_PRM_CKGEN_INST, 0x00cc)
+#define DRA7XX_CM_CLKSEL_VIDEO1_CLK_CLKOUTMUX_OFFSET		0x00d0
+#define DRA7XX_CM_CLKSEL_VIDEO1_CLK_CLKOUTMUX			DRA7XX_PRM_REGADDR(DRA7XX_PRM_CKGEN_INST, 0x00d0)
+#define DRA7XX_CM_CLKSEL_VIDEO2_CLK_CLKOUTMUX_OFFSET		0x00d4
+#define DRA7XX_CM_CLKSEL_VIDEO2_CLK_CLKOUTMUX			DRA7XX_PRM_REGADDR(DRA7XX_PRM_CKGEN_INST, 0x00d4)
+#define DRA7XX_CM_CLKSEL_ABE_LP_CLK_OFFSET			0x00d8
+#define DRA7XX_CM_CLKSEL_ABE_LP_CLK				DRA7XX_PRM_REGADDR(DRA7XX_PRM_CKGEN_INST, 0x00d8)
+#define DRA7XX_CM_CLKSEL_ADC_GFCLK_OFFSET			0x00dc
+#define DRA7XX_CM_CLKSEL_ADC_GFCLK				DRA7XX_PRM_REGADDR(DRA7XX_PRM_CKGEN_INST, 0x00dc)
+#define DRA7XX_CM_CLKSEL_EVE_GFCLK_CLKOUTMUX_OFFSET		0x00e0
+#define DRA7XX_CM_CLKSEL_EVE_GFCLK_CLKOUTMUX			DRA7XX_PRM_REGADDR(DRA7XX_PRM_CKGEN_INST, 0x00e0)
+
+/* PRM.MPU_PRM register offsets */
+#define DRA7XX_PM_MPU_PWRSTCTRL_OFFSET				0x0000
+#define DRA7XX_PM_MPU_PWRSTST_OFFSET				0x0004
+#define DRA7XX_RM_MPU_MPU_CONTEXT_OFFSET			0x0024
+
+/* PRM.DSP1_PRM register offsets */
+#define DRA7XX_PM_DSP1_PWRSTCTRL_OFFSET				0x0000
+#define DRA7XX_PM_DSP1_PWRSTST_OFFSET				0x0004
+#define DRA7XX_RM_DSP1_RSTCTRL_OFFSET				0x0010
+#define DRA7XX_RM_DSP1_RSTST_OFFSET				0x0014
+#define DRA7XX_RM_DSP1_DSP1_CONTEXT_OFFSET			0x0024
+
+/* PRM.IPU_PRM register offsets */
+#define DRA7XX_PM_IPU_PWRSTCTRL_OFFSET				0x0000
+#define DRA7XX_PM_IPU_PWRSTST_OFFSET				0x0004
+#define DRA7XX_RM_IPU1_RSTCTRL_OFFSET				0x0010
+#define DRA7XX_RM_IPU1_RSTST_OFFSET				0x0014
+#define DRA7XX_RM_IPU1_IPU1_CONTEXT_OFFSET			0x0024
+#define DRA7XX_PM_IPU_MCASP1_WKDEP_OFFSET			0x0050
+#define DRA7XX_RM_IPU_MCASP1_CONTEXT_OFFSET			0x0054
+#define DRA7XX_PM_IPU_TIMER5_WKDEP_OFFSET			0x0058
+#define DRA7XX_RM_IPU_TIMER5_CONTEXT_OFFSET			0x005c
+#define DRA7XX_PM_IPU_TIMER6_WKDEP_OFFSET			0x0060
+#define DRA7XX_RM_IPU_TIMER6_CONTEXT_OFFSET			0x0064
+#define DRA7XX_PM_IPU_TIMER7_WKDEP_OFFSET			0x0068
+#define DRA7XX_RM_IPU_TIMER7_CONTEXT_OFFSET			0x006c
+#define DRA7XX_PM_IPU_TIMER8_WKDEP_OFFSET			0x0070
+#define DRA7XX_RM_IPU_TIMER8_CONTEXT_OFFSET			0x0074
+#define DRA7XX_PM_IPU_I2C5_WKDEP_OFFSET				0x0078
+#define DRA7XX_RM_IPU_I2C5_CONTEXT_OFFSET			0x007c
+#define DRA7XX_PM_IPU_UART6_WKDEP_OFFSET			0x0080
+#define DRA7XX_RM_IPU_UART6_CONTEXT_OFFSET			0x0084
+
+/* PRM.COREAON_PRM register offsets */
+#define DRA7XX_PM_COREAON_SMARTREFLEX_MPU_WKDEP_OFFSET		0x0000
+#define DRA7XX_RM_COREAON_SMARTREFLEX_MPU_CONTEXT_OFFSET	0x0004
+#define DRA7XX_PM_COREAON_SMARTREFLEX_CORE_WKDEP_OFFSET		0x0010
+#define DRA7XX_RM_COREAON_SMARTREFLEX_CORE_CONTEXT_OFFSET	0x0014
+#define DRA7XX_PM_COREAON_SMARTREFLEX_GPU_WKDEP_OFFSET		0x0030
+#define DRA7XX_RM_COREAON_SMARTREFLEX_GPU_CONTEXT_OFFSET	0x0034
+#define DRA7XX_PM_COREAON_SMARTREFLEX_DSPEVE_WKDEP_OFFSET	0x0040
+#define DRA7XX_RM_COREAON_SMARTREFLEX_DSPEVE_CONTEXT_OFFSET	0x0044
+#define DRA7XX_PM_COREAON_SMARTREFLEX_IVAHD_WKDEP_OFFSET	0x0050
+#define DRA7XX_RM_COREAON_SMARTREFLEX_IVAHD_CONTEXT_OFFSET	0x0054
+#define DRA7XX_RM_COREAON_DUMMY_MODULE1_CONTEXT_OFFSET		0x0084
+#define DRA7XX_RM_COREAON_DUMMY_MODULE2_CONTEXT_OFFSET		0x0094
+#define DRA7XX_RM_COREAON_DUMMY_MODULE3_CONTEXT_OFFSET		0x00a4
+#define DRA7XX_RM_COREAON_DUMMY_MODULE4_CONTEXT_OFFSET		0x00b4
+
+/* PRM.CORE_PRM register offsets */
+#define DRA7XX_PM_CORE_PWRSTCTRL_OFFSET				0x0000
+#define DRA7XX_PM_CORE_PWRSTST_OFFSET				0x0004
+#define DRA7XX_RM_L3MAIN1_L3_MAIN_1_CONTEXT_OFFSET		0x0024
+#define DRA7XX_RM_L3MAIN1_GPMC_CONTEXT_OFFSET			0x002c
+#define DRA7XX_RM_L3MAIN1_MMU_EDMA_CONTEXT_OFFSET		0x0034
+#define DRA7XX_PM_L3MAIN1_OCMC_RAM1_WKDEP_OFFSET		0x0050
+#define DRA7XX_RM_L3MAIN1_OCMC_RAM1_CONTEXT_OFFSET		0x0054
+#define DRA7XX_PM_L3MAIN1_OCMC_RAM2_WKDEP_OFFSET		0x0058
+#define DRA7XX_RM_L3MAIN1_OCMC_RAM2_CONTEXT_OFFSET		0x005c
+#define DRA7XX_PM_L3MAIN1_OCMC_RAM3_WKDEP_OFFSET		0x0060
+#define DRA7XX_RM_L3MAIN1_OCMC_RAM3_CONTEXT_OFFSET		0x0064
+#define DRA7XX_RM_L3MAIN1_OCMC_ROM_CONTEXT_OFFSET		0x006c
+#define DRA7XX_PM_L3MAIN1_TPCC_WKDEP_OFFSET			0x0070
+#define DRA7XX_RM_L3MAIN1_TPCC_CONTEXT_OFFSET			0x0074
+#define DRA7XX_PM_L3MAIN1_TPTC1_WKDEP_OFFSET			0x0078
+#define DRA7XX_RM_L3MAIN1_TPTC1_CONTEXT_OFFSET			0x007c
+#define DRA7XX_PM_L3MAIN1_TPTC2_WKDEP_OFFSET			0x0080
+#define DRA7XX_RM_L3MAIN1_TPTC2_CONTEXT_OFFSET			0x0084
+#define DRA7XX_RM_L3MAIN1_VCP1_CONTEXT_OFFSET			0x008c
+#define DRA7XX_RM_L3MAIN1_VCP2_CONTEXT_OFFSET			0x0094
+#define DRA7XX_RM_L3MAIN1_SPARE_CME_CONTEXT_OFFSET		0x009c
+#define DRA7XX_RM_L3MAIN1_SPARE_HDMI_CONTEXT_OFFSET		0x00a4
+#define DRA7XX_RM_L3MAIN1_SPARE_ICM_CONTEXT_OFFSET		0x00ac
+#define DRA7XX_RM_L3MAIN1_SPARE_IVA2_CONTEXT_OFFSET		0x00b4
+#define DRA7XX_RM_L3MAIN1_SPARE_SATA2_CONTEXT_OFFSET		0x00bc
+#define DRA7XX_RM_L3MAIN1_SPARE_UNKNOWN4_CONTEXT_OFFSET		0x00c4
+#define DRA7XX_RM_L3MAIN1_SPARE_UNKNOWN5_CONTEXT_OFFSET		0x00cc
+#define DRA7XX_RM_L3MAIN1_SPARE_UNKNOWN6_CONTEXT_OFFSET		0x00d4
+#define DRA7XX_RM_L3MAIN1_SPARE_VIDEOPLL1_CONTEXT_OFFSET	0x00dc
+#define DRA7XX_RM_L3MAIN1_SPARE_VIDEOPLL2_CONTEXT_OFFSET	0x00f4
+#define DRA7XX_RM_L3MAIN1_SPARE_VIDEOPLL3_CONTEXT_OFFSET	0x00fc
+#define DRA7XX_RM_IPU2_RSTCTRL_OFFSET				0x0210
+#define DRA7XX_RM_IPU2_RSTST_OFFSET				0x0214
+#define DRA7XX_RM_IPU2_IPU2_CONTEXT_OFFSET			0x0224
+#define DRA7XX_RM_DMA_DMA_SYSTEM_CONTEXT_OFFSET			0x0324
+#define DRA7XX_RM_EMIF_DMM_CONTEXT_OFFSET			0x0424
+#define DRA7XX_RM_EMIF_EMIF_OCP_FW_CONTEXT_OFFSET		0x042c
+#define DRA7XX_RM_EMIF_EMIF1_CONTEXT_OFFSET			0x0434
+#define DRA7XX_RM_EMIF_EMIF2_CONTEXT_OFFSET			0x043c
+#define DRA7XX_RM_EMIF_EMIF_DLL_CONTEXT_OFFSET			0x0444
+#define DRA7XX_RM_ATL_ATL_CONTEXT_OFFSET			0x0524
+#define DRA7XX_RM_L4CFG_L4_CFG_CONTEXT_OFFSET			0x0624
+#define DRA7XX_RM_L4CFG_SPINLOCK_CONTEXT_OFFSET			0x062c
+#define DRA7XX_RM_L4CFG_MAILBOX1_CONTEXT_OFFSET			0x0634
+#define DRA7XX_RM_L4CFG_SAR_ROM_CONTEXT_OFFSET			0x063c
+#define DRA7XX_RM_L4CFG_OCP2SCP2_CONTEXT_OFFSET			0x0644
+#define DRA7XX_RM_L4CFG_MAILBOX2_CONTEXT_OFFSET			0x064c
+#define DRA7XX_RM_L4CFG_MAILBOX3_CONTEXT_OFFSET			0x0654
+#define DRA7XX_RM_L4CFG_MAILBOX4_CONTEXT_OFFSET			0x065c
+#define DRA7XX_RM_L4CFG_MAILBOX5_CONTEXT_OFFSET			0x0664
+#define DRA7XX_RM_L4CFG_MAILBOX6_CONTEXT_OFFSET			0x066c
+#define DRA7XX_RM_L4CFG_MAILBOX7_CONTEXT_OFFSET			0x0674
+#define DRA7XX_RM_L4CFG_MAILBOX8_CONTEXT_OFFSET			0x067c
+#define DRA7XX_RM_L4CFG_MAILBOX9_CONTEXT_OFFSET			0x0684
+#define DRA7XX_RM_L4CFG_MAILBOX10_CONTEXT_OFFSET		0x068c
+#define DRA7XX_RM_L4CFG_MAILBOX11_CONTEXT_OFFSET		0x0694
+#define DRA7XX_RM_L4CFG_MAILBOX12_CONTEXT_OFFSET		0x069c
+#define DRA7XX_RM_L4CFG_MAILBOX13_CONTEXT_OFFSET		0x06a4
+#define DRA7XX_RM_L4CFG_SPARE_SMARTREFLEX_RTC_CONTEXT_OFFSET	0x06ac
+#define DRA7XX_RM_L4CFG_SPARE_SMARTREFLEX_SDRAM_CONTEXT_OFFSET	0x06b4
+#define DRA7XX_RM_L4CFG_SPARE_SMARTREFLEX_WKUP_CONTEXT_OFFSET	0x06bc
+#define DRA7XX_RM_L4CFG_IO_DELAY_BLOCK_CONTEXT_OFFSET		0x06c4
+#define DRA7XX_RM_L3INSTR_L3_MAIN_2_CONTEXT_OFFSET		0x0724
+#define DRA7XX_RM_L3INSTR_L3_INSTR_CONTEXT_OFFSET		0x072c
+#define DRA7XX_RM_L3INSTR_OCP_WP_NOC_CONTEXT_OFFSET		0x0744
+
+/* PRM.IVA_PRM register offsets */
+#define DRA7XX_PM_IVA_PWRSTCTRL_OFFSET				0x0000
+#define DRA7XX_PM_IVA_PWRSTST_OFFSET				0x0004
+#define DRA7XX_RM_IVA_RSTCTRL_OFFSET				0x0010
+#define DRA7XX_RM_IVA_RSTST_OFFSET				0x0014
+#define DRA7XX_RM_IVA_IVA_CONTEXT_OFFSET			0x0024
+#define DRA7XX_RM_IVA_SL2_CONTEXT_OFFSET			0x002c
+
+/* PRM.CAM_PRM register offsets */
+#define DRA7XX_PM_CAM_PWRSTCTRL_OFFSET				0x0000
+#define DRA7XX_PM_CAM_PWRSTST_OFFSET				0x0004
+#define DRA7XX_PM_CAM_VIP1_WKDEP_OFFSET				0x0020
+#define DRA7XX_RM_CAM_VIP1_CONTEXT_OFFSET			0x0024
+#define DRA7XX_PM_CAM_VIP2_WKDEP_OFFSET				0x0028
+#define DRA7XX_RM_CAM_VIP2_CONTEXT_OFFSET			0x002c
+#define DRA7XX_PM_CAM_VIP3_WKDEP_OFFSET				0x0030
+#define DRA7XX_RM_CAM_VIP3_CONTEXT_OFFSET			0x0034
+#define DRA7XX_RM_CAM_LVDSRX_CONTEXT_OFFSET			0x003c
+#define DRA7XX_RM_CAM_CSI1_CONTEXT_OFFSET			0x0044
+#define DRA7XX_RM_CAM_CSI2_CONTEXT_OFFSET			0x004c
+
+/* PRM.DSS_PRM register offsets */
+#define DRA7XX_PM_DSS_PWRSTCTRL_OFFSET				0x0000
+#define DRA7XX_PM_DSS_PWRSTST_OFFSET				0x0004
+#define DRA7XX_PM_DSS_DSS_WKDEP_OFFSET				0x0020
+#define DRA7XX_RM_DSS_DSS_CONTEXT_OFFSET			0x0024
+#define DRA7XX_PM_DSS_DSS2_WKDEP_OFFSET				0x0028
+#define DRA7XX_RM_DSS_BB2D_CONTEXT_OFFSET			0x0034
+#define DRA7XX_RM_DSS_SDVENC_CONTEXT_OFFSET			0x003c
+
+/* PRM.GPU_PRM register offsets */
+#define DRA7XX_PM_GPU_PWRSTCTRL_OFFSET				0x0000
+#define DRA7XX_PM_GPU_PWRSTST_OFFSET				0x0004
+#define DRA7XX_RM_GPU_GPU_CONTEXT_OFFSET			0x0024
+
+/* PRM.L3INIT_PRM register offsets */
+#define DRA7XX_PM_L3INIT_PWRSTCTRL_OFFSET			0x0000
+#define DRA7XX_PM_L3INIT_PWRSTST_OFFSET				0x0004
+#define DRA7XX_PM_L3INIT_MMC1_WKDEP_OFFSET			0x0028
+#define DRA7XX_RM_L3INIT_MMC1_CONTEXT_OFFSET			0x002c
+#define DRA7XX_PM_L3INIT_MMC2_WKDEP_OFFSET			0x0030
+#define DRA7XX_RM_L3INIT_MMC2_CONTEXT_OFFSET			0x0034
+#define DRA7XX_PM_L3INIT_USB_OTG_SS2_WKDEP_OFFSET		0x0040
+#define DRA7XX_RM_L3INIT_USB_OTG_SS2_CONTEXT_OFFSET		0x0044
+#define DRA7XX_PM_L3INIT_USB_OTG_SS3_WKDEP_OFFSET		0x0048
+#define DRA7XX_RM_L3INIT_USB_OTG_SS3_CONTEXT_OFFSET		0x004c
+#define DRA7XX_PM_L3INIT_USB_OTG_SS4_WKDEP_OFFSET		0x0050
+#define DRA7XX_RM_L3INIT_USB_OTG_SS4_CONTEXT_OFFSET		0x0054
+#define DRA7XX_RM_L3INIT_MLB_SS_CONTEXT_OFFSET			0x005c
+#define DRA7XX_RM_L3INIT_IEEE1500_2_OCP_CONTEXT_OFFSET		0x007c
+#define DRA7XX_PM_L3INIT_SATA_WKDEP_OFFSET			0x0088
+#define DRA7XX_RM_L3INIT_SATA_CONTEXT_OFFSET			0x008c
+#define DRA7XX_RM_GMAC_GMAC_CONTEXT_OFFSET			0x00d4
+#define DRA7XX_RM_L3INIT_OCP2SCP1_CONTEXT_OFFSET		0x00e4
+#define DRA7XX_RM_L3INIT_OCP2SCP3_CONTEXT_OFFSET		0x00ec
+#define DRA7XX_PM_L3INIT_USB_OTG_SS1_WKDEP_OFFSET		0x00f0
+#define DRA7XX_RM_L3INIT_USB_OTG_SS1_CONTEXT_OFFSET		0x00f4
+
+/* PRM.L4PER_PRM register offsets */
+#define DRA7XX_PM_L4PER_PWRSTCTRL_OFFSET			0x0000
+#define DRA7XX_PM_L4PER_PWRSTST_OFFSET				0x0004
+#define DRA7XX_RM_L4PER2_L4PER2_CONTEXT_OFFSET			0x000c
+#define DRA7XX_RM_L4PER3_L4PER3_CONTEXT_OFFSET			0x0014
+#define DRA7XX_RM_L4PER2_PRUSS1_CONTEXT_OFFSET			0x001c
+#define DRA7XX_RM_L4PER2_PRUSS2_CONTEXT_OFFSET			0x0024
+#define DRA7XX_PM_L4PER_TIMER10_WKDEP_OFFSET			0x0028
+#define DRA7XX_RM_L4PER_TIMER10_CONTEXT_OFFSET			0x002c
+#define DRA7XX_PM_L4PER_TIMER11_WKDEP_OFFSET			0x0030
+#define DRA7XX_RM_L4PER_TIMER11_CONTEXT_OFFSET			0x0034
+#define DRA7XX_PM_L4PER_TIMER2_WKDEP_OFFSET			0x0038
+#define DRA7XX_RM_L4PER_TIMER2_CONTEXT_OFFSET			0x003c
+#define DRA7XX_PM_L4PER_TIMER3_WKDEP_OFFSET			0x0040
+#define DRA7XX_RM_L4PER_TIMER3_CONTEXT_OFFSET			0x0044
+#define DRA7XX_PM_L4PER_TIMER4_WKDEP_OFFSET			0x0048
+#define DRA7XX_RM_L4PER_TIMER4_CONTEXT_OFFSET			0x004c
+#define DRA7XX_PM_L4PER_TIMER9_WKDEP_OFFSET			0x0050
+#define DRA7XX_RM_L4PER_TIMER9_CONTEXT_OFFSET			0x0054
+#define DRA7XX_RM_L4PER_ELM_CONTEXT_OFFSET			0x005c
+#define DRA7XX_PM_L4PER_GPIO2_WKDEP_OFFSET			0x0060
+#define DRA7XX_RM_L4PER_GPIO2_CONTEXT_OFFSET			0x0064
+#define DRA7XX_PM_L4PER_GPIO3_WKDEP_OFFSET			0x0068
+#define DRA7XX_RM_L4PER_GPIO3_CONTEXT_OFFSET			0x006c
+#define DRA7XX_PM_L4PER_GPIO4_WKDEP_OFFSET			0x0070
+#define DRA7XX_RM_L4PER_GPIO4_CONTEXT_OFFSET			0x0074
+#define DRA7XX_PM_L4PER_GPIO5_WKDEP_OFFSET			0x0078
+#define DRA7XX_RM_L4PER_GPIO5_CONTEXT_OFFSET			0x007c
+#define DRA7XX_PM_L4PER_GPIO6_WKDEP_OFFSET			0x0080
+#define DRA7XX_RM_L4PER_GPIO6_CONTEXT_OFFSET			0x0084
+#define DRA7XX_RM_L4PER_HDQ1W_CONTEXT_OFFSET			0x008c
+#define DRA7XX_RM_L4PER2_PWMSS2_CONTEXT_OFFSET			0x0094
+#define DRA7XX_RM_L4PER2_PWMSS3_CONTEXT_OFFSET			0x009c
+#define DRA7XX_PM_L4PER_I2C1_WKDEP_OFFSET			0x00a0
+#define DRA7XX_RM_L4PER_I2C1_CONTEXT_OFFSET			0x00a4
+#define DRA7XX_PM_L4PER_I2C2_WKDEP_OFFSET			0x00a8
+#define DRA7XX_RM_L4PER_I2C2_CONTEXT_OFFSET			0x00ac
+#define DRA7XX_PM_L4PER_I2C3_WKDEP_OFFSET			0x00b0
+#define DRA7XX_RM_L4PER_I2C3_CONTEXT_OFFSET			0x00b4
+#define DRA7XX_PM_L4PER_I2C4_WKDEP_OFFSET			0x00b8
+#define DRA7XX_RM_L4PER_I2C4_CONTEXT_OFFSET			0x00bc
+#define DRA7XX_RM_L4PER_L4PER1_CONTEXT_OFFSET			0x00c0
+#define DRA7XX_RM_L4PER2_PWMSS1_CONTEXT_OFFSET			0x00c4
+#define DRA7XX_PM_L4PER_TIMER13_WKDEP_OFFSET			0x00c8
+#define DRA7XX_RM_L4PER3_TIMER13_CONTEXT_OFFSET			0x00cc
+#define DRA7XX_PM_L4PER_TIMER14_WKDEP_OFFSET			0x00d0
+#define DRA7XX_RM_L4PER3_TIMER14_CONTEXT_OFFSET			0x00d4
+#define DRA7XX_PM_L4PER_TIMER15_WKDEP_OFFSET			0x00d8
+#define DRA7XX_RM_L4PER3_TIMER15_CONTEXT_OFFSET			0x00dc
+#define DRA7XX_PM_L4PER_MCSPI1_WKDEP_OFFSET			0x00f0
+#define DRA7XX_RM_L4PER_MCSPI1_CONTEXT_OFFSET			0x00f4
+#define DRA7XX_PM_L4PER_MCSPI2_WKDEP_OFFSET			0x00f8
+#define DRA7XX_RM_L4PER_MCSPI2_CONTEXT_OFFSET			0x00fc
+#define DRA7XX_PM_L4PER_MCSPI3_WKDEP_OFFSET			0x0100
+#define DRA7XX_RM_L4PER_MCSPI3_CONTEXT_OFFSET			0x0104
+#define DRA7XX_PM_L4PER_MCSPI4_WKDEP_OFFSET			0x0108
+#define DRA7XX_RM_L4PER_MCSPI4_CONTEXT_OFFSET			0x010c
+#define DRA7XX_PM_L4PER_GPIO7_WKDEP_OFFSET			0x0110
+#define DRA7XX_RM_L4PER_GPIO7_CONTEXT_OFFSET			0x0114
+#define DRA7XX_PM_L4PER_GPIO8_WKDEP_OFFSET			0x0118
+#define DRA7XX_RM_L4PER_GPIO8_CONTEXT_OFFSET			0x011c
+#define DRA7XX_PM_L4PER_MMC3_WKDEP_OFFSET			0x0120
+#define DRA7XX_RM_L4PER_MMC3_CONTEXT_OFFSET			0x0124
+#define DRA7XX_PM_L4PER_MMC4_WKDEP_OFFSET			0x0128
+#define DRA7XX_RM_L4PER_MMC4_CONTEXT_OFFSET			0x012c
+#define DRA7XX_PM_L4PER_TIMER16_WKDEP_OFFSET			0x0130
+#define DRA7XX_RM_L4PER3_TIMER16_CONTEXT_OFFSET			0x0134
+#define DRA7XX_PM_L4PER2_QSPI_WKDEP_OFFSET			0x0138
+#define DRA7XX_RM_L4PER2_QSPI_CONTEXT_OFFSET			0x013c
+#define DRA7XX_PM_L4PER_UART1_WKDEP_OFFSET			0x0140
+#define DRA7XX_RM_L4PER_UART1_CONTEXT_OFFSET			0x0144
+#define DRA7XX_PM_L4PER_UART2_WKDEP_OFFSET			0x0148
+#define DRA7XX_RM_L4PER_UART2_CONTEXT_OFFSET			0x014c
+#define DRA7XX_PM_L4PER_UART3_WKDEP_OFFSET			0x0150
+#define DRA7XX_RM_L4PER_UART3_CONTEXT_OFFSET			0x0154
+#define DRA7XX_PM_L4PER_UART4_WKDEP_OFFSET			0x0158
+#define DRA7XX_RM_L4PER_UART4_CONTEXT_OFFSET			0x015c
+#define DRA7XX_PM_L4PER2_MCASP2_WKDEP_OFFSET			0x0160
+#define DRA7XX_RM_L4PER2_MCASP2_CONTEXT_OFFSET			0x0164
+#define DRA7XX_PM_L4PER2_MCASP3_WKDEP_OFFSET			0x0168
+#define DRA7XX_RM_L4PER2_MCASP3_CONTEXT_OFFSET			0x016c
+#define DRA7XX_PM_L4PER_UART5_WKDEP_OFFSET			0x0170
+#define DRA7XX_RM_L4PER_UART5_CONTEXT_OFFSET			0x0174
+#define DRA7XX_PM_L4PER2_MCASP5_WKDEP_OFFSET			0x0178
+#define DRA7XX_RM_L4PER2_MCASP5_CONTEXT_OFFSET			0x017c
+#define DRA7XX_PM_L4PER2_MCASP6_WKDEP_OFFSET			0x0180
+#define DRA7XX_RM_L4PER2_MCASP6_CONTEXT_OFFSET			0x0184
+#define DRA7XX_PM_L4PER2_MCASP7_WKDEP_OFFSET			0x0188
+#define DRA7XX_RM_L4PER2_MCASP7_CONTEXT_OFFSET			0x018c
+#define DRA7XX_PM_L4PER2_MCASP8_WKDEP_OFFSET			0x0190
+#define DRA7XX_RM_L4PER2_MCASP8_CONTEXT_OFFSET			0x0194
+#define DRA7XX_PM_L4PER2_MCASP4_WKDEP_OFFSET			0x0198
+#define DRA7XX_RM_L4PER2_MCASP4_CONTEXT_OFFSET			0x019c
+#define DRA7XX_RM_L4SEC_AES1_CONTEXT_OFFSET			0x01a4
+#define DRA7XX_RM_L4SEC_AES2_CONTEXT_OFFSET			0x01ac
+#define DRA7XX_RM_L4SEC_DES3DES_CONTEXT_OFFSET			0x01b4
+#define DRA7XX_RM_L4SEC_FPKA_CONTEXT_OFFSET			0x01bc
+#define DRA7XX_RM_L4SEC_RNG_CONTEXT_OFFSET			0x01c4
+#define DRA7XX_RM_L4SEC_SHA2MD51_CONTEXT_OFFSET			0x01cc
+#define DRA7XX_PM_L4PER2_UART7_WKDEP_OFFSET			0x01d0
+#define DRA7XX_RM_L4PER2_UART7_CONTEXT_OFFSET			0x01d4
+#define DRA7XX_RM_L4SEC_DMA_CRYPTO_CONTEXT_OFFSET		0x01dc
+#define DRA7XX_PM_L4PER2_UART8_WKDEP_OFFSET			0x01e0
+#define DRA7XX_RM_L4PER2_UART8_CONTEXT_OFFSET			0x01e4
+#define DRA7XX_PM_L4PER2_UART9_WKDEP_OFFSET			0x01e8
+#define DRA7XX_RM_L4PER2_UART9_CONTEXT_OFFSET			0x01ec
+#define DRA7XX_PM_L4PER2_DCAN2_WKDEP_OFFSET			0x01f0
+#define DRA7XX_RM_L4PER2_DCAN2_CONTEXT_OFFSET			0x01f4
+#define DRA7XX_RM_L4SEC_SHA2MD52_CONTEXT_OFFSET			0x01fc
+
+/* PRM.CUSTEFUSE_PRM register offsets */
+#define DRA7XX_PM_CUSTEFUSE_PWRSTCTRL_OFFSET			0x0000
+#define DRA7XX_PM_CUSTEFUSE_PWRSTST_OFFSET			0x0004
+#define DRA7XX_RM_CUSTEFUSE_EFUSE_CTRL_CUST_CONTEXT_OFFSET	0x0024
+
+/* PRM.WKUPAON_PRM register offsets */
+#define DRA7XX_RM_WKUPAON_L4_WKUP_CONTEXT_OFFSET		0x0000
+#define DRA7XX_PM_WKUPAON_WD_TIMER1_WKDEP_OFFSET		0x0004
+#define DRA7XX_RM_WKUPAON_WD_TIMER1_CONTEXT_OFFSET		0x0008
+#define DRA7XX_PM_WKUPAON_WD_TIMER2_WKDEP_OFFSET		0x000c
+#define DRA7XX_RM_WKUPAON_WD_TIMER2_CONTEXT_OFFSET		0x0010
+#define DRA7XX_PM_WKUPAON_GPIO1_WKDEP_OFFSET			0x0014
+#define DRA7XX_RM_WKUPAON_GPIO1_CONTEXT_OFFSET			0x0018
+#define DRA7XX_PM_WKUPAON_TIMER1_WKDEP_OFFSET			0x001c
+#define DRA7XX_RM_WKUPAON_TIMER1_CONTEXT_OFFSET			0x0020
+#define DRA7XX_PM_WKUPAON_TIMER12_WKDEP_OFFSET			0x0024
+#define DRA7XX_RM_WKUPAON_TIMER12_CONTEXT_OFFSET		0x0028
+#define DRA7XX_RM_WKUPAON_COUNTER_32K_CONTEXT_OFFSET		0x0030
+#define DRA7XX_RM_WKUPAON_SAR_RAM_CONTEXT_OFFSET		0x0040
+#define DRA7XX_PM_WKUPAON_KBD_WKDEP_OFFSET			0x0054
+#define DRA7XX_RM_WKUPAON_KBD_CONTEXT_OFFSET			0x0058
+#define DRA7XX_PM_WKUPAON_UART10_WKDEP_OFFSET			0x005c
+#define DRA7XX_RM_WKUPAON_UART10_CONTEXT_OFFSET			0x0060
+#define DRA7XX_PM_WKUPAON_DCAN1_WKDEP_OFFSET			0x0064
+#define DRA7XX_RM_WKUPAON_DCAN1_CONTEXT_OFFSET			0x0068
+#define DRA7XX_PM_WKUPAON_ADC_WKDEP_OFFSET				0x007c
+#define DRA7XX_RM_WKUPAON_ADC_CONTEXT_OFFSET			0x0080
+#define DRA7XX_RM_WKUPAON_SPARE_SAFETY1_CONTEXT_OFFSET		0x0090
+#define DRA7XX_RM_WKUPAON_SPARE_SAFETY2_CONTEXT_OFFSET		0x0098
+#define DRA7XX_RM_WKUPAON_SPARE_SAFETY3_CONTEXT_OFFSET		0x00a0
+#define DRA7XX_RM_WKUPAON_SPARE_SAFETY4_CONTEXT_OFFSET		0x00a8
+#define DRA7XX_RM_WKUPAON_SPARE_UNKNOWN2_CONTEXT_OFFSET		0x00b0
+#define DRA7XX_RM_WKUPAON_SPARE_UNKNOWN3_CONTEXT_OFFSET		0x00b8
+
+/* PRM.WKUPAON_CM register offsets */
+#define DRA7XX_CM_WKUPAON_CLKSTCTRL_OFFSET			0x0000
+#define DRA7XX_CM_WKUPAON_L4_WKUP_CLKCTRL_OFFSET		0x0020
+#define DRA7XX_CM_WKUPAON_L4_WKUP_CLKCTRL			DRA7XX_PRM_REGADDR(DRA7XX_PRM_WKUPAON_CM_INST, 0x0020)
+#define DRA7XX_CM_WKUPAON_WD_TIMER1_CLKCTRL_OFFSET		0x0028
+#define DRA7XX_CM_WKUPAON_WD_TIMER1_CLKCTRL			DRA7XX_PRM_REGADDR(DRA7XX_PRM_WKUPAON_CM_INST, 0x0028)
+#define DRA7XX_CM_WKUPAON_WD_TIMER2_CLKCTRL_OFFSET		0x0030
+#define DRA7XX_CM_WKUPAON_WD_TIMER2_CLKCTRL			DRA7XX_PRM_REGADDR(DRA7XX_PRM_WKUPAON_CM_INST, 0x0030)
+#define DRA7XX_CM_WKUPAON_GPIO1_CLKCTRL_OFFSET			0x0038
+#define DRA7XX_CM_WKUPAON_GPIO1_CLKCTRL				DRA7XX_PRM_REGADDR(DRA7XX_PRM_WKUPAON_CM_INST, 0x0038)
+#define DRA7XX_CM_WKUPAON_TIMER1_CLKCTRL_OFFSET			0x0040
+#define DRA7XX_CM_WKUPAON_TIMER1_CLKCTRL			DRA7XX_PRM_REGADDR(DRA7XX_PRM_WKUPAON_CM_INST, 0x0040)
+#define DRA7XX_CM_WKUPAON_TIMER12_CLKCTRL_OFFSET		0x0048
+#define DRA7XX_CM_WKUPAON_TIMER12_CLKCTRL			DRA7XX_PRM_REGADDR(DRA7XX_PRM_WKUPAON_CM_INST, 0x0048)
+#define DRA7XX_CM_WKUPAON_COUNTER_32K_CLKCTRL_OFFSET		0x0050
+#define DRA7XX_CM_WKUPAON_COUNTER_32K_CLKCTRL			DRA7XX_PRM_REGADDR(DRA7XX_PRM_WKUPAON_CM_INST, 0x0050)
+#define DRA7XX_CM_WKUPAON_SAR_RAM_CLKCTRL_OFFSET		0x0060
+#define DRA7XX_CM_WKUPAON_SAR_RAM_CLKCTRL			DRA7XX_PRM_REGADDR(DRA7XX_PRM_WKUPAON_CM_INST, 0x0060)
+#define DRA7XX_CM_WKUPAON_KBD_CLKCTRL_OFFSET			0x0078
+#define DRA7XX_CM_WKUPAON_KBD_CLKCTRL				DRA7XX_PRM_REGADDR(DRA7XX_PRM_WKUPAON_CM_INST, 0x0078)
+#define DRA7XX_CM_WKUPAON_UART10_CLKCTRL_OFFSET			0x0080
+#define DRA7XX_CM_WKUPAON_UART10_CLKCTRL			DRA7XX_PRM_REGADDR(DRA7XX_PRM_WKUPAON_CM_INST, 0x0080)
+#define DRA7XX_CM_WKUPAON_DCAN1_CLKCTRL_OFFSET			0x0088
+#define DRA7XX_CM_WKUPAON_DCAN1_CLKCTRL				DRA7XX_PRM_REGADDR(DRA7XX_PRM_WKUPAON_CM_INST, 0x0088)
+#define DRA7XX_CM_WKUPAON_SCRM_CLKCTRL_OFFSET			0x0090
+#define DRA7XX_CM_WKUPAON_SCRM_CLKCTRL				DRA7XX_PRM_REGADDR(DRA7XX_PRM_WKUPAON_CM_INST, 0x0090)
+#define DRA7XX_CM_WKUPAON_IO_SRCOMP_CLKCTRL_OFFSET		0x0098
+#define DRA7XX_CM_WKUPAON_IO_SRCOMP_CLKCTRL			DRA7XX_PRM_REGADDR(DRA7XX_PRM_WKUPAON_CM_INST, 0x0098)
+#define DRA7XX_CM_WKUPAON_ADC_CLKCTRL_OFFSET			0x00a0
+#define DRA7XX_CM_WKUPAON_ADC_CLKCTRL				DRA7XX_PRM_REGADDR(DRA7XX_PRM_WKUPAON_CM_INST, 0x00a0)
+#define DRA7XX_CM_WKUPAON_SPARE_SAFETY1_CLKCTRL_OFFSET		0x00b0
+#define DRA7XX_CM_WKUPAON_SPARE_SAFETY1_CLKCTRL			DRA7XX_PRM_REGADDR(DRA7XX_PRM_WKUPAON_CM_INST, 0x00b0)
+#define DRA7XX_CM_WKUPAON_SPARE_SAFETY2_CLKCTRL_OFFSET		0x00b8
+#define DRA7XX_CM_WKUPAON_SPARE_SAFETY2_CLKCTRL			DRA7XX_PRM_REGADDR(DRA7XX_PRM_WKUPAON_CM_INST, 0x00b8)
+#define DRA7XX_CM_WKUPAON_SPARE_SAFETY3_CLKCTRL_OFFSET		0x00c0
+#define DRA7XX_CM_WKUPAON_SPARE_SAFETY3_CLKCTRL			DRA7XX_PRM_REGADDR(DRA7XX_PRM_WKUPAON_CM_INST, 0x00c0)
+#define DRA7XX_CM_WKUPAON_SPARE_SAFETY4_CLKCTRL_OFFSET		0x00c8
+#define DRA7XX_CM_WKUPAON_SPARE_SAFETY4_CLKCTRL			DRA7XX_PRM_REGADDR(DRA7XX_PRM_WKUPAON_CM_INST, 0x00c8)
+#define DRA7XX_CM_WKUPAON_SPARE_UNKNOWN2_CLKCTRL_OFFSET		0x00d0
+#define DRA7XX_CM_WKUPAON_SPARE_UNKNOWN2_CLKCTRL		DRA7XX_PRM_REGADDR(DRA7XX_PRM_WKUPAON_CM_INST, 0x00d0)
+#define DRA7XX_CM_WKUPAON_SPARE_UNKNOWN3_CLKCTRL_OFFSET		0x00d8
+#define DRA7XX_CM_WKUPAON_SPARE_UNKNOWN3_CLKCTRL		DRA7XX_PRM_REGADDR(DRA7XX_PRM_WKUPAON_CM_INST, 0x00d8)
+
+/* PRM.EMU_PRM register offsets */
+#define DRA7XX_PM_EMU_PWRSTCTRL_OFFSET				0x0000
+#define DRA7XX_PM_EMU_PWRSTST_OFFSET				0x0004
+#define DRA7XX_RM_EMU_DEBUGSS_CONTEXT_OFFSET			0x0024
+
+/* PRM.EMU_CM register offsets */
+#define DRA7XX_CM_EMU_CLKSTCTRL_OFFSET				0x0000
+#define DRA7XX_CM_EMU_DEBUGSS_CLKCTRL_OFFSET			0x0004
+#define DRA7XX_CM_EMU_DEBUGSS_CLKCTRL				DRA7XX_PRM_REGADDR(DRA7XX_PRM_EMU_CM_INST, 0x0004)
+#define DRA7XX_CM_EMU_DYNAMICDEP_OFFSET				0x0008
+#define DRA7XX_CM_EMU_MPU_EMU_DBG_CLKCTRL_OFFSET		0x000c
+#define DRA7XX_CM_EMU_MPU_EMU_DBG_CLKCTRL			DRA7XX_PRM_REGADDR(DRA7XX_PRM_EMU_CM_INST, 0x000c)
+
+/* PRM.DSP2_PRM register offsets */
+#define DRA7XX_PM_DSP2_PWRSTCTRL_OFFSET				0x0000
+#define DRA7XX_PM_DSP2_PWRSTST_OFFSET				0x0004
+#define DRA7XX_RM_DSP2_RSTCTRL_OFFSET				0x0010
+#define DRA7XX_RM_DSP2_RSTST_OFFSET				0x0014
+#define DRA7XX_RM_DSP2_DSP2_CONTEXT_OFFSET			0x0024
+
+/* PRM.EVE1_PRM register offsets */
+#define DRA7XX_PM_EVE1_PWRSTCTRL_OFFSET				0x0000
+#define DRA7XX_PM_EVE1_PWRSTST_OFFSET				0x0004
+#define DRA7XX_RM_EVE1_RSTCTRL_OFFSET				0x0010
+#define DRA7XX_RM_EVE1_RSTST_OFFSET				0x0014
+#define DRA7XX_PM_EVE1_EVE1_WKDEP_OFFSET			0x0020
+#define DRA7XX_RM_EVE1_EVE1_CONTEXT_OFFSET			0x0024
+
+/* PRM.EVE2_PRM register offsets */
+#define DRA7XX_PM_EVE2_PWRSTCTRL_OFFSET				0x0000
+#define DRA7XX_PM_EVE2_PWRSTST_OFFSET				0x0004
+#define DRA7XX_RM_EVE2_RSTCTRL_OFFSET				0x0010
+#define DRA7XX_RM_EVE2_RSTST_OFFSET				0x0014
+#define DRA7XX_PM_EVE2_EVE2_WKDEP_OFFSET			0x0020
+#define DRA7XX_RM_EVE2_EVE2_CONTEXT_OFFSET			0x0024
+
+/* PRM.EVE3_PRM register offsets */
+#define DRA7XX_PM_EVE3_PWRSTCTRL_OFFSET				0x0000
+#define DRA7XX_PM_EVE3_PWRSTST_OFFSET				0x0004
+#define DRA7XX_RM_EVE3_RSTCTRL_OFFSET				0x0010
+#define DRA7XX_RM_EVE3_RSTST_OFFSET				0x0014
+#define DRA7XX_PM_EVE3_EVE3_WKDEP_OFFSET			0x0020
+#define DRA7XX_RM_EVE3_EVE3_CONTEXT_OFFSET			0x0024
+
+/* PRM.EVE4_PRM register offsets */
+#define DRA7XX_PM_EVE4_PWRSTCTRL_OFFSET				0x0000
+#define DRA7XX_PM_EVE4_PWRSTST_OFFSET				0x0004
+#define DRA7XX_RM_EVE4_RSTCTRL_OFFSET				0x0010
+#define DRA7XX_RM_EVE4_RSTST_OFFSET				0x0014
+#define DRA7XX_PM_EVE4_EVE4_WKDEP_OFFSET			0x0020
+#define DRA7XX_RM_EVE4_EVE4_CONTEXT_OFFSET			0x0024
+
+/* PRM.RTC_PRM register offsets */
+#define DRA7XX_PM_RTC_RTCSS_WKDEP_OFFSET			0x0000
+#define DRA7XX_RM_RTC_RTCSS_CONTEXT_OFFSET			0x0004
+
+/* PRM.VPE_PRM register offsets */
+#define DRA7XX_PM_VPE_PWRSTCTRL_OFFSET				0x0000
+#define DRA7XX_PM_VPE_PWRSTST_OFFSET				0x0004
+#define DRA7XX_PM_VPE_VPE_WKDEP_OFFSET				0x0020
+#define DRA7XX_RM_VPE_VPE_CONTEXT_OFFSET			0x0024
+
+/* PRM.DEVICE_PRM register offsets */
+#define DRA7XX_PRM_RSTCTRL_OFFSET				0x0000
+#define DRA7XX_PRM_RSTST_OFFSET					0x0004
+#define DRA7XX_PRM_RSTTIME_OFFSET				0x0008
+#define DRA7XX_PRM_CLKREQCTRL_OFFSET				0x000c
+#define DRA7XX_PRM_VOLTCTRL_OFFSET				0x0010
+#define DRA7XX_PRM_PWRREQCTRL_OFFSET				0x0014
+#define DRA7XX_PRM_PSCON_COUNT_OFFSET				0x0018
+#define DRA7XX_PRM_IO_COUNT_OFFSET				0x001c
+#define DRA7XX_PRM_IO_PMCTRL_OFFSET				0x0020
+#define DRA7XX_PRM_VOLTSETUP_WARMRESET_OFFSET			0x0024
+#define DRA7XX_PRM_VOLTSETUP_CORE_OFF_OFFSET			0x0028
+#define DRA7XX_PRM_VOLTSETUP_MPU_OFF_OFFSET			0x002c
+#define DRA7XX_PRM_VOLTSETUP_MM_OFF_OFFSET			0x0030
+#define DRA7XX_PRM_VOLTSETUP_CORE_RET_SLEEP_OFFSET		0x0034
+#define DRA7XX_PRM_VOLTSETUP_MPU_RET_SLEEP_OFFSET		0x0038
+#define DRA7XX_PRM_VOLTSETUP_MM_RET_SLEEP_OFFSET		0x003c
+#define DRA7XX_PRM_SRAM_COUNT_OFFSET				0x00bc
+#define DRA7XX_PRM_SRAM_WKUP_SETUP_OFFSET			0x00c0
+#define DRA7XX_PRM_SLDO_CORE_SETUP_OFFSET			0x00c4
+#define DRA7XX_PRM_SLDO_CORE_CTRL_OFFSET			0x00c8
+#define DRA7XX_PRM_SLDO_MPU_SETUP_OFFSET			0x00cc
+#define DRA7XX_PRM_SLDO_MPU_CTRL_OFFSET				0x00d0
+#define DRA7XX_PRM_SLDO_GPU_SETUP_OFFSET			0x00d4
+#define DRA7XX_PRM_SLDO_GPU_CTRL_OFFSET				0x00d8
+#define DRA7XX_PRM_ABBLDO_MPU_SETUP_OFFSET			0x00dc
+#define DRA7XX_PRM_ABBLDO_MPU_CTRL_OFFSET			0x00e0
+#define DRA7XX_PRM_ABBLDO_GPU_SETUP_OFFSET			0x00e4
+#define DRA7XX_PRM_ABBLDO_GPU_CTRL_OFFSET			0x00e8
+#define DRA7XX_PRM_BANDGAP_SETUP_OFFSET				0x00ec
+#define DRA7XX_PRM_DEVICE_OFF_CTRL_OFFSET			0x00f0
+#define DRA7XX_PRM_PHASE1_CNDP_OFFSET				0x00f4
+#define DRA7XX_PRM_PHASE2A_CNDP_OFFSET				0x00f8
+#define DRA7XX_PRM_PHASE2B_CNDP_OFFSET				0x00fc
+#define DRA7XX_PRM_MODEM_IF_CTRL_OFFSET				0x0100
+#define DRA7XX_PRM_VOLTST_MPU_OFFSET				0x0110
+#define DRA7XX_PRM_VOLTST_MM_OFFSET				0x0114
+#define DRA7XX_PRM_SLDO_DSPEVE_SETUP_OFFSET			0x0118
+#define DRA7XX_PRM_SLDO_IVA_SETUP_OFFSET			0x011c
+#define DRA7XX_PRM_ABBLDO_DSPEVE_CTRL_OFFSET			0x0120
+#define DRA7XX_PRM_ABBLDO_IVA_CTRL_OFFSET			0x0124
+#define DRA7XX_PRM_SLDO_DSPEVE_CTRL_OFFSET			0x0128
+#define DRA7XX_PRM_SLDO_IVA_CTRL_OFFSET				0x012c
+#define DRA7XX_PRM_ABBLDO_DSPEVE_SETUP_OFFSET			0x0130
+#define DRA7XX_PRM_ABBLDO_IVA_SETUP_OFFSET			0x0134
+
+#endif
diff --git a/arch/arm/mach-omap2/prminst44xx.c b/arch/arm/mach-omap2/prminst44xx.c
index c12320c0..6334b96 100644
--- a/arch/arm/mach-omap2/prminst44xx.c
+++ b/arch/arm/mach-omap2/prminst44xx.c
@@ -20,10 +20,13 @@
 #include "common.h"
 #include "prcm-common.h"
 #include "prm44xx.h"
+#include "prm54xx.h"
+#include "prm7xx.h"
 #include "prminst44xx.h"
 #include "prm-regbits-44xx.h"
 #include "prcm44xx.h"
 #include "prcm_mpu44xx.h"
+#include "soc.h"
 
 static void __iomem *_prm_bases[OMAP4_MAX_PRCM_PARTITIONS];
 
@@ -165,10 +168,19 @@
 void omap4_prminst_global_warm_sw_reset(void)
 {
 	u32 v;
+	s16 dev_inst;
 
-	v = omap4_prminst_read_inst_reg(OMAP4430_PRM_PARTITION,
-				    OMAP4430_PRM_DEVICE_INST,
-				    OMAP4_PRM_RSTCTRL_OFFSET);
+	if (cpu_is_omap44xx())
+		dev_inst = OMAP4430_PRM_DEVICE_INST;
+	else if (soc_is_omap54xx())
+		dev_inst = OMAP54XX_PRM_DEVICE_INST;
+	else if (soc_is_dra7xx())
+		dev_inst = DRA7XX_PRM_DEVICE_INST;
+	else
+		return;
+
+	v = omap4_prminst_read_inst_reg(OMAP4430_PRM_PARTITION, dev_inst,
+					OMAP4_PRM_RSTCTRL_OFFSET);
 	v |= OMAP4430_RST_GLOBAL_WARM_SW_MASK;
 	omap4_prminst_write_inst_reg(v, OMAP4430_PRM_PARTITION,
 				 OMAP4430_PRM_DEVICE_INST,
diff --git a/arch/arm/mach-shmobile/Kconfig b/arch/arm/mach-shmobile/Kconfig
index e817fde..1f94c31 100644
--- a/arch/arm/mach-shmobile/Kconfig
+++ b/arch/arm/mach-shmobile/Kconfig
@@ -109,18 +109,22 @@
 
 comment "SH-Mobile Board Type"
 
-config MACH_AG5EVM
-	bool "AG5EVM board"
-	depends on ARCH_SH73A0
-	select ARCH_REQUIRE_GPIOLIB
-	select REGULATOR_FIXED_VOLTAGE if REGULATOR
-	select SH_LCD_MIPI_DSI
-
 config MACH_APE6EVM
 	bool "APE6EVM board"
 	depends on ARCH_R8A73A4
 	select USE_OF
 
+config MACH_APE6EVM_REFERENCE
+	bool "APE6EVM board - Reference Device Tree Implementation"
+	depends on ARCH_R8A73A4
+	select USE_OF
+	---help---
+	   Use reference implementation of APE6EVM board support
+	   which makes a greater use of device tree at the expense
+	   of not supporting a number of devices.
+
+	   This is intended to aid developers
+
 config MACH_MACKEREL
 	bool "mackerel board"
 	depends on ARCH_SH7372
@@ -129,12 +133,6 @@
 	select SND_SOC_AK4642 if SND_SIMPLE_CARD
 	select USE_OF
 
-config MACH_KOTA2
-	bool "KOTA2 board"
-	depends on ARCH_SH73A0
-	select ARCH_REQUIRE_GPIOLIB
-	select REGULATOR_FIXED_VOLTAGE if REGULATOR
-
 config MACH_ARMADILLO800EVA
 	bool "Armadillo-800 EVA board"
 	depends on ARCH_R8A7740
@@ -165,11 +163,26 @@
 	select REGULATOR_FIXED_VOLTAGE if REGULATOR
 	select USE_OF
 
+config MACH_BOCKW_REFERENCE
+	bool "BOCK-W  - Reference Device Tree Implementation"
+	depends on ARCH_R8A7778
+	select ARCH_REQUIRE_GPIOLIB
+	select RENESAS_INTC_IRQPIN
+	select REGULATOR_FIXED_VOLTAGE if REGULATOR
+	select USE_OF
+	---help---
+	   Use reference implementation of BockW board support
+	   which makes use of device tree at the expense
+	   of not supporting a number of devices.
+
+	   This is intended to aid developers
+
 config MACH_MARZEN
 	bool "MARZEN board"
 	depends on ARCH_R8A7779
 	select ARCH_REQUIRE_GPIOLIB
 	select REGULATOR_FIXED_VOLTAGE if REGULATOR
+	select USE_OF
 
 config MACH_MARZEN_REFERENCE
 	bool "MARZEN board - Reference Device Tree Implementation"
@@ -189,6 +202,17 @@
 	depends on ARCH_R8A7790
 	select USE_OF
 
+config MACH_LAGER_REFERENCE
+	bool "Lager board - Reference Device Tree Implementation"
+	depends on ARCH_R8A7790
+	select USE_OF
+	---help---
+	   Use reference implementation of Lager board support
+	   which makes use of device tree at the expense
+	   of not supporting a number of devices.
+
+	   This is intended to aid developers
+
 config MACH_KZM9D
 	bool "KZM9D board"
 	depends on ARCH_EMEV2
diff --git a/arch/arm/mach-shmobile/Makefile b/arch/arm/mach-shmobile/Makefile
index b150c45..2705bfa 100644
--- a/arch/arm/mach-shmobile/Makefile
+++ b/arch/arm/mach-shmobile/Makefile
@@ -11,9 +11,9 @@
 obj-$(CONFIG_ARCH_SH7372)	+= setup-sh7372.o intc-sh7372.o
 obj-$(CONFIG_ARCH_SH73A0)	+= setup-sh73a0.o intc-sh73a0.o
 obj-$(CONFIG_ARCH_R8A73A4)	+= setup-r8a73a4.o
-obj-$(CONFIG_ARCH_R8A7740)	+= setup-r8a7740.o intc-r8a7740.o
+obj-$(CONFIG_ARCH_R8A7740)	+= setup-r8a7740.o
 obj-$(CONFIG_ARCH_R8A7778)	+= setup-r8a7778.o
-obj-$(CONFIG_ARCH_R8A7779)	+= setup-r8a7779.o intc-r8a7779.o
+obj-$(CONFIG_ARCH_R8A7779)	+= setup-r8a7779.o
 obj-$(CONFIG_ARCH_R8A7790)	+= setup-r8a7790.o
 obj-$(CONFIG_ARCH_EMEV2)	+= setup-emev2.o
 
@@ -32,32 +32,31 @@
 
 # SMP objects
 smp-y				:= platsmp.o headsmp.o
-smp-$(CONFIG_ARCH_SH73A0)	+= smp-sh73a0.o headsmp-scu.o
-smp-$(CONFIG_ARCH_R8A7779)	+= smp-r8a7779.o headsmp-scu.o
-smp-$(CONFIG_ARCH_EMEV2)	+= smp-emev2.o headsmp-scu.o
+smp-$(CONFIG_ARCH_SH73A0)	+= smp-sh73a0.o headsmp-scu.o platsmp-scu.o
+smp-$(CONFIG_ARCH_R8A7779)	+= smp-r8a7779.o headsmp-scu.o platsmp-scu.o
+smp-$(CONFIG_ARCH_EMEV2)	+= smp-emev2.o headsmp-scu.o platsmp-scu.o
 
 # IRQ objects
 obj-$(CONFIG_ARCH_SH7372)	+= entry-intc.o
-obj-$(CONFIG_ARCH_R8A7740)	+= entry-intc.o
 
 # PM objects
 obj-$(CONFIG_SUSPEND)		+= suspend.o
 obj-$(CONFIG_CPU_IDLE)		+= cpuidle.o
-obj-$(CONFIG_ARCH_SHMOBILE)	+= pm-rmobile.o
-obj-$(CONFIG_ARCH_SH7372)	+= pm-sh7372.o sleep-sh7372.o
-obj-$(CONFIG_ARCH_R8A7740)	+= pm-r8a7740.o
-obj-$(CONFIG_ARCH_R8A7779)	+= pm-r8a7779.o
+obj-$(CONFIG_ARCH_SH7372)	+= pm-sh7372.o sleep-sh7372.o pm-rmobile.o
 obj-$(CONFIG_ARCH_SH73A0)	+= pm-sh73a0.o
+obj-$(CONFIG_ARCH_R8A7740)	+= pm-r8a7740.o pm-rmobile.o
+obj-$(CONFIG_ARCH_R8A7779)	+= pm-r8a7779.o
 
 # Board objects
-obj-$(CONFIG_MACH_AG5EVM)	+= board-ag5evm.o
 obj-$(CONFIG_MACH_APE6EVM)	+= board-ape6evm.o
+obj-$(CONFIG_MACH_APE6EVM_REFERENCE)	+= board-ape6evm-reference.o
 obj-$(CONFIG_MACH_MACKEREL)	+= board-mackerel.o
-obj-$(CONFIG_MACH_KOTA2)	+= board-kota2.o
 obj-$(CONFIG_MACH_BOCKW)	+= board-bockw.o
+obj-$(CONFIG_MACH_BOCKW_REFERENCE)	+= board-bockw-reference.o
 obj-$(CONFIG_MACH_MARZEN)	+= board-marzen.o
 obj-$(CONFIG_MACH_MARZEN_REFERENCE)	+= board-marzen-reference.o
 obj-$(CONFIG_MACH_LAGER)	+= board-lager.o
+obj-$(CONFIG_MACH_LAGER_REFERENCE)	+= board-lager-reference.o
 obj-$(CONFIG_MACH_ARMADILLO800EVA)	+= board-armadillo800eva.o
 obj-$(CONFIG_MACH_ARMADILLO800EVA_REFERENCE)	+= board-armadillo800eva-reference.o
 obj-$(CONFIG_MACH_KZM9D)	+= board-kzm9d.o
diff --git a/arch/arm/mach-shmobile/Makefile.boot b/arch/arm/mach-shmobile/Makefile.boot
index 7785c52..6a504fe 100644
--- a/arch/arm/mach-shmobile/Makefile.boot
+++ b/arch/arm/mach-shmobile/Makefile.boot
@@ -1,16 +1,17 @@
 # per-board load address for uImage
 loadaddr-y	:=
-loadaddr-$(CONFIG_MACH_AG5EVM) += 0x40008000
 loadaddr-$(CONFIG_MACH_APE6EVM) += 0x40008000
+loadaddr-$(CONFIG_MACH_APE6EVM_REFERENCE) += 0x40008000
 loadaddr-$(CONFIG_MACH_ARMADILLO800EVA) += 0x40008000
 loadaddr-$(CONFIG_MACH_ARMADILLO800EVA_REFERENCE) += 0x40008000
 loadaddr-$(CONFIG_MACH_BOCKW) += 0x60008000
-loadaddr-$(CONFIG_MACH_KOTA2) += 0x41008000
+loadaddr-$(CONFIG_MACH_BOCKW_REFERENCE) += 0x60008000
 loadaddr-$(CONFIG_MACH_KZM9D) += 0x40008000
 loadaddr-$(CONFIG_MACH_KZM9D_REFERENCE) += 0x40008000
 loadaddr-$(CONFIG_MACH_KZM9G) += 0x41008000
 loadaddr-$(CONFIG_MACH_KZM9G_REFERENCE) += 0x41008000
 loadaddr-$(CONFIG_MACH_LAGER) += 0x40008000
+loadaddr-$(CONFIG_MACH_LAGER_REFERENCE) += 0x40008000
 loadaddr-$(CONFIG_MACH_MACKEREL) += 0x40008000
 loadaddr-$(CONFIG_MACH_MARZEN) += 0x60008000
 loadaddr-$(CONFIG_MACH_MARZEN_REFERENCE) += 0x60008000
diff --git a/arch/arm/mach-shmobile/board-ag5evm.c b/arch/arm/mach-shmobile/board-ag5evm.c
deleted file mode 100644
index f6d6449..0000000
--- a/arch/arm/mach-shmobile/board-ag5evm.c
+++ /dev/null
@@ -1,639 +0,0 @@
-/*
- * arch/arm/mach-shmobile/board-ag5evm.c
- *
- * Copyright (C) 2010  Takashi Yoshii <yoshii.takashi.zj@renesas.com>
- * Copyright (C) 2009  Yoshihiro Shimoda <shimoda.yoshihiro@renesas.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- *
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/irq.h>
-#include <linux/pinctrl/machine.h>
-#include <linux/pinctrl/pinconf-generic.h>
-#include <linux/platform_device.h>
-#include <linux/delay.h>
-#include <linux/io.h>
-#include <linux/dma-mapping.h>
-#include <linux/regulator/fixed.h>
-#include <linux/regulator/machine.h>
-#include <linux/serial_sci.h>
-#include <linux/smsc911x.h>
-#include <linux/gpio.h>
-#include <linux/videodev2.h>
-#include <linux/input.h>
-#include <linux/input/sh_keysc.h>
-#include <linux/mmc/host.h>
-#include <linux/mmc/sh_mmcif.h>
-#include <linux/mmc/sh_mobile_sdhi.h>
-#include <linux/mfd/tmio.h>
-#include <linux/platform_data/bd6107.h>
-#include <linux/sh_clk.h>
-#include <linux/irqchip/arm-gic.h>
-#include <video/sh_mobile_lcdc.h>
-#include <video/sh_mipi_dsi.h>
-#include <sound/sh_fsi.h>
-#include <mach/hardware.h>
-#include <mach/irqs.h>
-#include <mach/sh73a0.h>
-#include <mach/common.h>
-#include <asm/mach-types.h>
-#include <asm/mach/arch.h>
-#include <asm/hardware/cache-l2x0.h>
-#include <asm/traps.h>
-
-/* Dummy supplies, where voltage doesn't matter */
-static struct regulator_consumer_supply dummy_supplies[] = {
-	REGULATOR_SUPPLY("vddvario", "smsc911x"),
-	REGULATOR_SUPPLY("vdd33a", "smsc911x"),
-};
-
-static struct resource smsc9220_resources[] = {
-	[0] = {
-		.start		= 0x14000000,
-		.end		= 0x14000000 + SZ_64K - 1,
-		.flags		= IORESOURCE_MEM,
-	},
-	[1] = {
-		.start		= SH73A0_PINT0_IRQ(2), /* PINTA2 */
-		.flags		= IORESOURCE_IRQ,
-	},
-};
-
-static struct smsc911x_platform_config smsc9220_platdata = {
-	.flags		= SMSC911X_USE_32BIT | SMSC911X_SAVE_MAC_ADDRESS,
-	.phy_interface	= PHY_INTERFACE_MODE_MII,
-	.irq_polarity	= SMSC911X_IRQ_POLARITY_ACTIVE_LOW,
-	.irq_type	= SMSC911X_IRQ_TYPE_PUSH_PULL,
-};
-
-static struct platform_device eth_device = {
-	.name		= "smsc911x",
-	.id		= 0,
-	.dev  = {
-		.platform_data = &smsc9220_platdata,
-	},
-	.resource	= smsc9220_resources,
-	.num_resources	= ARRAY_SIZE(smsc9220_resources),
-};
-
-static struct sh_keysc_info keysc_platdata = {
-	.mode		= SH_KEYSC_MODE_6,
-	.scan_timing	= 3,
-	.delay		= 100,
-	.keycodes	= {
-		KEY_A, KEY_B, KEY_C, KEY_D, KEY_E, KEY_F, KEY_G,
-		KEY_H, KEY_I, KEY_J, KEY_K, KEY_L, KEY_M, KEY_N,
-		KEY_O, KEY_P, KEY_Q, KEY_R, KEY_S, KEY_T, KEY_U,
-		KEY_V, KEY_W, KEY_X, KEY_Y, KEY_Z, KEY_HOME, KEY_SLEEP,
-		KEY_SPACE, KEY_9, KEY_6, KEY_3, KEY_WAKEUP, KEY_RIGHT, \
-		KEY_COFFEE,
-		KEY_0, KEY_8, KEY_5, KEY_2, KEY_DOWN, KEY_ENTER, KEY_UP,
-		KEY_KPASTERISK, KEY_7, KEY_4, KEY_1, KEY_STOP, KEY_LEFT, \
-		KEY_COMPUTER,
-	},
-};
-
-static struct resource keysc_resources[] = {
-	[0] = {
-		.name	= "KEYSC",
-		.start	= 0xe61b0000,
-		.end	= 0xe61b0098 - 1,
-		.flags	= IORESOURCE_MEM,
-	},
-	[1] = {
-		.start	= gic_spi(71),
-		.flags	= IORESOURCE_IRQ,
-	},
-};
-
-static struct platform_device keysc_device = {
-	.name		= "sh_keysc",
-	.id		= 0,
-	.num_resources	= ARRAY_SIZE(keysc_resources),
-	.resource	= keysc_resources,
-	.dev		= {
-		.platform_data	= &keysc_platdata,
-	},
-};
-
-/* FSI A */
-static struct resource fsi_resources[] = {
-	[0] = {
-		.name	= "FSI",
-		.start	= 0xEC230000,
-		.end	= 0xEC230400 - 1,
-		.flags	= IORESOURCE_MEM,
-	},
-	[1] = {
-		.start  = gic_spi(146),
-		.flags  = IORESOURCE_IRQ,
-	},
-};
-
-static struct platform_device fsi_device = {
-	.name		= "sh_fsi2",
-	.id		= -1,
-	.num_resources	= ARRAY_SIZE(fsi_resources),
-	.resource	= fsi_resources,
-};
-
-/* Fixed 1.8V regulator to be used by MMCIF */
-static struct regulator_consumer_supply fixed1v8_power_consumers[] =
-{
-	REGULATOR_SUPPLY("vmmc", "sh_mmcif.0"),
-	REGULATOR_SUPPLY("vqmmc", "sh_mmcif.0"),
-};
-
-static struct resource sh_mmcif_resources[] = {
-	[0] = {
-		.name	= "MMCIF",
-		.start	= 0xe6bd0000,
-		.end	= 0xe6bd00ff,
-		.flags	= IORESOURCE_MEM,
-	},
-	[1] = {
-		.start	= gic_spi(141),
-		.flags	= IORESOURCE_IRQ,
-	},
-	[2] = {
-		.start	= gic_spi(140),
-		.flags	= IORESOURCE_IRQ,
-	},
-};
-
-static struct sh_mmcif_plat_data sh_mmcif_platdata = {
-	.sup_pclk	= 0,
-	.ocr		= MMC_VDD_165_195,
-	.caps		= MMC_CAP_8_BIT_DATA | MMC_CAP_NONREMOVABLE,
-	.slave_id_tx	= SHDMA_SLAVE_MMCIF_TX,
-	.slave_id_rx	= SHDMA_SLAVE_MMCIF_RX,
-};
-
-static struct platform_device mmc_device = {
-	.name		= "sh_mmcif",
-	.id		= 0,
-	.dev		= {
-		.dma_mask		= NULL,
-		.coherent_dma_mask	= 0xffffffff,
-		.platform_data		= &sh_mmcif_platdata,
-	},
-	.num_resources	= ARRAY_SIZE(sh_mmcif_resources),
-	.resource	= sh_mmcif_resources,
-};
-
-/* IrDA */
-static struct resource irda_resources[] = {
-	[0] = {
-		.start	= 0xE6D00000,
-		.end	= 0xE6D01FD4 - 1,
-		.flags  = IORESOURCE_MEM,
-	},
-	[1] = {
-		.start	= gic_spi(95),
-		.flags  = IORESOURCE_IRQ,
-	},
-};
-
-static struct platform_device irda_device = {
-	.name           = "sh_irda",
-	.id		= 0,
-	.resource       = irda_resources,
-	.num_resources  = ARRAY_SIZE(irda_resources),
-};
-
-/* MIPI-DSI */
-static struct resource mipidsi0_resources[] = {
-	[0] = {
-		.name	= "DSI0",
-		.start  = 0xfeab0000,
-		.end    = 0xfeab3fff,
-		.flags  = IORESOURCE_MEM,
-	},
-	[1] = {
-		.name	= "DSI0",
-		.start  = 0xfeab4000,
-		.end    = 0xfeab7fff,
-		.flags  = IORESOURCE_MEM,
-	},
-};
-
-static int sh_mipi_set_dot_clock(struct platform_device *pdev,
-				 void __iomem *base,
-				 int enable)
-{
-	struct clk *pck, *phy;
-	int ret;
-
-	pck = clk_get(&pdev->dev, "dsip_clk");
-	if (IS_ERR(pck)) {
-		ret = PTR_ERR(pck);
-		goto sh_mipi_set_dot_clock_pck_err;
-	}
-
-	phy = clk_get(&pdev->dev, "dsiphy_clk");
-	if (IS_ERR(phy)) {
-		ret = PTR_ERR(phy);
-		goto sh_mipi_set_dot_clock_phy_err;
-	}
-
-	if (enable) {
-		clk_set_rate(pck, clk_round_rate(pck,  24000000));
-		clk_set_rate(phy, clk_round_rate(pck, 510000000));
-		clk_enable(pck);
-		clk_enable(phy);
-	} else {
-		clk_disable(pck);
-		clk_disable(phy);
-	}
-
-	ret = 0;
-
-	clk_put(phy);
-sh_mipi_set_dot_clock_phy_err:
-	clk_put(pck);
-sh_mipi_set_dot_clock_pck_err:
-	return ret;
-}
-
-static struct sh_mipi_dsi_info mipidsi0_info = {
-	.data_format	= MIPI_RGB888,
-	.channel	= LCDC_CHAN_MAINLCD,
-	.lane		= 2,
-	.vsynw_offset	= 20,
-	.clksrc		= 1,
-	.flags		= SH_MIPI_DSI_HSABM		|
-			  SH_MIPI_DSI_SYNC_PULSES_MODE	|
-			  SH_MIPI_DSI_HSbyteCLK,
-	.set_dot_clock	= sh_mipi_set_dot_clock,
-};
-
-static struct platform_device mipidsi0_device = {
-	.name           = "sh-mipi-dsi",
-	.num_resources  = ARRAY_SIZE(mipidsi0_resources),
-	.resource       = mipidsi0_resources,
-	.id             = 0,
-	.dev	= {
-		.platform_data	= &mipidsi0_info,
-	},
-};
-
-/* LCDC0 and backlight */
-static const struct fb_videomode lcdc0_modes[] = {
-	{
-		.name		= "R63302(QHD)",
-		.xres		= 544,
-		.yres		= 961,
-		.left_margin	= 72,
-		.right_margin	= 600,
-		.hsync_len	= 16,
-		.upper_margin	= 8,
-		.lower_margin	= 8,
-		.vsync_len	= 2,
-		.sync		= FB_SYNC_VERT_HIGH_ACT | FB_SYNC_HOR_HIGH_ACT,
-	},
-};
-
-static struct sh_mobile_lcdc_info lcdc0_info = {
-	.clock_source = LCDC_CLK_PERIPHERAL,
-	.ch[0] = {
-		.chan = LCDC_CHAN_MAINLCD,
-		.interface_type = RGB24,
-		.clock_divider = 1,
-		.flags = LCDC_FLAGS_DWPOL,
-		.fourcc = V4L2_PIX_FMT_RGB565,
-		.lcd_modes = lcdc0_modes,
-		.num_modes = ARRAY_SIZE(lcdc0_modes),
-		.panel_cfg = {
-			.width = 44,
-			.height = 79,
-		},
-		.tx_dev = &mipidsi0_device,
-	}
-};
-
-static struct resource lcdc0_resources[] = {
-	[0] = {
-		.name	= "LCDC0",
-		.start	= 0xfe940000, /* P4-only space */
-		.end	= 0xfe943fff,
-		.flags	= IORESOURCE_MEM,
-	},
-	[1] = {
-		.start	= intcs_evt2irq(0x580),
-		.flags	= IORESOURCE_IRQ,
-	},
-};
-
-static struct platform_device lcdc0_device = {
-	.name		= "sh_mobile_lcdc_fb",
-	.num_resources	= ARRAY_SIZE(lcdc0_resources),
-	.resource	= lcdc0_resources,
-	.id             = 0,
-	.dev	= {
-		.platform_data	= &lcdc0_info,
-		.coherent_dma_mask = ~0,
-	},
-};
-
-static struct bd6107_platform_data backlight_data = {
-	.fbdev = &lcdc0_device.dev,
-	.reset = 235,
-	.def_value = 0,
-};
-
-static struct i2c_board_info backlight_board_info = {
-	I2C_BOARD_INFO("bd6107", 0x6d),
-	.platform_data = &backlight_data,
-};
-
-/* Fixed 2.8V regulators to be used by SDHI0 */
-static struct regulator_consumer_supply fixed2v8_power_consumers[] =
-{
-	REGULATOR_SUPPLY("vmmc", "sh_mobile_sdhi.0"),
-	REGULATOR_SUPPLY("vqmmc", "sh_mobile_sdhi.0"),
-};
-
-/* SDHI0 */
-static struct sh_mobile_sdhi_info sdhi0_info = {
-	.dma_slave_tx	= SHDMA_SLAVE_SDHI0_TX,
-	.dma_slave_rx	= SHDMA_SLAVE_SDHI0_RX,
-	.tmio_flags	= TMIO_MMC_HAS_IDLE_WAIT | TMIO_MMC_USE_GPIO_CD,
-	.tmio_caps	= MMC_CAP_SD_HIGHSPEED,
-	.tmio_ocr_mask	= MMC_VDD_27_28 | MMC_VDD_28_29,
-	.cd_gpio	= 251,
-};
-
-static struct resource sdhi0_resources[] = {
-	[0] = {
-		.name	= "SDHI0",
-		.start	= 0xee100000,
-		.end	= 0xee1000ff,
-		.flags	= IORESOURCE_MEM,
-	},
-	[1] = {
-		.name	= SH_MOBILE_SDHI_IRQ_CARD_DETECT,
-		.start	= gic_spi(83),
-		.flags	= IORESOURCE_IRQ,
-	},
-	[2] = {
-		.name	= SH_MOBILE_SDHI_IRQ_SDCARD,
-		.start	= gic_spi(84),
-		.flags	= IORESOURCE_IRQ,
-	},
-	[3] = {
-		.name	= SH_MOBILE_SDHI_IRQ_SDIO,
-		.start	= gic_spi(85),
-		.flags	= IORESOURCE_IRQ,
-	},
-};
-
-static struct platform_device sdhi0_device = {
-	.name		= "sh_mobile_sdhi",
-	.id		= 0,
-	.num_resources	= ARRAY_SIZE(sdhi0_resources),
-	.resource	= sdhi0_resources,
-	.dev	= {
-		.platform_data	= &sdhi0_info,
-	},
-};
-
-/* Fixed 3.3V regulator to be used by SDHI1 */
-static struct regulator_consumer_supply cn4_power_consumers[] =
-{
-	REGULATOR_SUPPLY("vmmc", "sh_mobile_sdhi.1"),
-	REGULATOR_SUPPLY("vqmmc", "sh_mobile_sdhi.1"),
-};
-
-static struct regulator_init_data cn4_power_init_data = {
-	.constraints = {
-		.valid_ops_mask = REGULATOR_CHANGE_STATUS,
-	},
-	.num_consumer_supplies  = ARRAY_SIZE(cn4_power_consumers),
-	.consumer_supplies      = cn4_power_consumers,
-};
-
-static struct fixed_voltage_config cn4_power_info = {
-	.supply_name = "CN4 SD/MMC Vdd",
-	.microvolts = 3300000,
-	.gpio = 114,
-	.enable_high = 1,
-	.init_data = &cn4_power_init_data,
-};
-
-static struct platform_device cn4_power = {
-	.name = "reg-fixed-voltage",
-	.id   = 2,
-	.dev  = {
-		.platform_data = &cn4_power_info,
-	},
-};
-
-static void ag5evm_sdhi1_set_pwr(struct platform_device *pdev, int state)
-{
-	static int power_gpio = -EINVAL;
-
-	if (power_gpio < 0) {
-		int ret = gpio_request_one(114, GPIOF_OUT_INIT_LOW,
-					   "sdhi1_power");
-		if (!ret)
-			power_gpio = 114;
-	}
-
-	/*
-	 * If requesting the GPIO above failed, it means, that the regulator got
-	 * probed and grabbed the GPIO, but we don't know, whether the sdhi
-	 * driver already uses the regulator. If it doesn't, we have to toggle
-	 * the GPIO ourselves, even though it is now owned by the fixed
-	 * regulator driver. We have to live with the race in case the driver
-	 * gets unloaded and the GPIO freed between these two steps.
-	 */
-	gpio_set_value(114, state);
-}
-
-static struct sh_mobile_sdhi_info sh_sdhi1_info = {
-	.tmio_flags	= TMIO_MMC_WRPROTECT_DISABLE | TMIO_MMC_HAS_IDLE_WAIT,
-	.tmio_caps	= MMC_CAP_NONREMOVABLE | MMC_CAP_SDIO_IRQ,
-	.tmio_ocr_mask	= MMC_VDD_32_33 | MMC_VDD_33_34,
-	.set_pwr	= ag5evm_sdhi1_set_pwr,
-};
-
-static struct resource sdhi1_resources[] = {
-	[0] = {
-		.name	= "SDHI1",
-		.start	= 0xee120000,
-		.end	= 0xee1200ff,
-		.flags	= IORESOURCE_MEM,
-	},
-	[1] = {
-		.name	= SH_MOBILE_SDHI_IRQ_CARD_DETECT,
-		.start	= gic_spi(87),
-		.flags	= IORESOURCE_IRQ,
-	},
-	[2] = {
-		.name	= SH_MOBILE_SDHI_IRQ_SDCARD,
-		.start	= gic_spi(88),
-		.flags	= IORESOURCE_IRQ,
-	},
-	[3] = {
-		.name	= SH_MOBILE_SDHI_IRQ_SDIO,
-		.start	= gic_spi(89),
-		.flags	= IORESOURCE_IRQ,
-	},
-};
-
-static struct platform_device sdhi1_device = {
-	.name		= "sh_mobile_sdhi",
-	.id		= 1,
-	.dev		= {
-		.platform_data	= &sh_sdhi1_info,
-	},
-	.num_resources	= ARRAY_SIZE(sdhi1_resources),
-	.resource	= sdhi1_resources,
-};
-
-static struct platform_device *ag5evm_devices[] __initdata = {
-	&cn4_power,
-	&eth_device,
-	&keysc_device,
-	&fsi_device,
-	&mmc_device,
-	&irda_device,
-	&mipidsi0_device,
-	&lcdc0_device,
-	&sdhi0_device,
-	&sdhi1_device,
-};
-
-static unsigned long pin_pullup_conf[] = {
-	PIN_CONF_PACKED(PIN_CONFIG_BIAS_PULL_UP, 0),
-};
-
-static const struct pinctrl_map ag5evm_pinctrl_map[] = {
-	/* FSIA */
-	PIN_MAP_MUX_GROUP_DEFAULT("sh_fsi2.0", "pfc-sh73a0",
-				  "fsia_mclk_in", "fsia"),
-	PIN_MAP_MUX_GROUP_DEFAULT("sh_fsi2.0", "pfc-sh73a0",
-				  "fsia_sclk_in", "fsia"),
-	PIN_MAP_MUX_GROUP_DEFAULT("sh_fsi2.0", "pfc-sh73a0",
-				  "fsia_data_in", "fsia"),
-	PIN_MAP_MUX_GROUP_DEFAULT("sh_fsi2.0", "pfc-sh73a0",
-				  "fsia_data_out", "fsia"),
-	/* I2C2 & I2C3 */
-	PIN_MAP_MUX_GROUP_DEFAULT("i2c-sh_mobile.2", "pfc-sh73a0",
-				  "i2c2_0", "i2c2"),
-	PIN_MAP_MUX_GROUP_DEFAULT("i2c-sh_mobile.3", "pfc-sh73a0",
-				  "i2c3_0", "i2c3"),
-	/* IrDA */
-	PIN_MAP_MUX_GROUP_DEFAULT("sh_irda.0", "pfc-sh73a0",
-				  "irda_0", "irda"),
-	/* KEYSC */
-	PIN_MAP_MUX_GROUP_DEFAULT("sh_keysc.0", "pfc-sh73a0",
-				  "keysc_in8", "keysc"),
-	PIN_MAP_MUX_GROUP_DEFAULT("sh_keysc.0", "pfc-sh73a0",
-				  "keysc_out04", "keysc"),
-	PIN_MAP_MUX_GROUP_DEFAULT("sh_keysc.0", "pfc-sh73a0",
-				  "keysc_out5", "keysc"),
-	PIN_MAP_MUX_GROUP_DEFAULT("sh_keysc.0", "pfc-sh73a0",
-				  "keysc_out6_0", "keysc"),
-	PIN_MAP_MUX_GROUP_DEFAULT("sh_keysc.0", "pfc-sh73a0",
-				  "keysc_out7_0", "keysc"),
-	PIN_MAP_MUX_GROUP_DEFAULT("sh_keysc.0", "pfc-sh73a0",
-				  "keysc_out8_0", "keysc"),
-	PIN_MAP_MUX_GROUP_DEFAULT("sh_keysc.0", "pfc-sh73a0",
-				  "keysc_out9_2", "keysc"),
-	PIN_MAP_CONFIGS_GROUP_DEFAULT("sh_keysc.0", "pfc-sh73a0",
-				      "keysc_in8", pin_pullup_conf),
-	/* MMCIF */
-	PIN_MAP_MUX_GROUP_DEFAULT("sh_mmcif.0", "pfc-sh73a0",
-				  "mmc0_data8_0", "mmc0"),
-	PIN_MAP_MUX_GROUP_DEFAULT("sh_mmcif.0", "pfc-sh73a0",
-				  "mmc0_ctrl_0", "mmc0"),
-	PIN_MAP_CONFIGS_PIN_DEFAULT("sh_mmcif.0", "pfc-sh73a0",
-				    "PORT279", pin_pullup_conf),
-	PIN_MAP_CONFIGS_GROUP_DEFAULT("sh_mmcif.0", "pfc-sh73a0",
-				      "mmc0_data8_0", pin_pullup_conf),
-	/* SCIFA2 */
-	PIN_MAP_MUX_GROUP_DEFAULT("sh-sci.2", "pfc-sh73a0",
-				  "scifa2_data_0", "scifa2"),
-	PIN_MAP_MUX_GROUP_DEFAULT("sh-sci.2", "pfc-sh73a0",
-				  "scifa2_ctrl_0", "scifa2"),
-	/* SDHI0 (CN15 [SD I/F]) */
-	PIN_MAP_MUX_GROUP_DEFAULT("sh_mobile_sdhi.0", "pfc-sh73a0",
-				  "sdhi0_data4", "sdhi0"),
-	PIN_MAP_MUX_GROUP_DEFAULT("sh_mobile_sdhi.0", "pfc-sh73a0",
-				  "sdhi0_ctrl", "sdhi0"),
-	PIN_MAP_MUX_GROUP_DEFAULT("sh_mobile_sdhi.0", "pfc-sh73a0",
-				  "sdhi0_wp", "sdhi0"),
-	/* SDHI1 (CN4 [WLAN I/F]) */
-	PIN_MAP_MUX_GROUP_DEFAULT("sh_mobile_sdhi.1", "pfc-sh73a0",
-				  "sdhi1_data4", "sdhi1"),
-	PIN_MAP_MUX_GROUP_DEFAULT("sh_mobile_sdhi.1", "pfc-sh73a0",
-				  "sdhi1_ctrl", "sdhi1"),
-	PIN_MAP_CONFIGS_GROUP_DEFAULT("sh_mobile_sdhi.1", "pfc-sh73a0",
-				      "sdhi1_data4", pin_pullup_conf),
-	PIN_MAP_CONFIGS_PIN_DEFAULT("sh_mobile_sdhi.1", "pfc-sh73a0",
-				    "PORT263", pin_pullup_conf),
-};
-
-static void __init ag5evm_init(void)
-{
-	regulator_register_always_on(0, "fixed-1.8V", fixed1v8_power_consumers,
-				     ARRAY_SIZE(fixed1v8_power_consumers), 1800000);
-	regulator_register_always_on(1, "fixed-2.8V", fixed2v8_power_consumers,
-				     ARRAY_SIZE(fixed2v8_power_consumers), 3300000);
-	regulator_register_fixed(3, dummy_supplies, ARRAY_SIZE(dummy_supplies));
-
-	pinctrl_register_mappings(ag5evm_pinctrl_map,
-				  ARRAY_SIZE(ag5evm_pinctrl_map));
-	sh73a0_pinmux_init();
-
-	/* enable MMCIF */
-	gpio_request_one(208, GPIOF_OUT_INIT_HIGH, NULL); /* Reset */
-
-	/* enable SMSC911X */
-	gpio_request_one(144, GPIOF_IN, NULL); /* PINTA2 */
-	gpio_request_one(145, GPIOF_OUT_INIT_HIGH, NULL); /* RESET */
-
-	/* LCD panel */
-	gpio_request_one(217, GPIOF_OUT_INIT_LOW, NULL); /* RESET */
-	mdelay(1);
-	gpio_set_value(217, 1);
-	mdelay(100);
-
-
-#ifdef CONFIG_CACHE_L2X0
-	/* Shared attribute override enable, 64K*8way */
-	l2x0_init(IOMEM(0xf0100000), 0x00460000, 0xc2000fff);
-#endif
-	sh73a0_add_standard_devices();
-
-	i2c_register_board_info(1, &backlight_board_info, 1);
-
-	platform_add_devices(ag5evm_devices, ARRAY_SIZE(ag5evm_devices));
-}
-
-MACHINE_START(AG5EVM, "ag5evm")
-	.smp		= smp_ops(sh73a0_smp_ops),
-	.map_io		= sh73a0_map_io,
-	.init_early	= sh73a0_add_early_devices,
-	.nr_irqs	= NR_IRQS_LEGACY,
-	.init_irq	= sh73a0_init_irq,
-	.init_machine	= ag5evm_init,
-	.init_late	= shmobile_init_late,
-	.init_time	= sh73a0_earlytimer_init,
-MACHINE_END
diff --git a/arch/arm/mach-shmobile/board-ape6evm-reference.c b/arch/arm/mach-shmobile/board-ape6evm-reference.c
new file mode 100644
index 0000000..a23fa71
--- /dev/null
+++ b/arch/arm/mach-shmobile/board-ape6evm-reference.c
@@ -0,0 +1,63 @@
+/*
+ * APE6EVM board support
+ *
+ * Copyright (C) 2013  Renesas Solutions Corp.
+ * Copyright (C) 2013  Magnus Damm
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#include <linux/gpio.h>
+#include <linux/kernel.h>
+#include <linux/of_platform.h>
+#include <linux/pinctrl/machine.h>
+#include <linux/platform_device.h>
+#include <linux/sh_clk.h>
+#include <mach/common.h>
+#include <mach/r8a73a4.h>
+#include <asm/mach-types.h>
+#include <asm/mach/arch.h>
+
+static void __init ape6evm_add_standard_devices(void)
+{
+
+	struct clk *parent;
+	struct clk *mp;
+
+	r8a73a4_clock_init();
+
+	/* MP clock parent = extal2 */
+	parent      = clk_get(NULL, "extal2");
+	mp          = clk_get(NULL, "mp");
+	BUG_ON(IS_ERR(parent) || IS_ERR(mp));
+
+	clk_set_parent(mp, parent);
+	clk_put(parent);
+	clk_put(mp);
+
+	r8a73a4_add_dt_devices();
+	of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
+	platform_device_register_simple("cpufreq-cpu0", -1, NULL, 0);
+}
+
+static const char *ape6evm_boards_compat_dt[] __initdata = {
+	"renesas,ape6evm-reference",
+	NULL,
+};
+
+DT_MACHINE_START(APE6EVM_DT, "ape6evm")
+	.init_early	= r8a73a4_init_delay,
+	.init_machine	= ape6evm_add_standard_devices,
+	.dt_compat	= ape6evm_boards_compat_dt,
+MACHINE_END
diff --git a/arch/arm/mach-shmobile/board-ape6evm.c b/arch/arm/mach-shmobile/board-ape6evm.c
index 38c6c73..24b87eea 100644
--- a/arch/arm/mach-shmobile/board-ape6evm.c
+++ b/arch/arm/mach-shmobile/board-ape6evm.c
@@ -241,7 +241,6 @@
 
 DT_MACHINE_START(APE6EVM_DT, "ape6evm")
 	.init_early	= r8a73a4_init_delay,
-	.init_time	= shmobile_timer_init,
 	.init_machine	= ape6evm_add_standard_devices,
 	.dt_compat	= ape6evm_boards_compat_dt,
 MACHINE_END
diff --git a/arch/arm/mach-shmobile/board-armadillo800eva-reference.c b/arch/arm/mach-shmobile/board-armadillo800eva-reference.c
index fd2446d..57d1a78 100644
--- a/arch/arm/mach-shmobile/board-armadillo800eva-reference.c
+++ b/arch/arm/mach-shmobile/board-armadillo800eva-reference.c
@@ -190,7 +190,6 @@
 	.init_early	= r8a7740_init_delay,
 	.init_irq	= r8a7740_init_irq_of,
 	.init_machine	= eva_init,
-	.init_time	= shmobile_timer_init,
 	.init_late	= shmobile_init_late,
 	.dt_compat	= eva_boards_compat_dt,
 	.restart	= eva_restart,
diff --git a/arch/arm/mach-shmobile/board-armadillo800eva.c b/arch/arm/mach-shmobile/board-armadillo800eva.c
index 6b4b77d..5bd1479 100644
--- a/arch/arm/mach-shmobile/board-armadillo800eva.c
+++ b/arch/arm/mach-shmobile/board-armadillo800eva.c
@@ -1313,7 +1313,7 @@
 DT_MACHINE_START(ARMADILLO800EVA_DT, "armadillo800eva")
 	.map_io		= r8a7740_map_io,
 	.init_early	= eva_add_early_devices,
-	.init_irq	= r8a7740_init_irq,
+	.init_irq	= r8a7740_init_irq_of,
 	.init_machine	= eva_init,
 	.init_late	= shmobile_init_late,
 	.init_time	= eva_earlytimer_init,
diff --git a/arch/arm/mach-shmobile/board-bockw-reference.c b/arch/arm/mach-shmobile/board-bockw-reference.c
new file mode 100644
index 0000000..1a7c893
--- /dev/null
+++ b/arch/arm/mach-shmobile/board-bockw-reference.c
@@ -0,0 +1,61 @@
+/*
+ * Bock-W board support
+ *
+ * Copyright (C) 2013  Renesas Solutions Corp.
+ * Copyright (C) 2013  Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#include <linux/of_platform.h>
+#include <linux/pinctrl/machine.h>
+#include <mach/common.h>
+#include <mach/r8a7778.h>
+#include <asm/mach/arch.h>
+
+/*
+ *	see board-bock.c for checking detail of dip-switch
+ */
+
+static const struct pinctrl_map bockw_pinctrl_map[] = {
+	/* SCIF0 */
+	PIN_MAP_MUX_GROUP_DEFAULT("sh-sci.0", "pfc-r8a7778",
+				  "scif0_data_a", "scif0"),
+	PIN_MAP_MUX_GROUP_DEFAULT("sh-sci.0", "pfc-r8a7778",
+				  "scif0_ctrl", "scif0"),
+};
+
+static void __init bockw_init(void)
+{
+	r8a7778_clock_init();
+
+	pinctrl_register_mappings(bockw_pinctrl_map,
+				  ARRAY_SIZE(bockw_pinctrl_map));
+	r8a7778_pinmux_init();
+	r8a7778_add_dt_devices();
+
+	of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
+}
+
+static const char *bockw_boards_compat_dt[] __initdata = {
+	"renesas,bockw-reference",
+	NULL,
+};
+
+DT_MACHINE_START(BOCKW_DT, "bockw")
+	.init_early	= r8a7778_init_delay,
+	.init_irq	= r8a7778_init_irq_dt,
+	.init_machine	= bockw_init,
+	.dt_compat	= bockw_boards_compat_dt,
+MACHINE_END
diff --git a/arch/arm/mach-shmobile/board-bockw.c b/arch/arm/mach-shmobile/board-bockw.c
index 35dd7f2..6b9faf3 100644
--- a/arch/arm/mach-shmobile/board-bockw.c
+++ b/arch/arm/mach-shmobile/board-bockw.c
@@ -21,8 +21,11 @@
 
 #include <linux/mfd/tmio.h>
 #include <linux/mmc/host.h>
+#include <linux/mmc/sh_mobile_sdhi.h>
+#include <linux/mmc/sh_mmcif.h>
 #include <linux/mtd/partitions.h>
 #include <linux/pinctrl/machine.h>
+#include <linux/platform_data/usb-rcar-phy.h>
 #include <linux/platform_device.h>
 #include <linux/regulator/fixed.h>
 #include <linux/regulator/machine.h>
@@ -66,28 +69,38 @@
 	REGULATOR_SUPPLY("vdd33a", "smsc911x"),
 };
 
-static struct smsc911x_platform_config smsc911x_data = {
+static struct smsc911x_platform_config smsc911x_data __initdata = {
 	.irq_polarity	= SMSC911X_IRQ_POLARITY_ACTIVE_LOW,
 	.irq_type	= SMSC911X_IRQ_TYPE_PUSH_PULL,
 	.flags		= SMSC911X_USE_32BIT,
 	.phy_interface	= PHY_INTERFACE_MODE_MII,
 };
 
-static struct resource smsc911x_resources[] = {
+static struct resource smsc911x_resources[] __initdata = {
 	DEFINE_RES_MEM(0x18300000, 0x1000),
 	DEFINE_RES_IRQ(irq_pin(0)), /* IRQ 0 */
 };
 
 /* USB */
+static struct resource usb_phy_resources[] __initdata = {
+	DEFINE_RES_MEM(0xffe70800, 0x100),
+	DEFINE_RES_MEM(0xffe76000, 0x100),
+};
+
 static struct rcar_phy_platform_data usb_phy_platform_data __initdata;
 
 /* SDHI */
-static struct sh_mobile_sdhi_info sdhi0_info = {
+static struct sh_mobile_sdhi_info sdhi0_info __initdata = {
 	.tmio_caps	= MMC_CAP_SD_HIGHSPEED,
 	.tmio_ocr_mask	= MMC_VDD_165_195 | MMC_VDD_32_33 | MMC_VDD_33_34,
 	.tmio_flags	= TMIO_MMC_HAS_IDLE_WAIT,
 };
 
+static struct resource sdhi0_resources[] __initdata = {
+	DEFINE_RES_MEM(0xFFE4C000, 0x100),
+	DEFINE_RES_IRQ(gic_iid(0x77)),
+};
+
 static struct sh_eth_plat_data ether_platform_data __initdata = {
 	.phy		= 0x01,
 	.edmac_endian	= EDMAC_LITTLE_ENDIAN,
@@ -136,7 +149,12 @@
 };
 
 /* MMC */
-static struct sh_mmcif_plat_data sh_mmcif_plat = {
+static struct resource mmc_resources[] __initdata = {
+	DEFINE_RES_MEM(0xffe4e000, 0x100),
+	DEFINE_RES_IRQ(gic_iid(0x5d)),
+};
+
+static struct sh_mmcif_plat_data sh_mmcif_plat __initdata = {
 	.sup_pclk	= 0,
 	.ocr		= MMC_VDD_165_195 | MMC_VDD_32_33 | MMC_VDD_33_34,
 	.caps		= MMC_CAP_4_BIT_DATA |
@@ -217,11 +235,7 @@
 	r8a7778_clock_init();
 	r8a7778_init_irq_extpin(1);
 	r8a7778_add_standard_devices();
-	r8a7778_add_usb_phy_device(&usb_phy_platform_data);
 	r8a7778_add_ether_device(&ether_platform_data);
-	r8a7778_add_i2c_device(0);
-	r8a7778_add_hspi_device(0);
-	r8a7778_add_mmc_device(&sh_mmcif_plat);
 	r8a7778_add_vin_device(0, &vin_platform_data);
 	/* VIN1 has a pin conflict with Ether */
 	if (!IS_ENABLED(CONFIG_SH_ETH))
@@ -241,6 +255,19 @@
 				  ARRAY_SIZE(bockw_pinctrl_map));
 	r8a7778_pinmux_init();
 
+	platform_device_register_resndata(
+		&platform_bus, "sh_mmcif", -1,
+		mmc_resources, ARRAY_SIZE(mmc_resources),
+		&sh_mmcif_plat, sizeof(struct sh_mmcif_plat_data));
+
+	platform_device_register_resndata(
+		&platform_bus, "rcar_usb_phy", -1,
+		usb_phy_resources,
+		ARRAY_SIZE(usb_phy_resources),
+		&usb_phy_platform_data,
+		sizeof(struct rcar_phy_platform_data));
+
+
 	/* for SMSC */
 	base = ioremap_nocache(FPGA, SZ_1M);
 	if (base) {
@@ -276,7 +303,10 @@
 		iowrite32(ioread32(base + PUPR4) | (3 << 26), base + PUPR4);
 		iounmap(base);
 
-		r8a7778_sdhi_init(0, &sdhi0_info);
+		platform_device_register_resndata(
+			&platform_bus, "sh_mobile_sdhi", 0,
+			sdhi0_resources, ARRAY_SIZE(sdhi0_resources),
+			&sdhi0_info, sizeof(struct sh_mobile_sdhi_info));
 	}
 }
 
@@ -289,7 +319,6 @@
 	.init_early	= r8a7778_init_delay,
 	.init_irq	= r8a7778_init_irq_dt,
 	.init_machine	= bockw_init,
-	.init_time	= shmobile_timer_init,
 	.dt_compat	= bockw_boards_compat_dt,
 	.init_late      = r8a7778_init_late,
 MACHINE_END
diff --git a/arch/arm/mach-shmobile/board-kota2.c b/arch/arm/mach-shmobile/board-kota2.c
deleted file mode 100644
index 6af20d9..0000000
--- a/arch/arm/mach-shmobile/board-kota2.c
+++ /dev/null
@@ -1,550 +0,0 @@
-/*
- * kota2 board support
- *
- * Copyright (C) 2011  Renesas Solutions Corp.
- * Copyright (C) 2011  Magnus Damm
- * Copyright (C) 2010  Takashi Yoshii <yoshii.takashi.zj@renesas.com>
- * Copyright (C) 2009  Yoshihiro Shimoda <shimoda.yoshihiro@renesas.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/irq.h>
-#include <linux/pinctrl/machine.h>
-#include <linux/pinctrl/pinconf-generic.h>
-#include <linux/platform_data/pwm-renesas-tpu.h>
-#include <linux/platform_device.h>
-#include <linux/delay.h>
-#include <linux/io.h>
-#include <linux/regulator/fixed.h>
-#include <linux/regulator/machine.h>
-#include <linux/smsc911x.h>
-#include <linux/gpio.h>
-#include <linux/input.h>
-#include <linux/input/sh_keysc.h>
-#include <linux/gpio_keys.h>
-#include <linux/leds.h>
-#include <linux/leds_pwm.h>
-#include <linux/irqchip/arm-gic.h>
-#include <linux/mmc/host.h>
-#include <linux/mmc/sh_mmcif.h>
-#include <linux/mfd/tmio.h>
-#include <linux/mmc/sh_mobile_sdhi.h>
-#include <mach/hardware.h>
-#include <mach/irqs.h>
-#include <mach/sh73a0.h>
-#include <mach/common.h>
-#include <asm/mach-types.h>
-#include <asm/mach/arch.h>
-#include <asm/mach/time.h>
-#include <asm/hardware/cache-l2x0.h>
-#include <asm/traps.h>
-
-/* Dummy supplies, where voltage doesn't matter */
-static struct regulator_consumer_supply dummy_supplies[] = {
-	REGULATOR_SUPPLY("vddvario", "smsc911x"),
-	REGULATOR_SUPPLY("vdd33a", "smsc911x"),
-};
-
-/* SMSC 9220 */
-static struct resource smsc9220_resources[] = {
-	[0] = {
-		.start		= 0x14000000, /* CS5A */
-		.end		= 0x140000ff, /* A1->A7 */
-		.flags		= IORESOURCE_MEM,
-	},
-	[1] = {
-		.start		= SH73A0_PINT0_IRQ(2), /* PINTA2 */
-		.flags		= IORESOURCE_IRQ,
-	},
-};
-
-static struct smsc911x_platform_config smsc9220_platdata = {
-	.flags		= SMSC911X_USE_32BIT, /* 32-bit SW on 16-bit HW bus */
-	.phy_interface	= PHY_INTERFACE_MODE_MII,
-	.irq_polarity	= SMSC911X_IRQ_POLARITY_ACTIVE_LOW,
-	.irq_type	= SMSC911X_IRQ_TYPE_PUSH_PULL,
-};
-
-static struct platform_device eth_device = {
-	.name		= "smsc911x",
-	.id		= 0,
-	.dev  = {
-		.platform_data = &smsc9220_platdata,
-	},
-	.resource	= smsc9220_resources,
-	.num_resources	= ARRAY_SIZE(smsc9220_resources),
-};
-
-/* KEYSC */
-static struct sh_keysc_info keysc_platdata = {
-	.mode		= SH_KEYSC_MODE_6,
-	.scan_timing	= 3,
-	.delay		= 100,
-	.keycodes	= {
-		KEY_NUMERIC_STAR, KEY_NUMERIC_0, KEY_NUMERIC_POUND,
-		0, 0, 0, 0, 0,
-		KEY_NUMERIC_7, KEY_NUMERIC_8, KEY_NUMERIC_9,
-		0, KEY_DOWN, 0, 0, 0,
-		KEY_NUMERIC_4, KEY_NUMERIC_5, KEY_NUMERIC_6,
-		KEY_LEFT, KEY_ENTER, KEY_RIGHT, 0, 0,
-		KEY_NUMERIC_1, KEY_NUMERIC_2, KEY_NUMERIC_3,
-		0, KEY_UP, 0, 0, 0,
-		0, 0, 0, 0, 0, 0, 0, 0,
-		0, 0, 0, 0, 0, 0, 0, 0,
-		0, 0, 0, 0, 0, 0, 0, 0,
-		0, 0, 0, 0, 0, 0, 0, 0,
-	},
-};
-
-static struct resource keysc_resources[] = {
-	[0] = {
-		.name	= "KEYSC",
-		.start	= 0xe61b0000,
-		.end	= 0xe61b0098 - 1,
-		.flags	= IORESOURCE_MEM,
-	},
-	[1] = {
-		.start	= gic_spi(71),
-		.flags	= IORESOURCE_IRQ,
-	},
-};
-
-static struct platform_device keysc_device = {
-	.name		= "sh_keysc",
-	.id		= 0,
-	.num_resources	= ARRAY_SIZE(keysc_resources),
-	.resource	= keysc_resources,
-	.dev		= {
-		.platform_data	= &keysc_platdata,
-	},
-};
-
-/* GPIO KEY */
-#define GPIO_KEY(c, g, d) { .code = c, .gpio = g, .desc = d, .active_low = 1 }
-
-static struct gpio_keys_button gpio_buttons[] = {
-	GPIO_KEY(KEY_VOLUMEUP, 56, "+"), /* S2: VOL+ [IRQ9] */
-	GPIO_KEY(KEY_VOLUMEDOWN, 54, "-"), /* S3: VOL- [IRQ10] */
-	GPIO_KEY(KEY_MENU, 27, "Menu"), /* S4: MENU [IRQ30] */
-	GPIO_KEY(KEY_HOMEPAGE, 26, "Home"), /* S5: HOME [IRQ31] */
-	GPIO_KEY(KEY_BACK, 11, "Back"), /* S6: BACK [IRQ0] */
-	GPIO_KEY(KEY_PHONE, 238, "Tel"), /* S7: TEL [IRQ11] */
-	GPIO_KEY(KEY_POWER, 239, "C1"), /* S8: CAM [IRQ13] */
-	GPIO_KEY(KEY_MAIL, 224, "Mail"), /* S9: MAIL [IRQ3] */
-	/* Omitted button "C3?": 223 - S10: CUST [IRQ8] */
-	GPIO_KEY(KEY_CAMERA, 164, "C2"), /* S11: CAM_HALF [IRQ25] */
-	/* Omitted button "?": 152 - S12: CAM_FULL [No IRQ] */
-};
-
-static struct gpio_keys_platform_data gpio_key_info = {
-	.buttons        = gpio_buttons,
-	.nbuttons       = ARRAY_SIZE(gpio_buttons),
-};
-
-static struct platform_device gpio_keys_device = {
-	.name   = "gpio-keys",
-	.id     = -1,
-	.dev    = {
-		.platform_data  = &gpio_key_info,
-	},
-};
-
-/* GPIO LED */
-#define GPIO_LED(n, g) { .name = n, .gpio = g }
-
-static struct gpio_led gpio_leds[] = {
-	GPIO_LED("G", 20), /* PORT20 [GPO0] -> LED7 -> "G" */
-	GPIO_LED("H", 21), /* PORT21 [GPO1] -> LED8 -> "H" */
-	GPIO_LED("J", 22), /* PORT22 [GPO2] -> LED9 -> "J" */
-};
-
-static struct gpio_led_platform_data gpio_leds_info = {
-	.leds		= gpio_leds,
-	.num_leds	= ARRAY_SIZE(gpio_leds),
-};
-
-static struct platform_device gpio_leds_device = {
-	.name   = "leds-gpio",
-	.id     = -1,
-	.dev    = {
-		.platform_data  = &gpio_leds_info,
-	},
-};
-
-/* TPU LED */
-static struct resource tpu1_pwm_resources[] = {
-	[0] = {
-		.start	= 0xe6610000,
-		.end	= 0xe66100ff,
-		.flags	= IORESOURCE_MEM,
-	},
-};
-
-static struct platform_device tpu1_pwm_device = {
-	.name = "renesas-tpu-pwm",
-	.id = 1,
-	.num_resources	= ARRAY_SIZE(tpu1_pwm_resources),
-	.resource	= tpu1_pwm_resources,
-};
-
-static struct resource tpu2_pwm_resources[] = {
-	[0] = {
-		.start	= 0xe6620000,
-		.end	= 0xe66200ff,
-		.flags	= IORESOURCE_MEM,
-	},
-};
-
-static struct platform_device tpu2_pwm_device = {
-	.name = "renesas-tpu-pwm",
-	.id = 2,
-	.num_resources	= ARRAY_SIZE(tpu2_pwm_resources),
-	.resource	= tpu2_pwm_resources,
-};
-
-static struct resource tpu3_pwm_resources[] = {
-	[0] = {
-		.start	= 0xe6630000,
-		.end	= 0xe66300ff,
-		.flags	= IORESOURCE_MEM,
-	},
-};
-
-static struct platform_device tpu3_pwm_device = {
-	.name = "renesas-tpu-pwm",
-	.id = 3,
-	.num_resources	= ARRAY_SIZE(tpu3_pwm_resources),
-	.resource	= tpu3_pwm_resources,
-};
-
-static struct resource tpu4_pwm_resources[] = {
-	[0] = {
-		.start	= 0xe6640000,
-		.end	= 0xe66400ff,
-		.flags	= IORESOURCE_MEM,
-	},
-};
-
-static struct platform_device tpu4_pwm_device = {
-	.name = "renesas-tpu-pwm",
-	.id = 4,
-	.num_resources	= ARRAY_SIZE(tpu4_pwm_resources),
-	.resource	= tpu4_pwm_resources,
-};
-
-static struct pwm_lookup pwm_lookup[] = {
-	PWM_LOOKUP("renesas-tpu-pwm.1", 2, "leds-pwm.0", "V2513"),
-	PWM_LOOKUP("renesas-tpu-pwm.2", 1, "leds-pwm.0", "V2515"),
-	PWM_LOOKUP("renesas-tpu-pwm.3", 0, "leds-pwm.0", "KEYLED"),
-	PWM_LOOKUP("renesas-tpu-pwm.4", 1, "leds-pwm.0", "V2514"),
-};
-
-static struct led_pwm tpu_pwm_leds[] = {
-	{
-		.name		= "V2513",
-		.max_brightness	= 1000,
-	}, {
-		.name		= "V2515",
-		.max_brightness	= 1000,
-	}, {
-		.name		= "KEYLED",
-		.max_brightness	= 1000,
-	}, {
-		.name		= "V2514",
-		.max_brightness	= 1000,
-	},
-};
-
-static struct led_pwm_platform_data leds_pwm_pdata = {
-	.num_leds = ARRAY_SIZE(tpu_pwm_leds),
-	.leds = tpu_pwm_leds,
-};
-
-static struct platform_device leds_pwm_device = {
-	.name = "leds-pwm",
-	.id = 0,
-	.dev = {
-		.platform_data = &leds_pwm_pdata,
-	},
-};
-
-/* Fixed 1.8V regulator to be used by MMCIF */
-static struct regulator_consumer_supply fixed1v8_power_consumers[] =
-{
-	REGULATOR_SUPPLY("vmmc", "sh_mmcif.0"),
-	REGULATOR_SUPPLY("vqmmc", "sh_mmcif.0"),
-};
-
-/* MMCIF */
-static struct resource mmcif_resources[] = {
-	[0] = {
-		.name   = "MMCIF",
-		.start  = 0xe6bd0000,
-		.end    = 0xe6bd00ff,
-		.flags  = IORESOURCE_MEM,
-	},
-	[1] = {
-		.start  = gic_spi(140),
-		.flags  = IORESOURCE_IRQ,
-	},
-	[2] = {
-		.start  = gic_spi(141),
-		.flags  = IORESOURCE_IRQ,
-	},
-};
-
-static struct sh_mmcif_plat_data mmcif_info = {
-	.ocr            = MMC_VDD_165_195,
-	.caps           = MMC_CAP_8_BIT_DATA | MMC_CAP_NONREMOVABLE,
-};
-
-static struct platform_device mmcif_device = {
-	.name           = "sh_mmcif",
-	.id             = 0,
-	.dev            = {
-		.platform_data          = &mmcif_info,
-	},
-	.num_resources  = ARRAY_SIZE(mmcif_resources),
-	.resource       = mmcif_resources,
-};
-
-/* Fixed 3.3V regulator to be used by SDHI0 and SDHI1 */
-static struct regulator_consumer_supply fixed3v3_power_consumers[] =
-{
-	REGULATOR_SUPPLY("vmmc", "sh_mobile_sdhi.0"),
-	REGULATOR_SUPPLY("vqmmc", "sh_mobile_sdhi.0"),
-	REGULATOR_SUPPLY("vmmc", "sh_mobile_sdhi.1"),
-	REGULATOR_SUPPLY("vqmmc", "sh_mobile_sdhi.1"),
-};
-
-/* SDHI0 */
-static struct sh_mobile_sdhi_info sdhi0_info = {
-	.tmio_caps      = MMC_CAP_SD_HIGHSPEED,
-	.tmio_flags     = TMIO_MMC_WRPROTECT_DISABLE | TMIO_MMC_HAS_IDLE_WAIT,
-};
-
-static struct resource sdhi0_resources[] = {
-	[0] = {
-		.name   = "SDHI0",
-		.start  = 0xee100000,
-		.end    = 0xee1000ff,
-		.flags  = IORESOURCE_MEM,
-	},
-	[1] = {
-		.start  = gic_spi(83),
-		.flags  = IORESOURCE_IRQ,
-	},
-	[2] = {
-		.start  = gic_spi(84),
-		.flags  = IORESOURCE_IRQ,
-	},
-	[3] = {
-		.start	= gic_spi(85),
-		.flags	= IORESOURCE_IRQ,
-	},
-};
-
-static struct platform_device sdhi0_device = {
-	.name           = "sh_mobile_sdhi",
-	.id             = 0,
-	.num_resources  = ARRAY_SIZE(sdhi0_resources),
-	.resource       = sdhi0_resources,
-	.dev    = {
-		.platform_data  = &sdhi0_info,
-	},
-};
-
-/* SDHI1 */
-static struct sh_mobile_sdhi_info sdhi1_info = {
-	.tmio_caps      = MMC_CAP_NONREMOVABLE | MMC_CAP_SDIO_IRQ,
-	.tmio_flags     = TMIO_MMC_WRPROTECT_DISABLE | TMIO_MMC_HAS_IDLE_WAIT,
-};
-
-static struct resource sdhi1_resources[] = {
-	[0] = {
-		.name   = "SDHI1",
-		.start  = 0xee120000,
-		.end    = 0xee1200ff,
-		.flags  = IORESOURCE_MEM,
-	},
-	[1] = {
-		.start  = gic_spi(87),
-		.flags  = IORESOURCE_IRQ,
-	},
-	[2] = {
-		.start  = gic_spi(88),
-		.flags  = IORESOURCE_IRQ,
-	},
-	[3] = {
-		.start	= gic_spi(89),
-		.flags	= IORESOURCE_IRQ,
-	},
-};
-
-static struct platform_device sdhi1_device = {
-	.name           = "sh_mobile_sdhi",
-	.id             = 1,
-	.num_resources  = ARRAY_SIZE(sdhi1_resources),
-	.resource       = sdhi1_resources,
-	.dev    = {
-		.platform_data  = &sdhi1_info,
-	},
-};
-
-static struct platform_device *kota2_devices[] __initdata = {
-	&eth_device,
-	&keysc_device,
-	&gpio_keys_device,
-	&gpio_leds_device,
-	&tpu1_pwm_device,
-	&tpu2_pwm_device,
-	&tpu3_pwm_device,
-	&tpu4_pwm_device,
-	&leds_pwm_device,
-	&mmcif_device,
-	&sdhi0_device,
-	&sdhi1_device,
-};
-
-static unsigned long pin_pullup_conf[] = {
-	PIN_CONF_PACKED(PIN_CONFIG_BIAS_PULL_UP, 0),
-};
-
-static const struct pinctrl_map kota2_pinctrl_map[] = {
-	/* KEYSC */
-	PIN_MAP_MUX_GROUP_DEFAULT("sh_keysc.0", "pfc-sh73a0",
-				  "keysc_in8", "keysc"),
-	PIN_MAP_MUX_GROUP_DEFAULT("sh_keysc.0", "pfc-sh73a0",
-				  "keysc_out04", "keysc"),
-	PIN_MAP_MUX_GROUP_DEFAULT("sh_keysc.0", "pfc-sh73a0",
-				  "keysc_out5", "keysc"),
-	PIN_MAP_MUX_GROUP_DEFAULT("sh_keysc.0", "pfc-sh73a0",
-				  "keysc_out6_0", "keysc"),
-	PIN_MAP_MUX_GROUP_DEFAULT("sh_keysc.0", "pfc-sh73a0",
-				  "keysc_out7_0", "keysc"),
-	PIN_MAP_MUX_GROUP_DEFAULT("sh_keysc.0", "pfc-sh73a0",
-				  "keysc_out8_0", "keysc"),
-	PIN_MAP_CONFIGS_GROUP_DEFAULT("sh_keysc.0", "pfc-sh73a0",
-				      "keysc_in8", pin_pullup_conf),
-	/* MMCIF */
-	PIN_MAP_MUX_GROUP_DEFAULT("sh_mmcif.0", "pfc-sh73a0",
-				  "mmc0_data8_0", "mmc0"),
-	PIN_MAP_MUX_GROUP_DEFAULT("sh_mmcif.0", "pfc-sh73a0",
-				  "mmc0_ctrl_0", "mmc0"),
-	PIN_MAP_CONFIGS_PIN_DEFAULT("sh_mmcif.0", "pfc-sh73a0",
-				    "PORT279", pin_pullup_conf),
-	PIN_MAP_CONFIGS_GROUP_DEFAULT("sh_mmcif.0", "pfc-sh73a0",
-				      "mmc0_data8_0", pin_pullup_conf),
-	/* SCIFA2 (UART2) */
-	PIN_MAP_MUX_GROUP_DEFAULT("sh-sci.2", "pfc-sh73a0",
-				  "scifa2_data_0", "scifa2"),
-	PIN_MAP_MUX_GROUP_DEFAULT("sh-sci.2", "pfc-sh73a0",
-				  "scifa2_ctrl_0", "scifa2"),
-	/* SCIFA4 (UART1) */
-	PIN_MAP_MUX_GROUP_DEFAULT("sh-sci.4", "pfc-sh73a0",
-				  "scifa4_data", "scifa4"),
-	PIN_MAP_MUX_GROUP_DEFAULT("sh-sci.4", "pfc-sh73a0",
-				  "scifa4_ctrl", "scifa4"),
-	/* SCIFB (BT) */
-	PIN_MAP_MUX_GROUP_DEFAULT("sh-sci.8", "pfc-sh73a0",
-				  "scifb_data_0", "scifb"),
-	PIN_MAP_MUX_GROUP_DEFAULT("sh-sci.8", "pfc-sh73a0",
-				  "scifb_clk_0", "scifb"),
-	PIN_MAP_MUX_GROUP_DEFAULT("sh-sci.8", "pfc-sh73a0",
-				  "scifb_ctrl_0", "scifb"),
-	/* SDHI0 (microSD) */
-	PIN_MAP_MUX_GROUP_DEFAULT("sh_mobile_sdhi.0", "pfc-sh73a0",
-				  "sdhi0_data4", "sdhi0"),
-	PIN_MAP_MUX_GROUP_DEFAULT("sh_mobile_sdhi.0", "pfc-sh73a0",
-				  "sdhi0_ctrl", "sdhi0"),
-	PIN_MAP_MUX_GROUP_DEFAULT("sh_mobile_sdhi.0", "pfc-sh73a0",
-				  "sdhi0_cd", "sdhi0"),
-	PIN_MAP_CONFIGS_GROUP_DEFAULT("sh_mobile_sdhi.0", "pfc-sh73a0",
-				      "sdhi0_data4", pin_pullup_conf),
-	PIN_MAP_CONFIGS_PIN_DEFAULT("sh_mobile_sdhi.0", "pfc-sh73a0",
-				    "PORT256", pin_pullup_conf),
-	PIN_MAP_CONFIGS_PIN_DEFAULT("sh_mobile_sdhi.0", "pfc-sh73a0",
-				    "PORT251", pin_pullup_conf),
-	/* SDHI1 (BCM4330) */
-	PIN_MAP_MUX_GROUP_DEFAULT("sh_mobile_sdhi.1", "pfc-sh73a0",
-				  "sdhi1_data4", "sdhi1"),
-	PIN_MAP_MUX_GROUP_DEFAULT("sh_mobile_sdhi.1", "pfc-sh73a0",
-				  "sdhi1_ctrl", "sdhi1"),
-	PIN_MAP_CONFIGS_GROUP_DEFAULT("sh_mobile_sdhi.1", "pfc-sh73a0",
-				      "sdhi1_data4", pin_pullup_conf),
-	PIN_MAP_CONFIGS_PIN_DEFAULT("sh_mobile_sdhi.1", "pfc-sh73a0",
-				    "PORT263", pin_pullup_conf),
-	/* SMSC911X */
-	PIN_MAP_MUX_GROUP_DEFAULT("smsc911x.0", "pfc-sh73a0",
-				  "bsc_data_0_7", "bsc"),
-	PIN_MAP_MUX_GROUP_DEFAULT("smsc911x.0", "pfc-sh73a0",
-				  "bsc_data_8_15", "bsc"),
-	PIN_MAP_MUX_GROUP_DEFAULT("smsc911x.0", "pfc-sh73a0",
-				  "bsc_cs5_a", "bsc"),
-	PIN_MAP_MUX_GROUP_DEFAULT("smsc911x.0", "pfc-sh73a0",
-				  "bsc_we0", "bsc"),
-	/* TPU */
-	PIN_MAP_MUX_GROUP_DEFAULT("renesas-tpu-pwm.1", "pfc-sh73a0",
-				  "tpu1_to2", "tpu1"),
-	PIN_MAP_MUX_GROUP_DEFAULT("renesas-tpu-pwm.2", "pfc-sh73a0",
-				  "tpu2_to1", "tpu2"),
-	PIN_MAP_MUX_GROUP_DEFAULT("renesas-tpu-pwm.3", "pfc-sh73a0",
-				  "tpu3_to0", "tpu3"),
-	PIN_MAP_MUX_GROUP_DEFAULT("renesas-tpu-pwm.4", "pfc-sh73a0",
-				  "tpu4_to1", "tpu4"),
-};
-
-static void __init kota2_init(void)
-{
-	regulator_register_always_on(0, "fixed-1.8V", fixed1v8_power_consumers,
-				     ARRAY_SIZE(fixed1v8_power_consumers), 1800000);
-	regulator_register_always_on(1, "fixed-3.3V", fixed3v3_power_consumers,
-				     ARRAY_SIZE(fixed3v3_power_consumers), 3300000);
-	regulator_register_fixed(2, dummy_supplies, ARRAY_SIZE(dummy_supplies));
-
-	pinctrl_register_mappings(kota2_pinctrl_map,
-				  ARRAY_SIZE(kota2_pinctrl_map));
-	pwm_add_table(pwm_lookup, ARRAY_SIZE(pwm_lookup));
-
-	sh73a0_pinmux_init();
-
-	/* SMSC911X */
-	gpio_request_one(144, GPIOF_IN, NULL); /* PINTA2 */
-	gpio_request_one(145, GPIOF_OUT_INIT_HIGH, NULL); /* RESET */
-
-	/* MMCIF */
-	gpio_request_one(208, GPIOF_OUT_INIT_HIGH, NULL); /* Reset */
-
-#ifdef CONFIG_CACHE_L2X0
-	/* Early BRESP enable, Shared attribute override enable, 64K*8way */
-	l2x0_init(IOMEM(0xf0100000), 0x40460000, 0x82000fff);
-#endif
-	sh73a0_add_standard_devices();
-	platform_add_devices(kota2_devices, ARRAY_SIZE(kota2_devices));
-}
-
-MACHINE_START(KOTA2, "kota2")
-	.smp		= smp_ops(sh73a0_smp_ops),
-	.map_io		= sh73a0_map_io,
-	.init_early	= sh73a0_add_early_devices,
-	.nr_irqs	= NR_IRQS_LEGACY,
-	.init_irq	= sh73a0_init_irq,
-	.init_machine	= kota2_init,
-	.init_late	= shmobile_init_late,
-	.init_time	= sh73a0_earlytimer_init,
-MACHINE_END
diff --git a/arch/arm/mach-shmobile/board-kzm9g-reference.c b/arch/arm/mach-shmobile/board-kzm9g-reference.c
index a66a808..598e324 100644
--- a/arch/arm/mach-shmobile/board-kzm9g-reference.c
+++ b/arch/arm/mach-shmobile/board-kzm9g-reference.c
@@ -52,6 +52,5 @@
 	.init_early	= sh73a0_init_delay,
 	.nr_irqs	= NR_IRQS_LEGACY,
 	.init_machine	= kzm_init,
-	.init_time	= shmobile_timer_init,
 	.dt_compat	= kzm9g_boards_compat_dt,
 MACHINE_END
diff --git a/arch/arm/mach-shmobile/board-kzm9g.c b/arch/arm/mach-shmobile/board-kzm9g.c
index 1068120..f199496 100644
--- a/arch/arm/mach-shmobile/board-kzm9g.c
+++ b/arch/arm/mach-shmobile/board-kzm9g.c
@@ -54,14 +54,14 @@
 /*
  * external GPIO
  */
-#define GPIO_PCF8575_BASE	(GPIO_NR)
-#define GPIO_PCF8575_PORT10	(GPIO_NR + 8)
-#define GPIO_PCF8575_PORT11	(GPIO_NR + 9)
-#define GPIO_PCF8575_PORT12	(GPIO_NR + 10)
-#define GPIO_PCF8575_PORT13	(GPIO_NR + 11)
-#define GPIO_PCF8575_PORT14	(GPIO_NR + 12)
-#define GPIO_PCF8575_PORT15	(GPIO_NR + 13)
-#define GPIO_PCF8575_PORT16	(GPIO_NR + 14)
+#define GPIO_PCF8575_BASE	(310)
+#define GPIO_PCF8575_PORT10	(GPIO_PCF8575_BASE + 8)
+#define GPIO_PCF8575_PORT11	(GPIO_PCF8575_BASE + 9)
+#define GPIO_PCF8575_PORT12	(GPIO_PCF8575_BASE + 10)
+#define GPIO_PCF8575_PORT13	(GPIO_PCF8575_BASE + 11)
+#define GPIO_PCF8575_PORT14	(GPIO_PCF8575_BASE + 12)
+#define GPIO_PCF8575_PORT15	(GPIO_PCF8575_BASE + 13)
+#define GPIO_PCF8575_PORT16	(GPIO_PCF8575_BASE + 14)
 
 /* Dummy supplies, where voltage doesn't matter */
 static struct regulator_consumer_supply dummy_supplies[] = {
diff --git a/arch/arm/mach-shmobile/board-lager-reference.c b/arch/arm/mach-shmobile/board-lager-reference.c
new file mode 100644
index 0000000..9c316a1
--- /dev/null
+++ b/arch/arm/mach-shmobile/board-lager-reference.c
@@ -0,0 +1,45 @@
+/*
+ * Lager board support - Reference DT implementation
+ *
+ * Copyright (C) 2013  Renesas Solutions Corp.
+ * Copyright (C) 2013  Simon Horman
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#include <linux/init.h>
+#include <linux/of_platform.h>
+#include <mach/r8a7790.h>
+#include <asm/mach/arch.h>
+
+static void __init lager_add_standard_devices(void)
+{
+	/* clocks are setup late during boot in the case of DT */
+	r8a7790_clock_init();
+
+	r8a7790_add_dt_devices();
+        of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
+}
+
+static const char *lager_boards_compat_dt[] __initdata = {
+	"renesas,lager-reference",
+	NULL,
+};
+
+DT_MACHINE_START(LAGER_DT, "lager")
+	.init_early	= r8a7790_init_delay,
+	.init_machine	= lager_add_standard_devices,
+	.init_time	= r8a7790_timer_init,
+	.dt_compat	= lager_boards_compat_dt,
+MACHINE_END
diff --git a/arch/arm/mach-shmobile/board-marzen-reference.c b/arch/arm/mach-shmobile/board-marzen-reference.c
index 3d1c439..3f4250a 100644
--- a/arch/arm/mach-shmobile/board-marzen-reference.c
+++ b/arch/arm/mach-shmobile/board-marzen-reference.c
@@ -42,6 +42,5 @@
 	.nr_irqs	= NR_IRQS_LEGACY,
 	.init_irq	= r8a7779_init_irq_dt,
 	.init_machine	= marzen_init,
-	.init_time	= shmobile_timer_init,
 	.dt_compat	= marzen_boards_compat_dt,
 MACHINE_END
diff --git a/arch/arm/mach-shmobile/board-marzen.c b/arch/arm/mach-shmobile/board-marzen.c
index ca7fb2e..3f5044f 100644
--- a/arch/arm/mach-shmobile/board-marzen.c
+++ b/arch/arm/mach-shmobile/board-marzen.c
@@ -30,6 +30,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/pinctrl/machine.h>
 #include <linux/platform_data/gpio-rcar.h>
+#include <linux/platform_data/usb-rcar-phy.h>
 #include <linux/regulator/fixed.h>
 #include <linux/regulator/machine.h>
 #include <linux/smsc911x.h>
@@ -39,7 +40,6 @@
 #include <linux/mmc/sh_mobile_sdhi.h>
 #include <linux/mfd/tmio.h>
 #include <media/soc_camera.h>
-#include <mach/hardware.h>
 #include <mach/r8a7779.h>
 #include <mach/common.h>
 #include <mach/irqs.h>
@@ -59,7 +59,26 @@
 	REGULATOR_SUPPLY("vdd33a", "smsc911x"),
 };
 
-static struct rcar_phy_platform_data usb_phy_platform_data __initdata;
+/* USB PHY */
+static struct resource usb_phy_resources[] = {
+	[0] = {
+		.start		= 0xffe70800,
+		.end		= 0xffe70900 - 1,
+		.flags		= IORESOURCE_MEM,
+	},
+};
+
+static struct rcar_phy_platform_data usb_phy_platform_data;
+
+static struct platform_device usb_phy = {
+	.name		= "rcar_usb_phy",
+	.id		= -1,
+	.dev  = {
+		.platform_data = &usb_phy_platform_data,
+	},
+	.resource	= usb_phy_resources,
+	.num_resources	= ARRAY_SIZE(usb_phy_resources),
+};
 
 /* SMSC LAN89218 */
 static struct resource smsc911x_resources[] = {
@@ -212,6 +231,7 @@
 	&thermal_device,
 	&hspi_device,
 	&leds_device,
+	&usb_phy,
 	&camera0_device,
 	&camera1_device,
 };
@@ -274,19 +294,23 @@
 	r8a7779_init_irq_extpin(1); /* IRQ1 as individual interrupt */
 
 	r8a7779_add_standard_devices();
-	r8a7779_add_usb_phy_device(&usb_phy_platform_data);
 	r8a7779_add_vin_device(1, &vin_platform_data);
 	r8a7779_add_vin_device(3, &vin_platform_data);
 	platform_add_devices(marzen_devices, ARRAY_SIZE(marzen_devices));
 }
 
-MACHINE_START(MARZEN, "marzen")
+static const char *marzen_boards_compat_dt[] __initdata = {
+        "renesas,marzen",
+        NULL,
+};
+
+DT_MACHINE_START(MARZEN, "marzen")
 	.smp		= smp_ops(r8a7779_smp_ops),
 	.map_io		= r8a7779_map_io,
 	.init_early	= r8a7779_add_early_devices,
-	.nr_irqs	= NR_IRQS_LEGACY,
-	.init_irq	= r8a7779_init_irq,
+	.init_irq	= r8a7779_init_irq_dt,
 	.init_machine	= marzen_init,
 	.init_late	= r8a7779_init_late,
+	.dt_compat	= marzen_boards_compat_dt,
 	.init_time	= r8a7779_earlytimer_init,
 MACHINE_END
diff --git a/arch/arm/mach-shmobile/headsmp.S b/arch/arm/mach-shmobile/headsmp.S
index 2667db8..f93751c 100644
--- a/arch/arm/mach-shmobile/headsmp.S
+++ b/arch/arm/mach-shmobile/headsmp.S
@@ -40,3 +40,52 @@
 	.globl	shmobile_boot_arg
 shmobile_boot_arg:
 2:	.space	4
+
+/*
+ * Per-CPU SMP boot function/argument selection code based on MPIDR
+ */
+
+ENTRY(shmobile_smp_boot)
+						@ r0 = MPIDR_HWID_BITMASK
+	mrc	p15, 0, r1, c0, c0, 5		@ r1 = MPIDR
+	and	r0, r1, r0			@ r0 = cpu_logical_map() value
+	mov	r1, #0				@ r1 = CPU index
+	adr	r5, 1f				@ array of per-cpu mpidr values
+	adr	r6, 2f				@ array of per-cpu functions
+	adr	r7, 3f				@ array of per-cpu arguments
+
+shmobile_smp_boot_find_mpidr:
+	ldr	r8, [r5, r1, lsl #2]
+	cmp	r8, r0
+	bne	shmobile_smp_boot_next
+
+	ldr	r9, [r6, r1, lsl #2]
+	cmp	r9, #0
+	bne	shmobile_smp_boot_found
+
+shmobile_smp_boot_next:
+	add	r1, r1, #1
+	cmp	r1, #CONFIG_NR_CPUS
+	blo	shmobile_smp_boot_find_mpidr
+
+	b	shmobile_smp_sleep
+
+shmobile_smp_boot_found:
+	ldr	r0, [r7, r1, lsl #2]
+	mov	pc, r9
+ENDPROC(shmobile_smp_boot)
+
+ENTRY(shmobile_smp_sleep)
+	wfi
+	b	shmobile_smp_boot
+ENDPROC(shmobile_smp_sleep)
+
+	.globl	shmobile_smp_mpidr
+shmobile_smp_mpidr:
+1:	.space	CONFIG_NR_CPUS * 4
+	.globl	shmobile_smp_fn
+shmobile_smp_fn:
+2:	.space	CONFIG_NR_CPUS * 4
+	.globl	shmobile_smp_arg
+shmobile_smp_arg:
+3:	.space	CONFIG_NR_CPUS * 4
diff --git a/arch/arm/mach-shmobile/include/mach/common.h b/arch/arm/mach-shmobile/include/mach/common.h
index e818f02..7b93868 100644
--- a/arch/arm/mach-shmobile/include/mach/common.h
+++ b/arch/arm/mach-shmobile/include/mach/common.h
@@ -2,7 +2,6 @@
 #define __ARCH_MACH_COMMON_H
 
 extern void shmobile_earlytimer_init(void);
-extern void shmobile_timer_init(void);
 extern void shmobile_setup_delay(unsigned int max_cpu_core_mhz,
 			 unsigned int mult, unsigned int div);
 struct twd_local_timer;
@@ -10,7 +9,16 @@
 extern void shmobile_boot_vector(void);
 extern unsigned long shmobile_boot_fn;
 extern unsigned long shmobile_boot_arg;
+extern void shmobile_smp_boot(void);
+extern void shmobile_smp_sleep(void);
+extern void shmobile_smp_hook(unsigned int cpu, unsigned long fn,
+			      unsigned long arg);
 extern void shmobile_boot_scu(void);
+extern void shmobile_smp_scu_prepare_cpus(unsigned int max_cpus);
+extern int shmobile_smp_scu_boot_secondary(unsigned int cpu,
+					   struct task_struct *idle);
+extern void shmobile_smp_scu_cpu_die(unsigned int cpu);
+extern int shmobile_smp_scu_cpu_kill(unsigned int cpu);
 struct clk;
 extern int shmobile_clk_init(void);
 extern void shmobile_handle_irq_intc(struct pt_regs *);
diff --git a/arch/arm/mach-shmobile/include/mach/hardware.h b/arch/arm/mach-shmobile/include/mach/hardware.h
deleted file mode 100644
index 99264a5..0000000
--- a/arch/arm/mach-shmobile/include/mach/hardware.h
+++ /dev/null
@@ -1,4 +0,0 @@
-#ifndef __ASM_MACH_HARDWARE_H
-#define __ASM_MACH_HARDWARE_H
-
-#endif /* __ASM_MACH_HARDWARE_H */
diff --git a/arch/arm/mach-shmobile/include/mach/r8a73a4.h b/arch/arm/mach-shmobile/include/mach/r8a73a4.h
index 144a85e..f3a9b70 100644
--- a/arch/arm/mach-shmobile/include/mach/r8a73a4.h
+++ b/arch/arm/mach-shmobile/include/mach/r8a73a4.h
@@ -2,6 +2,7 @@
 #define __ASM_R8A73A4_H__
 
 void r8a73a4_add_standard_devices(void);
+void r8a73a4_add_dt_devices(void);
 void r8a73a4_clock_init(void);
 void r8a73a4_pinmux_init(void);
 void r8a73a4_init_delay(void);
diff --git a/arch/arm/mach-shmobile/include/mach/r8a7740.h b/arch/arm/mach-shmobile/include/mach/r8a7740.h
index 56f3750..d07932f 100644
--- a/arch/arm/mach-shmobile/include/mach/r8a7740.h
+++ b/arch/arm/mach-shmobile/include/mach/r8a7740.h
@@ -48,7 +48,6 @@
 
 extern void r8a7740_meram_workaround(void);
 extern void r8a7740_init_delay(void);
-extern void r8a7740_init_irq(void);
 extern void r8a7740_init_irq_of(void);
 extern void r8a7740_map_io(void);
 extern void r8a7740_add_early_devices(void);
diff --git a/arch/arm/mach-shmobile/include/mach/r8a7778.h b/arch/arm/mach-shmobile/include/mach/r8a7778.h
index 2866704..adfcf51 100644
--- a/arch/arm/mach-shmobile/include/mach/r8a7778.h
+++ b/arch/arm/mach-shmobile/include/mach/r8a7778.h
@@ -18,21 +18,15 @@
 #ifndef __ASM_R8A7778_H__
 #define __ASM_R8A7778_H__
 
-#include <linux/mmc/sh_mmcif.h>
-#include <linux/mmc/sh_mobile_sdhi.h>
 #include <linux/sh_eth.h>
-#include <linux/platform_data/usb-rcar-phy.h>
 #include <linux/platform_data/camera-rcar.h>
 
 extern void r8a7778_add_standard_devices(void);
 extern void r8a7778_add_standard_devices_dt(void);
 extern void r8a7778_add_ether_device(struct sh_eth_plat_data *pdata);
-extern void r8a7778_add_usb_phy_device(struct rcar_phy_platform_data *pdata);
-extern void r8a7778_add_i2c_device(int id);
-extern void r8a7778_add_hspi_device(int id);
-extern void r8a7778_add_mmc_device(struct sh_mmcif_plat_data *info);
 extern void r8a7778_add_vin_device(int id,
 				   struct rcar_vin_platform_data *pdata);
+extern void r8a7778_add_dt_devices(void);
 
 extern void r8a7778_init_late(void);
 extern void r8a7778_init_delay(void);
@@ -40,6 +34,5 @@
 extern void r8a7778_clock_init(void);
 extern void r8a7778_init_irq_extpin(int irlm);
 extern void r8a7778_pinmux_init(void);
-extern void r8a7778_sdhi_init(int id, struct sh_mobile_sdhi_info *info);
 
 #endif /* __ASM_R8A7778_H__ */
diff --git a/arch/arm/mach-shmobile/include/mach/r8a7779.h b/arch/arm/mach-shmobile/include/mach/r8a7779.h
index 6d2b641..11c7400 100644
--- a/arch/arm/mach-shmobile/include/mach/r8a7779.h
+++ b/arch/arm/mach-shmobile/include/mach/r8a7779.h
@@ -4,7 +4,6 @@
 #include <linux/sh_clk.h>
 #include <linux/pm_domain.h>
 #include <linux/sh_eth.h>
-#include <linux/platform_data/usb-rcar-phy.h>
 #include <linux/platform_data/camera-rcar.h>
 
 struct platform_device;
@@ -26,7 +25,6 @@
 }
 
 extern void r8a7779_init_delay(void);
-extern void r8a7779_init_irq(void);
 extern void r8a7779_init_irq_extpin(int irlm);
 extern void r8a7779_init_irq_dt(void);
 extern void r8a7779_map_io(void);
@@ -35,7 +33,6 @@
 extern void r8a7779_add_standard_devices(void);
 extern void r8a7779_add_standard_devices_dt(void);
 extern void r8a7779_add_ether_device(struct sh_eth_plat_data *pdata);
-extern void r8a7779_add_usb_phy_device(struct rcar_phy_platform_data *pdata);
 extern void r8a7779_add_vin_device(int idx,
 				   struct rcar_vin_platform_data *pdata);
 extern void r8a7779_init_late(void);
diff --git a/arch/arm/mach-shmobile/include/mach/r8a7790.h b/arch/arm/mach-shmobile/include/mach/r8a7790.h
index 7aaef40..788d559 100644
--- a/arch/arm/mach-shmobile/include/mach/r8a7790.h
+++ b/arch/arm/mach-shmobile/include/mach/r8a7790.h
@@ -2,6 +2,7 @@
 #define __ASM_R8A7790_H__
 
 void r8a7790_add_standard_devices(void);
+void r8a7790_add_dt_devices(void);
 void r8a7790_clock_init(void);
 void r8a7790_pinmux_init(void);
 void r8a7790_init_delay(void);
diff --git a/arch/arm/mach-shmobile/include/mach/sh73a0.h b/arch/arm/mach-shmobile/include/mach/sh73a0.h
index 680dc5f..359b582 100644
--- a/arch/arm/mach-shmobile/include/mach/sh73a0.h
+++ b/arch/arm/mach-shmobile/include/mach/sh73a0.h
@@ -1,8 +1,6 @@
 #ifndef __ASM_SH73A0_H__
 #define __ASM_SH73A0_H__
 
-#define GPIO_NR			310
-
 /* DMA slave IDs */
 enum {
 	SHDMA_SLAVE_INVALID,
diff --git a/arch/arm/mach-shmobile/intc-r8a7740.c b/arch/arm/mach-shmobile/intc-r8a7740.c
deleted file mode 100644
index 8871f77..0000000
--- a/arch/arm/mach-shmobile/intc-r8a7740.c
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * R8A7740 processor support
- *
- * Copyright (C) 2011  Renesas Solutions Corp.
- * Copyright (C) 2011  Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-
-#include <linux/init.h>
-#include <linux/io.h>
-#include <linux/irqchip.h>
-#include <linux/irqchip/arm-gic.h>
-
-static void __init r8a7740_init_irq_common(void)
-{
-	void __iomem *intc_prio_base = ioremap_nocache(0xe6900010, 0x10);
-	void __iomem *intc_msk_base = ioremap_nocache(0xe6900040, 0x10);
-	void __iomem *pfc_inta_ctrl = ioremap_nocache(0xe605807c, 0x4);
-
-	/* route signals to GIC */
-	iowrite32(0x0, pfc_inta_ctrl);
-
-	/*
-	 * To mask the shared interrupt to SPI 149 we must ensure to set
-	 * PRIO *and* MASK. Else we run into IRQ floods when registering
-	 * the intc_irqpin devices
-	 */
-	iowrite32(0x0, intc_prio_base + 0x0);
-	iowrite32(0x0, intc_prio_base + 0x4);
-	iowrite32(0x0, intc_prio_base + 0x8);
-	iowrite32(0x0, intc_prio_base + 0xc);
-	iowrite8(0xff, intc_msk_base + 0x0);
-	iowrite8(0xff, intc_msk_base + 0x4);
-	iowrite8(0xff, intc_msk_base + 0x8);
-	iowrite8(0xff, intc_msk_base + 0xc);
-
-	iounmap(intc_prio_base);
-	iounmap(intc_msk_base);
-	iounmap(pfc_inta_ctrl);
-}
-
-void __init r8a7740_init_irq_of(void)
-{
-	irqchip_init();
-	r8a7740_init_irq_common();
-}
-
-void __init r8a7740_init_irq(void)
-{
-	void __iomem *gic_dist_base = ioremap_nocache(0xc2800000, 0x1000);
-	void __iomem *gic_cpu_base = ioremap_nocache(0xc2000000, 0x1000);
-
-	/* initialize the Generic Interrupt Controller PL390 r0p0 */
-	gic_init(0, 29, gic_dist_base, gic_cpu_base);
-	r8a7740_init_irq_common();
-}
diff --git a/arch/arm/mach-shmobile/intc-r8a7779.c b/arch/arm/mach-shmobile/intc-r8a7779.c
deleted file mode 100644
index b86dc89..0000000
--- a/arch/arm/mach-shmobile/intc-r8a7779.c
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- * r8a7779 processor support - INTC hardware block
- *
- * Copyright (C) 2011  Renesas Solutions Corp.
- * Copyright (C) 2011  Magnus Damm
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/platform_device.h>
-#include <linux/interrupt.h>
-#include <linux/irq.h>
-#include <linux/io.h>
-#include <linux/irqchip/arm-gic.h>
-#include <linux/platform_data/irq-renesas-intc-irqpin.h>
-#include <linux/irqchip.h>
-#include <mach/common.h>
-#include <mach/intc.h>
-#include <mach/irqs.h>
-#include <mach/r8a7779.h>
-#include <asm/mach-types.h>
-#include <asm/mach/arch.h>
-
-#define INT2SMSKCR0 IOMEM(0xfe7822a0)
-#define INT2SMSKCR1 IOMEM(0xfe7822a4)
-#define INT2SMSKCR2 IOMEM(0xfe7822a8)
-#define INT2SMSKCR3 IOMEM(0xfe7822ac)
-#define INT2SMSKCR4 IOMEM(0xfe7822b0)
-
-#define INT2NTSR0 IOMEM(0xfe700060)
-#define INT2NTSR1 IOMEM(0xfe700064)
-
-static struct renesas_intc_irqpin_config irqpin0_platform_data = {
-	.irq_base = irq_pin(0), /* IRQ0 -> IRQ3 */
-	.sense_bitfield_width = 2,
-};
-
-static struct resource irqpin0_resources[] = {
-	DEFINE_RES_MEM(0xfe78001c, 4), /* ICR1 */
-	DEFINE_RES_MEM(0xfe780010, 4), /* INTPRI */
-	DEFINE_RES_MEM(0xfe780024, 4), /* INTREQ */
-	DEFINE_RES_MEM(0xfe780044, 4), /* INTMSK0 */
-	DEFINE_RES_MEM(0xfe780064, 4), /* INTMSKCLR0 */
-	DEFINE_RES_IRQ(gic_spi(27)), /* IRQ0 */
-	DEFINE_RES_IRQ(gic_spi(28)), /* IRQ1 */
-	DEFINE_RES_IRQ(gic_spi(29)), /* IRQ2 */
-	DEFINE_RES_IRQ(gic_spi(30)), /* IRQ3 */
-};
-
-static struct platform_device irqpin0_device = {
-	.name		= "renesas_intc_irqpin",
-	.id		= 0,
-	.resource	= irqpin0_resources,
-	.num_resources	= ARRAY_SIZE(irqpin0_resources),
-	.dev		= {
-		.platform_data	= &irqpin0_platform_data,
-	},
-};
-
-void __init r8a7779_init_irq_extpin(int irlm)
-{
-	void __iomem *icr0 = ioremap_nocache(0xfe780000, PAGE_SIZE);
-	unsigned long tmp;
-
-	if (icr0) {
-		tmp = ioread32(icr0);
-		if (irlm)
-			tmp |= 1 << 23; /* IRQ0 -> IRQ3 as individual pins */
-		else
-			tmp &= ~(1 << 23); /* IRL mode - not supported */
-		tmp |= (1 << 21); /* LVLMODE = 1 */
-		iowrite32(tmp, icr0);
-		iounmap(icr0);
-
-		if (irlm)
-			platform_device_register(&irqpin0_device);
-	} else
-		pr_warn("r8a7779: unable to setup external irq pin mode\n");
-}
-
-static int r8a7779_set_wake(struct irq_data *data, unsigned int on)
-{
-	return 0; /* always allow wakeup */
-}
-
-static void __init r8a7779_init_irq_common(void)
-{
-	gic_arch_extn.irq_set_wake = r8a7779_set_wake;
-
-	/* route all interrupts to ARM */
-	__raw_writel(0xffffffff, INT2NTSR0);
-	__raw_writel(0x3fffffff, INT2NTSR1);
-
-	/* unmask all known interrupts in INTCS2 */
-	__raw_writel(0xfffffff0, INT2SMSKCR0);
-	__raw_writel(0xfff7ffff, INT2SMSKCR1);
-	__raw_writel(0xfffbffdf, INT2SMSKCR2);
-	__raw_writel(0xbffffffc, INT2SMSKCR3);
-	__raw_writel(0x003fee3f, INT2SMSKCR4);
-}
-
-void __init r8a7779_init_irq(void)
-{
-	void __iomem *gic_dist_base = IOMEM(0xf0001000);
-	void __iomem *gic_cpu_base = IOMEM(0xf0000100);
-
-	/* use GIC to handle interrupts */
-	gic_init(0, 29, gic_dist_base, gic_cpu_base);
-
-	r8a7779_init_irq_common();
-}
-
-#ifdef CONFIG_OF
-void __init r8a7779_init_irq_dt(void)
-{
-	irqchip_init();
-	r8a7779_init_irq_common();
-}
-#endif
diff --git a/arch/arm/mach-shmobile/platsmp-scu.c b/arch/arm/mach-shmobile/platsmp-scu.c
new file mode 100644
index 0000000..c96f501
--- /dev/null
+++ b/arch/arm/mach-shmobile/platsmp-scu.c
@@ -0,0 +1,81 @@
+/*
+ * SMP support for SoCs with SCU covered by mach-shmobile
+ *
+ * Copyright (C) 2013  Magnus Damm
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/smp.h>
+#include <asm/cacheflush.h>
+#include <asm/smp_plat.h>
+#include <asm/smp_scu.h>
+#include <mach/common.h>
+
+void __init shmobile_smp_scu_prepare_cpus(unsigned int max_cpus)
+{
+	/* install boot code shared by all CPUs */
+	shmobile_boot_fn = virt_to_phys(shmobile_smp_boot);
+	shmobile_boot_arg = MPIDR_HWID_BITMASK;
+
+	/* enable SCU and cache coherency on booting CPU */
+	scu_enable(shmobile_scu_base);
+	scu_power_mode(shmobile_scu_base, SCU_PM_NORMAL);
+}
+
+int shmobile_smp_scu_boot_secondary(unsigned int cpu, struct task_struct *idle)
+{
+	/* For this particular CPU register SCU boot vector */
+	shmobile_smp_hook(cpu, virt_to_phys(shmobile_boot_scu),
+			  (unsigned long)shmobile_scu_base);
+	return 0;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+void shmobile_smp_scu_cpu_die(unsigned int cpu)
+{
+	/* For this particular CPU deregister boot vector */
+	shmobile_smp_hook(cpu, 0, 0);
+
+	dsb();
+	flush_cache_all();
+
+	/* disable cache coherency */
+	scu_power_mode(shmobile_scu_base, SCU_PM_POWEROFF);
+
+	/* jump to shared mach-shmobile sleep / reset code */
+	shmobile_smp_sleep();
+}
+
+static int shmobile_smp_scu_psr_core_disabled(int cpu)
+{
+	unsigned long mask = SCU_PM_POWEROFF << (cpu * 8);
+
+	if ((__raw_readl(shmobile_scu_base + 8) & mask) == mask)
+		return 1;
+
+	return 0;
+}
+
+int shmobile_smp_scu_cpu_kill(unsigned int cpu)
+{
+	int k;
+
+	/* this function is running on another CPU than the offline target,
+	 * here we need wait for shutdown code in platform_cpu_die() to
+	 * finish before asking SoC-specific code to power off the CPU core.
+	 */
+	for (k = 0; k < 1000; k++) {
+		if (shmobile_smp_scu_psr_core_disabled(cpu))
+			return 1;
+
+		mdelay(1);
+	}
+
+	return 0;
+}
+#endif
diff --git a/arch/arm/mach-shmobile/platsmp.c b/arch/arm/mach-shmobile/platsmp.c
index 1f958d7..d4ae616 100644
--- a/arch/arm/mach-shmobile/platsmp.c
+++ b/arch/arm/mach-shmobile/platsmp.c
@@ -12,6 +12,9 @@
  */
 #include <linux/init.h>
 #include <linux/smp.h>
+#include <asm/cacheflush.h>
+#include <asm/smp_plat.h>
+#include <mach/common.h>
 
 void __init shmobile_smp_init_cpus(unsigned int ncores)
 {
@@ -26,3 +29,18 @@
 	for (i = 0; i < ncores; i++)
 		set_cpu_possible(i, true);
 }
+
+extern unsigned long shmobile_smp_fn[];
+extern unsigned long shmobile_smp_arg[];
+extern unsigned long shmobile_smp_mpidr[];
+
+void shmobile_smp_hook(unsigned int cpu, unsigned long fn, unsigned long arg)
+{
+	shmobile_smp_fn[cpu] = 0;
+	flush_cache_all();
+
+	shmobile_smp_mpidr[cpu] = cpu_logical_map(cpu);
+	shmobile_smp_fn[cpu] = fn;
+	shmobile_smp_arg[cpu] = arg;
+	flush_cache_all();
+}
diff --git a/arch/arm/mach-shmobile/setup-emev2.c b/arch/arm/mach-shmobile/setup-emev2.c
index 1553af8..3ad531c 100644
--- a/arch/arm/mach-shmobile/setup-emev2.c
+++ b/arch/arm/mach-shmobile/setup-emev2.c
@@ -27,7 +27,6 @@
 #include <linux/input.h>
 #include <linux/io.h>
 #include <linux/irqchip/arm-gic.h>
-#include <mach/hardware.h>
 #include <mach/common.h>
 #include <mach/emev2.h>
 #include <mach/irqs.h>
diff --git a/arch/arm/mach-shmobile/setup-r8a73a4.c b/arch/arm/mach-shmobile/setup-r8a73a4.c
index d533bd2..89491700 100644
--- a/arch/arm/mach-shmobile/setup-r8a73a4.c
+++ b/arch/arm/mach-shmobile/setup-r8a73a4.c
@@ -188,7 +188,7 @@
 					  &cmt##idx##_platform_data,	\
 					  sizeof(struct sh_timer_config))
 
-void __init r8a73a4_add_standard_devices(void)
+void __init r8a73a4_add_dt_devices(void)
 {
 	r8a73a4_register_scif(SCIFA0);
 	r8a73a4_register_scif(SCIFA1);
@@ -196,10 +196,15 @@
 	r8a73a4_register_scif(SCIFB1);
 	r8a73a4_register_scif(SCIFB2);
 	r8a73a4_register_scif(SCIFB3);
+	r8a7790_register_cmt(10);
+}
+
+void __init r8a73a4_add_standard_devices(void)
+{
+	r8a73a4_add_dt_devices();
 	r8a73a4_register_irqc(0);
 	r8a73a4_register_irqc(1);
 	r8a73a4_register_thermal();
-	r8a7790_register_cmt(10);
 }
 
 void __init r8a73a4_init_delay(void)
@@ -210,11 +215,6 @@
 }
 
 #ifdef CONFIG_USE_OF
-void __init r8a73a4_add_standard_devices_dt(void)
-{
-	platform_device_register_simple("cpufreq-cpu0", -1, NULL, 0);
-	of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
-}
 
 static const char *r8a73a4_boards_compat_dt[] __initdata = {
 	"renesas,r8a73a4",
@@ -223,8 +223,6 @@
 
 DT_MACHINE_START(R8A73A4_DT, "Generic R8A73A4 (Flattened Device Tree)")
 	.init_early	= r8a73a4_init_delay,
-	.init_machine	= r8a73a4_add_standard_devices_dt,
-	.init_time	= shmobile_timer_init,
 	.dt_compat	= r8a73a4_boards_compat_dt,
 MACHINE_END
 #endif /* CONFIG_USE_OF */
diff --git a/arch/arm/mach-shmobile/setup-r8a7740.c b/arch/arm/mach-shmobile/setup-r8a7740.c
index 84c5bb6..b7d4b2c 100644
--- a/arch/arm/mach-shmobile/setup-r8a7740.c
+++ b/arch/arm/mach-shmobile/setup-r8a7740.c
@@ -22,6 +22,8 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/io.h>
+#include <linux/irqchip.h>
+#include <linux/irqchip/arm-gic.h>
 #include <linux/platform_data/irq-renesas-intc-irqpin.h>
 #include <linux/platform_device.h>
 #include <linux/of_platform.h>
@@ -1019,6 +1021,36 @@
 	shmobile_setup_delay(800, 1, 3); /* Cortex-A9 @ 800MHz */
 };
 
+void __init r8a7740_init_irq_of(void)
+{
+	void __iomem *intc_prio_base = ioremap_nocache(0xe6900010, 0x10);
+	void __iomem *intc_msk_base = ioremap_nocache(0xe6900040, 0x10);
+	void __iomem *pfc_inta_ctrl = ioremap_nocache(0xe605807c, 0x4);
+
+	irqchip_init();
+
+	/* route signals to GIC */
+	iowrite32(0x0, pfc_inta_ctrl);
+
+	/*
+	 * To mask the shared interrupt to SPI 149 we must ensure to set
+	 * PRIO *and* MASK. Else we run into IRQ floods when registering
+	 * the intc_irqpin devices
+	 */
+	iowrite32(0x0, intc_prio_base + 0x0);
+	iowrite32(0x0, intc_prio_base + 0x4);
+	iowrite32(0x0, intc_prio_base + 0x8);
+	iowrite32(0x0, intc_prio_base + 0xc);
+	iowrite8(0xff, intc_msk_base + 0x0);
+	iowrite8(0xff, intc_msk_base + 0x4);
+	iowrite8(0xff, intc_msk_base + 0x8);
+	iowrite8(0xff, intc_msk_base + 0xc);
+
+	iounmap(intc_prio_base);
+	iounmap(intc_msk_base);
+	iounmap(pfc_inta_ctrl);
+}
+
 static void __init r8a7740_generic_init(void)
 {
 	r8a7740_clock_init(0);
@@ -1035,7 +1067,6 @@
 	.init_early	= r8a7740_init_delay,
 	.init_irq	= r8a7740_init_irq_of,
 	.init_machine	= r8a7740_generic_init,
-	.init_time	= shmobile_timer_init,
 	.dt_compat	= r8a7740_boards_compat_dt,
 MACHINE_END
 
diff --git a/arch/arm/mach-shmobile/setup-r8a7778.c b/arch/arm/mach-shmobile/setup-r8a7778.c
index 203becf..6a2657e 100644
--- a/arch/arm/mach-shmobile/setup-r8a7778.c
+++ b/arch/arm/mach-shmobile/setup-r8a7778.c
@@ -95,20 +95,6 @@
 		&sh_tmu##idx##_platform_data,		\
 		sizeof(sh_tmu##idx##_platform_data))
 
-/* USB PHY */
-static struct resource usb_phy_resources[] __initdata = {
-	DEFINE_RES_MEM(0xffe70800, 0x100),
-	DEFINE_RES_MEM(0xffe76000, 0x100),
-};
-
-void __init r8a7778_add_usb_phy_device(struct rcar_phy_platform_data *pdata)
-{
-	platform_device_register_resndata(&platform_bus, "rcar_usb_phy", -1,
-					  usb_phy_resources,
-					  ARRAY_SIZE(usb_phy_resources),
-					  pdata, sizeof(*pdata));
-}
-
 /* USB */
 static struct usb_phy *phy;
 
@@ -248,30 +234,6 @@
 	r8a7778_register_gpio(4);
 };
 
-/* SDHI */
-static struct resource sdhi_resources[] __initdata = {
-	/* SDHI0 */
-	DEFINE_RES_MEM(0xFFE4C000, 0x100),
-	DEFINE_RES_IRQ(gic_iid(0x77)),
-	/* SDHI1 */
-	DEFINE_RES_MEM(0xFFE4D000, 0x100),
-	DEFINE_RES_IRQ(gic_iid(0x78)),
-	/* SDHI2 */
-	DEFINE_RES_MEM(0xFFE4F000, 0x100),
-	DEFINE_RES_IRQ(gic_iid(0x76)),
-};
-
-void __init r8a7778_sdhi_init(int id,
-			      struct sh_mobile_sdhi_info *info)
-{
-	BUG_ON(id < 0 || id > 2);
-
-	platform_device_register_resndata(
-		&platform_bus, "sh_mobile_sdhi", id,
-		sdhi_resources + (2 * id), 2,
-		info, sizeof(*info));
-}
-
 /* I2C */
 static struct resource i2c_resources[] __initdata = {
 	/* I2C0 */
@@ -288,7 +250,7 @@
 	DEFINE_RES_IRQ(gic_iid(0x6d)),
 };
 
-void __init r8a7778_add_i2c_device(int id)
+static void __init r8a7778_register_i2c(int id)
 {
 	BUG_ON(id < 0 || id > 3);
 
@@ -310,7 +272,7 @@
 	DEFINE_RES_IRQ(gic_iid(0x75)),
 };
 
-void __init r8a7778_add_hspi_device(int id)
+void __init r8a7778_register_hspi(int id)
 {
 	BUG_ON(id < 0 || id > 2);
 
@@ -319,20 +281,6 @@
 		hspi_resources + (2 * id), 2);
 }
 
-/* MMC */
-static struct resource mmc_resources[] __initdata = {
-	DEFINE_RES_MEM(0xffe4e000, 0x100),
-	DEFINE_RES_IRQ(gic_iid(0x5d)),
-};
-
-void __init r8a7778_add_mmc_device(struct sh_mmcif_plat_data *info)
-{
-	platform_device_register_resndata(
-		&platform_bus, "sh_mmcif", -1,
-		mmc_resources, ARRAY_SIZE(mmc_resources),
-		info, sizeof(*info));
-}
-
 /* VIN */
 #define R8A7778_VIN(idx)						\
 static struct resource vin##idx##_resources[] __initdata = {		\
@@ -367,7 +315,7 @@
 	platform_device_register_full(vin_info_table[id]);
 }
 
-void __init r8a7778_add_standard_devices(void)
+void __init r8a7778_add_dt_devices(void)
 {
 	int i;
 
@@ -391,6 +339,18 @@
 	r8a7778_register_tmu(1);
 }
 
+void __init r8a7778_add_standard_devices(void)
+{
+	r8a7778_add_dt_devices();
+	r8a7778_register_i2c(0);
+	r8a7778_register_i2c(1);
+	r8a7778_register_i2c(2);
+	r8a7778_register_i2c(3);
+	r8a7778_register_hspi(0);
+	r8a7778_register_hspi(1);
+	r8a7778_register_hspi(2);
+}
+
 void __init r8a7778_init_late(void)
 {
 	phy = usb_get_phy(USB_PHY_TYPE_USB2);
@@ -480,7 +440,6 @@
 DT_MACHINE_START(R8A7778_DT, "Generic R8A7778 (Flattened Device Tree)")
 	.init_early	= r8a7778_init_delay,
 	.init_irq	= r8a7778_init_irq_dt,
-	.init_time	= shmobile_timer_init,
 	.dt_compat	= r8a7778_compat_dt,
 	.init_late      = r8a7778_init_late,
 MACHINE_END
diff --git a/arch/arm/mach-shmobile/setup-r8a7779.c b/arch/arm/mach-shmobile/setup-r8a7779.c
index 41bab62..b5b2f78 100644
--- a/arch/arm/mach-shmobile/setup-r8a7779.c
+++ b/arch/arm/mach-shmobile/setup-r8a7779.c
@@ -22,14 +22,16 @@
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
+#include <linux/irqchip.h>
+#include <linux/irqchip/arm-gic.h>
 #include <linux/of_platform.h>
 #include <linux/platform_data/gpio-rcar.h>
+#include <linux/platform_data/irq-renesas-intc-irqpin.h>
 #include <linux/platform_device.h>
 #include <linux/delay.h>
 #include <linux/input.h>
 #include <linux/io.h>
 #include <linux/serial_sci.h>
-#include <linux/sh_intc.h>
 #include <linux/sh_timer.h>
 #include <linux/dma-mapping.h>
 #include <linux/usb/otg.h>
@@ -37,7 +39,6 @@
 #include <linux/usb/ehci_pdriver.h>
 #include <linux/usb/ohci_pdriver.h>
 #include <linux/pm_runtime.h>
-#include <mach/hardware.h>
 #include <mach/irqs.h>
 #include <mach/r8a7779.h>
 #include <mach/common.h>
@@ -69,6 +70,60 @@
 	iotable_init(r8a7779_io_desc, ARRAY_SIZE(r8a7779_io_desc));
 }
 
+/* IRQ */
+#define INT2SMSKCR0 IOMEM(0xfe7822a0)
+#define INT2SMSKCR1 IOMEM(0xfe7822a4)
+#define INT2SMSKCR2 IOMEM(0xfe7822a8)
+#define INT2SMSKCR3 IOMEM(0xfe7822ac)
+#define INT2SMSKCR4 IOMEM(0xfe7822b0)
+
+#define INT2NTSR0 IOMEM(0xfe700060)
+#define INT2NTSR1 IOMEM(0xfe700064)
+
+static struct renesas_intc_irqpin_config irqpin0_platform_data __initdata = {
+	.irq_base = irq_pin(0), /* IRQ0 -> IRQ3 */
+	.sense_bitfield_width = 2,
+};
+
+static struct resource irqpin0_resources[] __initdata = {
+	DEFINE_RES_MEM(0xfe78001c, 4), /* ICR1 */
+	DEFINE_RES_MEM(0xfe780010, 4), /* INTPRI */
+	DEFINE_RES_MEM(0xfe780024, 4), /* INTREQ */
+	DEFINE_RES_MEM(0xfe780044, 4), /* INTMSK0 */
+	DEFINE_RES_MEM(0xfe780064, 4), /* INTMSKCLR0 */
+	DEFINE_RES_IRQ(gic_spi(27)), /* IRQ0 */
+	DEFINE_RES_IRQ(gic_spi(28)), /* IRQ1 */
+	DEFINE_RES_IRQ(gic_spi(29)), /* IRQ2 */
+	DEFINE_RES_IRQ(gic_spi(30)), /* IRQ3 */
+};
+
+void __init r8a7779_init_irq_extpin(int irlm)
+{
+	void __iomem *icr0 = ioremap_nocache(0xfe780000, PAGE_SIZE);
+	u32 tmp;
+
+	if (!icr0) {
+		pr_warn("r8a7779: unable to setup external irq pin mode\n");
+		return;
+	}
+
+	tmp = ioread32(icr0);
+	if (irlm)
+		tmp |= 1 << 23; /* IRQ0 -> IRQ3 as individual pins */
+	else
+		tmp &= ~(1 << 23); /* IRL mode - not supported */
+	tmp |= (1 << 21); /* LVLMODE = 1 */
+	iowrite32(tmp, icr0);
+	iounmap(icr0);
+
+	if (irlm)
+		platform_device_register_resndata(
+			&platform_bus, "renesas_intc_irqpin", -1,
+			irqpin0_resources, ARRAY_SIZE(irqpin0_resources),
+			&irqpin0_platform_data, sizeof(irqpin0_platform_data));
+}
+
+/* PFC/GPIO */
 static struct resource r8a7779_pfc_resources[] = {
 	DEFINE_RES_MEM(0xfffc0000, 0x023c),
 };
@@ -388,15 +443,6 @@
 	},
 };
 
-/* USB PHY */
-static struct resource usb_phy_resources[] __initdata = {
-	[0] = {
-		.start		= 0xffe70800,
-		.end		= 0xffe70900 - 1,
-		.flags		= IORESOURCE_MEM,
-	},
-};
-
 /* USB */
 static struct usb_phy *phy;
 
@@ -548,7 +594,7 @@
 };
 
 /* Ether */
-static struct resource ether_resources[] = {
+static struct resource ether_resources[] __initdata = {
 	{
 		.start	= 0xfde00000,
 		.end	= 0xfde003ff,
@@ -629,14 +675,6 @@
 					  pdata, sizeof(*pdata));
 }
 
-void __init r8a7779_add_usb_phy_device(struct rcar_phy_platform_data *pdata)
-{
-	platform_device_register_resndata(&platform_bus, "rcar_usb_phy", -1,
-					  usb_phy_resources,
-					  ARRAY_SIZE(usb_phy_resources),
-					  pdata, sizeof(*pdata));
-}
-
 void __init r8a7779_add_vin_device(int id, struct rcar_vin_platform_data *pdata)
 {
 	BUG_ON(id < 0 || id > 3);
@@ -697,6 +735,29 @@
 }
 
 #ifdef CONFIG_USE_OF
+static int r8a7779_set_wake(struct irq_data *data, unsigned int on)
+{
+	return 0; /* always allow wakeup */
+}
+
+void __init r8a7779_init_irq_dt(void)
+{
+	gic_arch_extn.irq_set_wake = r8a7779_set_wake;
+
+	irqchip_init();
+
+	/* route all interrupts to ARM */
+	__raw_writel(0xffffffff, INT2NTSR0);
+	__raw_writel(0x3fffffff, INT2NTSR1);
+
+	/* unmask all known interrupts in INTCS2 */
+	__raw_writel(0xfffffff0, INT2SMSKCR0);
+	__raw_writel(0xfff7ffff, INT2SMSKCR1);
+	__raw_writel(0xfffbffdf, INT2SMSKCR2);
+	__raw_writel(0xbffffffc, INT2SMSKCR3);
+	__raw_writel(0x003fee3f, INT2SMSKCR4);
+}
+
 void __init r8a7779_init_delay(void)
 {
 	shmobile_setup_delay(1000, 2, 4); /* Cortex-A9 @ 1000MHz */
@@ -723,7 +784,6 @@
 	.nr_irqs	= NR_IRQS_LEGACY,
 	.init_irq	= r8a7779_init_irq_dt,
 	.init_machine	= r8a7779_add_standard_devices_dt,
-	.init_time	= shmobile_timer_init,
 	.init_late	= r8a7779_init_late,
 	.dt_compat	= r8a7779_compat_dt,
 MACHINE_END
diff --git a/arch/arm/mach-shmobile/setup-r8a7790.c b/arch/arm/mach-shmobile/setup-r8a7790.c
index 4c96dad..d0f5c9f 100644
--- a/arch/arm/mach-shmobile/setup-r8a7790.c
+++ b/arch/arm/mach-shmobile/setup-r8a7790.c
@@ -18,6 +18,7 @@
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
+#include <linux/clocksource.h>
 #include <linux/irq.h>
 #include <linux/kernel.h>
 #include <linux/of_platform.h>
@@ -160,13 +161,13 @@
 					thermal_resources,		\
 					ARRAY_SIZE(thermal_resources))
 
-static struct sh_timer_config cmt00_platform_data = {
+static struct sh_timer_config cmt00_platform_data __initdata = {
 	.name = "CMT00",
 	.timer_bit = 0,
 	.clockevent_rating = 80,
 };
 
-static struct resource cmt00_resources[] = {
+static struct resource cmt00_resources[] __initdata = {
 	DEFINE_RES_MEM(0xffca0510, 0x0c),
 	DEFINE_RES_MEM(0xffca0500, 0x04),
 	DEFINE_RES_IRQ(gic_spi(142)), /* CMT0_0 */
@@ -179,7 +180,7 @@
 					  &cmt##idx##_platform_data,	\
 					  sizeof(struct sh_timer_config))
 
-void __init r8a7790_add_standard_devices(void)
+void __init r8a7790_add_dt_devices(void)
 {
 	r8a7790_register_scif(SCIFA0);
 	r8a7790_register_scif(SCIFA1);
@@ -191,9 +192,14 @@
 	r8a7790_register_scif(SCIF1);
 	r8a7790_register_scif(HSCIF0);
 	r8a7790_register_scif(HSCIF1);
+	r8a7790_register_cmt(00);
+}
+
+void __init r8a7790_add_standard_devices(void)
+{
+	r8a7790_add_dt_devices();
 	r8a7790_register_irqc(0);
 	r8a7790_register_thermal();
-	r8a7790_register_cmt(00);
 }
 
 #define MODEMR 0xe6160060
@@ -258,7 +264,7 @@
 	iounmap(base);
 #endif /* CONFIG_ARM_ARCH_TIMER */
 
-	shmobile_timer_init();
+	clocksource_of_init();
 }
 
 void __init r8a7790_init_delay(void)
diff --git a/arch/arm/mach-shmobile/setup-sh7372.c b/arch/arm/mach-shmobile/setup-sh7372.c
index 13e6fdb..3118783 100644
--- a/arch/arm/mach-shmobile/setup-sh7372.c
+++ b/arch/arm/mach-shmobile/setup-sh7372.c
@@ -35,7 +35,6 @@
 #include <linux/dma-mapping.h>
 #include <linux/platform_data/sh_ipmmu.h>
 #include <mach/dma-register.h>
-#include <mach/hardware.h>
 #include <mach/irqs.h>
 #include <mach/sh7372.h>
 #include <mach/common.h>
diff --git a/arch/arm/mach-shmobile/setup-sh73a0.c b/arch/arm/mach-shmobile/setup-sh73a0.c
index 516c239..22de174 100644
--- a/arch/arm/mach-shmobile/setup-sh73a0.c
+++ b/arch/arm/mach-shmobile/setup-sh73a0.c
@@ -34,7 +34,6 @@
 #include <linux/platform_data/sh_ipmmu.h>
 #include <linux/platform_data/irq-renesas-intc-irqpin.h>
 #include <mach/dma-register.h>
-#include <mach/hardware.h>
 #include <mach/irqs.h>
 #include <mach/sh73a0.h>
 #include <mach/common.h>
diff --git a/arch/arm/mach-shmobile/smp-emev2.c b/arch/arm/mach-shmobile/smp-emev2.c
index 78e84c5..522de5e 100644
--- a/arch/arm/mach-shmobile/smp-emev2.c
+++ b/arch/arm/mach-shmobile/smp-emev2.c
@@ -34,6 +34,12 @@
 
 static int emev2_boot_secondary(unsigned int cpu, struct task_struct *idle)
 {
+	int ret;
+
+	ret = shmobile_smp_scu_boot_secondary(cpu, idle);
+	if (ret)
+		return ret;
+
 	arch_send_wakeup_ipi_mask(cpumask_of(cpu_logical_map(cpu)));
 	return 0;
 }
@@ -42,21 +48,16 @@
 {
 	void __iomem *smu;
 
-	/* setup EMEV2 specific SCU base, enable */
-	shmobile_scu_base = ioremap(EMEV2_SCU_BASE, PAGE_SIZE);
-	scu_enable(shmobile_scu_base);
-
-	/* Tell ROM loader about our vector (in headsmp-scu.S, headsmp.S) */
+	/* Tell ROM loader about our vector (in headsmp.S) */
 	smu = ioremap(EMEV2_SMU_BASE, PAGE_SIZE);
 	if (smu) {
 		iowrite32(__pa(shmobile_boot_vector), smu + SMU_GENERAL_REG0);
 		iounmap(smu);
 	}
-	shmobile_boot_fn = virt_to_phys(shmobile_boot_scu);
-	shmobile_boot_arg = (unsigned long)shmobile_scu_base;
 
-	/* enable cache coherency on booting CPU */
-	scu_power_mode(shmobile_scu_base, SCU_PM_NORMAL);
+	/* setup EMEV2 specific SCU bits */
+	shmobile_scu_base = ioremap(EMEV2_SCU_BASE, PAGE_SIZE);
+	shmobile_smp_scu_prepare_cpus(max_cpus);
 }
 
 struct smp_operations emev2_smp_ops __initdata = {
diff --git a/arch/arm/mach-shmobile/smp-r8a7779.c b/arch/arm/mach-shmobile/smp-r8a7779.c
index 9bdf810..0f05e9f 100644
--- a/arch/arm/mach-shmobile/smp-r8a7779.c
+++ b/arch/arm/mach-shmobile/smp-r8a7779.c
@@ -84,30 +84,34 @@
 static int r8a7779_boot_secondary(unsigned int cpu, struct task_struct *idle)
 {
 	struct r8a7779_pm_ch *ch = NULL;
-	int ret = -EIO;
+	unsigned int lcpu = cpu_logical_map(cpu);
+	int ret;
 
-	cpu = cpu_logical_map(cpu);
+	ret = shmobile_smp_scu_boot_secondary(cpu, idle);
+	if (ret)
+		return ret;
 
-	if (cpu < ARRAY_SIZE(r8a7779_ch_cpu))
-		ch = r8a7779_ch_cpu[cpu];
+	if (lcpu < ARRAY_SIZE(r8a7779_ch_cpu))
+		ch = r8a7779_ch_cpu[lcpu];
 
 	if (ch)
 		ret = r8a7779_sysc_power_up(ch);
+	else
+		ret = -EIO;
 
 	return ret;
 }
 
 static void __init r8a7779_smp_prepare_cpus(unsigned int max_cpus)
 {
-	scu_enable(shmobile_scu_base);
-
 	/* Map the reset vector (in headsmp-scu.S, headsmp.S) */
 	__raw_writel(__pa(shmobile_boot_vector), AVECR);
 	shmobile_boot_fn = virt_to_phys(shmobile_boot_scu);
 	shmobile_boot_arg = (unsigned long)shmobile_scu_base;
 
-	/* enable cache coherency on booting CPU */
-	scu_power_mode(shmobile_scu_base, SCU_PM_NORMAL);
+	/* setup r8a7779 specific SCU bits */
+	shmobile_scu_base = IOMEM(R8A7779_SCU_BASE);
+	shmobile_smp_scu_prepare_cpus(max_cpus);
 
 	r8a7779_pm_init();
 
@@ -117,56 +121,15 @@
 	r8a7779_platform_cpu_kill(3);
 }
 
-static void __init r8a7779_smp_init_cpus(void)
-{
-	/* setup r8a7779 specific SCU base */
-	shmobile_scu_base = IOMEM(R8A7779_SCU_BASE);
-
-	shmobile_smp_init_cpus(scu_get_core_count(shmobile_scu_base));
-}
-
 #ifdef CONFIG_HOTPLUG_CPU
-static int r8a7779_scu_psr_core_disabled(int cpu)
-{
-	unsigned long mask = 3 << (cpu * 8);
-
-	if ((__raw_readl(shmobile_scu_base + 8) & mask) == mask)
-		return 1;
-
-	return 0;
-}
-
 static int r8a7779_cpu_kill(unsigned int cpu)
 {
-	int k;
-
-	/* this function is running on another CPU than the offline target,
-	 * here we need wait for shutdown code in platform_cpu_die() to
-	 * finish before asking SoC-specific code to power off the CPU core.
-	 */
-	for (k = 0; k < 1000; k++) {
-		if (r8a7779_scu_psr_core_disabled(cpu))
-			return r8a7779_platform_cpu_kill(cpu);
-
-		mdelay(1);
-	}
+	if (shmobile_smp_scu_cpu_kill(cpu))
+		return r8a7779_platform_cpu_kill(cpu);
 
 	return 0;
 }
 
-static void r8a7779_cpu_die(unsigned int cpu)
-{
-	dsb();
-	flush_cache_all();
-
-	/* disable cache coherency */
-	scu_power_mode(shmobile_scu_base, SCU_PM_POWEROFF);
-
-	/* Endless loop until power off from r8a7779_cpu_kill() */
-	while (1)
-		cpu_do_idle();
-}
-
 static int r8a7779_cpu_disable(unsigned int cpu)
 {
 	/* only CPU1->3 have power domains, do not allow hotplug of CPU0 */
@@ -175,12 +138,11 @@
 #endif /* CONFIG_HOTPLUG_CPU */
 
 struct smp_operations r8a7779_smp_ops  __initdata = {
-	.smp_init_cpus		= r8a7779_smp_init_cpus,
 	.smp_prepare_cpus	= r8a7779_smp_prepare_cpus,
 	.smp_boot_secondary	= r8a7779_boot_secondary,
 #ifdef CONFIG_HOTPLUG_CPU
-	.cpu_kill		= r8a7779_cpu_kill,
-	.cpu_die		= r8a7779_cpu_die,
 	.cpu_disable		= r8a7779_cpu_disable,
+	.cpu_die		= shmobile_smp_scu_cpu_die,
+	.cpu_kill		= r8a7779_cpu_kill,
 #endif
 };
diff --git a/arch/arm/mach-shmobile/smp-sh73a0.c b/arch/arm/mach-shmobile/smp-sh73a0.c
index d5fc3ed..0baa244 100644
--- a/arch/arm/mach-shmobile/smp-sh73a0.c
+++ b/arch/arm/mach-shmobile/smp-sh73a0.c
@@ -20,14 +20,11 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/smp.h>
-#include <linux/spinlock.h>
 #include <linux/io.h>
 #include <linux/delay.h>
 #include <mach/common.h>
-#include <asm/cacheflush.h>
-#include <asm/smp_plat.h>
 #include <mach/sh73a0.h>
-#include <asm/smp_scu.h>
+#include <asm/smp_plat.h>
 #include <asm/smp_twd.h>
 
 #define WUPCR		IOMEM(0xe6151010)
@@ -36,8 +33,6 @@
 #define SBAR		IOMEM(0xe6180020)
 #define APARMBAREA	IOMEM(0xe6f10020)
 
-#define PSTR_SHUTDOWN_MODE	3
-
 #define SH73A0_SCU_BASE 0xf0000000
 
 #ifdef CONFIG_HAVE_ARM_TWD
@@ -50,69 +45,33 @@
 
 static int sh73a0_boot_secondary(unsigned int cpu, struct task_struct *idle)
 {
-	cpu = cpu_logical_map(cpu);
+	unsigned int lcpu = cpu_logical_map(cpu);
+	int ret;
 
-	if (((__raw_readl(PSTR) >> (4 * cpu)) & 3) == 3)
-		__raw_writel(1 << cpu, WUPCR);	/* wake up */
+	ret = shmobile_smp_scu_boot_secondary(cpu, idle);
+	if (ret)
+		return ret;
+
+	if (((__raw_readl(PSTR) >> (4 * lcpu)) & 3) == 3)
+		__raw_writel(1 << lcpu, WUPCR);	/* wake up */
 	else
-		__raw_writel(1 << cpu, SRESCR);	/* reset */
+		__raw_writel(1 << lcpu, SRESCR);	/* reset */
 
 	return 0;
 }
 
 static void __init sh73a0_smp_prepare_cpus(unsigned int max_cpus)
 {
-	scu_enable(shmobile_scu_base);
-
-	/* Map the reset vector (in headsmp-scu.S, headsmp.S) */
+	/* Map the reset vector (in headsmp.S) */
 	__raw_writel(0, APARMBAREA);      /* 4k */
 	__raw_writel(__pa(shmobile_boot_vector), SBAR);
-	shmobile_boot_fn = virt_to_phys(shmobile_boot_scu);
-	shmobile_boot_arg = (unsigned long)shmobile_scu_base;
 
-	/* enable cache coherency on booting CPU */
-	scu_power_mode(shmobile_scu_base, SCU_PM_NORMAL);
-}
-
-static void __init sh73a0_smp_init_cpus(void)
-{
-	/* setup sh73a0 specific SCU base */
+	/* setup sh73a0 specific SCU bits */
 	shmobile_scu_base = IOMEM(SH73A0_SCU_BASE);
-
-	shmobile_smp_init_cpus(scu_get_core_count(shmobile_scu_base));
+	shmobile_smp_scu_prepare_cpus(max_cpus);
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
-static int sh73a0_cpu_kill(unsigned int cpu)
-{
-
-	int k;
-	u32 pstr;
-
-	/*
-	 * wait until the power status register confirms the shutdown of the
-	 * offline target
-	 */
-	for (k = 0; k < 1000; k++) {
-		pstr = (__raw_readl(PSTR) >> (4 * cpu)) & 3;
-		if (pstr == PSTR_SHUTDOWN_MODE)
-			return 1;
-
-		mdelay(1);
-	}
-
-	return 0;
-}
-
-static void sh73a0_cpu_die(unsigned int cpu)
-{
-	/* Set power off mode. This takes the CPU out of the MP cluster */
-	scu_power_mode(shmobile_scu_base, SCU_PM_POWEROFF);
-
-	/* Enter shutdown mode */
-	cpu_do_idle();
-}
-
 static int sh73a0_cpu_disable(unsigned int cpu)
 {
 	return 0; /* CPU0 and CPU1 supported */
@@ -120,12 +79,11 @@
 #endif /* CONFIG_HOTPLUG_CPU */
 
 struct smp_operations sh73a0_smp_ops __initdata = {
-	.smp_init_cpus		= sh73a0_smp_init_cpus,
 	.smp_prepare_cpus	= sh73a0_smp_prepare_cpus,
 	.smp_boot_secondary	= sh73a0_boot_secondary,
 #ifdef CONFIG_HOTPLUG_CPU
-	.cpu_kill		= sh73a0_cpu_kill,
-	.cpu_die		= sh73a0_cpu_die,
 	.cpu_disable		= sh73a0_cpu_disable,
+	.cpu_die		= shmobile_smp_scu_cpu_die,
+	.cpu_kill		= shmobile_smp_scu_cpu_kill,
 #endif
 };
diff --git a/arch/arm/mach-shmobile/timer.c b/arch/arm/mach-shmobile/timer.c
index f321dbe..62d7052 100644
--- a/arch/arm/mach-shmobile/timer.c
+++ b/arch/arm/mach-shmobile/timer.c
@@ -59,7 +59,3 @@
 	late_time_init = shmobile_late_time_init;
 }
 
-void __init shmobile_timer_init(void)
-{
-	clocksource_of_init();
-}
diff --git a/arch/arm/mach-ux500/board-mop500-audio.c b/arch/arm/mach-ux500/board-mop500-audio.c
index bfe443d..ec08072 100644
--- a/arch/arm/mach-ux500/board-mop500-audio.c
+++ b/arch/arm/mach-ux500/board-mop500-audio.c
@@ -17,7 +17,6 @@
 #include "ste-dma40-db8500.h"
 #include "board-mop500.h"
 #include "devices-db8500.h"
-#include "pins-db8500.h"
 
 static struct stedma40_chan_cfg msp0_dma_rx = {
 	.high_priority = true,
diff --git a/arch/arm/mach-ux500/board-mop500-pins.c b/arch/arm/mach-ux500/board-mop500-pins.c
index 7936d40..0efb156 100644
--- a/arch/arm/mach-ux500/board-mop500-pins.c
+++ b/arch/arm/mach-ux500/board-mop500-pins.c
@@ -14,7 +14,6 @@
 
 #include <asm/mach-types.h>
 
-#include "pins-db8500.h"
 #include "board-mop500.h"
 
 enum custom_pin_cfg_t {
diff --git a/arch/arm/mach-ux500/board-mop500.c b/arch/arm/mach-ux500/board-mop500.c
index 4e7ab3a..ad0806e 100644
--- a/arch/arm/mach-ux500/board-mop500.c
+++ b/arch/arm/mach-ux500/board-mop500.c
@@ -324,21 +324,19 @@
        .clock_mode     = LP55XX_CLOCK_EXT,
 };
 
+/* I2C0 devices only available on the first HREF/MOP500 */
 static struct i2c_board_info __initdata mop500_i2c0_devices[] = {
 	{
 		I2C_BOARD_INFO("tc3589x", 0x42),
 		.irq		= NOMADIK_GPIO_TO_IRQ(217),
 		.platform_data  = &mop500_tc35892_data,
 	},
-	/* I2C0 devices only available prior to HREFv60 */
 	{
 		I2C_BOARD_INFO("tps61052", 0x33),
 		.platform_data  = &mop500_tps61052_data,
 	},
 };
 
-#define NUM_PRE_V60_I2C0_DEVICES 1
-
 static struct i2c_board_info __initdata mop500_i2c2_devices[] = {
 	{
 		/* lp5521 LED driver, 1st device */
@@ -356,6 +354,17 @@
 	},
 };
 
+static int __init mop500_i2c_board_init(void)
+{
+	if (machine_is_u8500())
+		mop500_uib_i2c_add(0, mop500_i2c0_devices,
+				   ARRAY_SIZE(mop500_i2c0_devices));
+	mop500_uib_i2c_add(2, mop500_i2c2_devices,
+			   ARRAY_SIZE(mop500_i2c2_devices));
+	return 0;
+}
+device_initcall(mop500_i2c_board_init);
+
 static void __init mop500_i2c_init(struct device *parent)
 {
 	db8500_add_i2c0(parent, NULL);
@@ -564,7 +573,6 @@
 static void __init mop500_init_machine(void)
 {
 	struct device *parent = NULL;
-	int i2c0_devs;
 	int i;
 
 	platform_device_register(&db8500_prcmu_device);
@@ -587,19 +595,13 @@
 	mop500_spi_init(parent);
 	mop500_audio_init(parent);
 	mop500_uart_init(parent);
-
 	u8500_cryp1_hash1_init(parent);
 
-	i2c0_devs = ARRAY_SIZE(mop500_i2c0_devices);
-
-	i2c_register_board_info(0, mop500_i2c0_devices, i2c0_devs);
-	i2c_register_board_info(2, mop500_i2c2_devices,
-				ARRAY_SIZE(mop500_i2c2_devices));
-
 	/* This board has full regulator constraints */
 	regulator_has_full_constraints();
 }
 
+
 static void __init snowball_init_machine(void)
 {
 	struct device *parent = NULL;
@@ -634,7 +636,6 @@
 static void __init hrefv60_init_machine(void)
 {
 	struct device *parent = NULL;
-	int i2c0_devs;
 	int i;
 
 	platform_device_register(&db8500_prcmu_device);
@@ -663,14 +664,6 @@
 	mop500_audio_init(parent);
 	mop500_uart_init(parent);
 
-	i2c0_devs = ARRAY_SIZE(mop500_i2c0_devices);
-
-	i2c0_devs -= NUM_PRE_V60_I2C0_DEVICES;
-
-	i2c_register_board_info(0, mop500_i2c0_devices, i2c0_devs);
-	i2c_register_board_info(2, mop500_i2c2_devices,
-				ARRAY_SIZE(mop500_i2c2_devices));
-
 	/* This board has full regulator constraints */
 	regulator_has_full_constraints();
 }
diff --git a/arch/arm/mach-ux500/cpu-db8500.c b/arch/arm/mach-ux500/cpu-db8500.c
index bfaf95d..301c346 100644
--- a/arch/arm/mach-ux500/cpu-db8500.c
+++ b/arch/arm/mach-ux500/cpu-db8500.c
@@ -156,7 +156,8 @@
 		.supports_sleepmode = true,
 	};
 
-	dbx500_add_gpios(parent, ARRAY_AND_SIZE(db8500_gpio_base),
+	dbx500_add_gpios(parent, db8500_gpio_base,
+			 ARRAY_SIZE(db8500_gpio_base),
 			 IRQ_DB8500_GPIO0, &pdata);
 	dbx500_add_pinctrl(parent, "pinctrl-db8500", U8500_PRCMU_BASE);
 }
diff --git a/arch/arm/mach-ux500/pins-db8500.h b/arch/arm/mach-ux500/pins-db8500.h
deleted file mode 100644
index 062c7ac..0000000
--- a/arch/arm/mach-ux500/pins-db8500.h
+++ /dev/null
@@ -1,746 +0,0 @@
-/*
- * Copyright (C) ST-Ericsson SA 2010
- *
- * License terms: GNU General Public License, version 2
- * Author: Rabin Vincent <rabin.vincent@stericsson.com>
- */
-
-#ifndef __MACH_PINS_DB8500_H
-#define __MACH_PINS_DB8500_H
-
-/*
- * TODO: Eventually encode all non-board specific pull up/down configuration
- * here.
- */
-
-#define GPIO0_GPIO		PIN_CFG(0, GPIO)
-#define GPIO0_U0_CTSn		PIN_CFG(0, ALT_A)
-#define GPIO0_TRIG_OUT		PIN_CFG(0, ALT_B)
-#define GPIO0_IP_TDO		PIN_CFG(0, ALT_C)
-
-#define GPIO1_GPIO		PIN_CFG(1, GPIO)
-#define GPIO1_U0_RTSn		PIN_CFG(1, ALT_A)
-#define GPIO1_TRIG_IN		PIN_CFG(1, ALT_B)
-#define GPIO1_IP_TDI		PIN_CFG(1, ALT_C)
-
-#define GPIO2_GPIO		PIN_CFG(2, GPIO)
-#define GPIO2_U0_RXD		PIN_CFG(2, ALT_A)
-#define GPIO2_NONE		PIN_CFG(2, ALT_B)
-#define GPIO2_IP_TMS		PIN_CFG(2, ALT_C)
-
-#define GPIO3_GPIO		PIN_CFG(3, GPIO)
-#define GPIO3_U0_TXD		PIN_CFG(3, ALT_A)
-#define GPIO3_NONE		PIN_CFG(3, ALT_B)
-#define GPIO3_IP_TCK		PIN_CFG(3, ALT_C)
-
-#define GPIO4_GPIO		PIN_CFG(4, GPIO)
-#define GPIO4_U1_RXD		PIN_CFG(4, ALT_A)
-#define GPIO4_I2C4_SCL		PIN_CFG(4, ALT_B)
-#define GPIO4_IP_TRSTn		PIN_CFG(4, ALT_C)
-
-#define GPIO5_GPIO		PIN_CFG(5, GPIO)
-#define GPIO5_U1_TXD		PIN_CFG(5, ALT_A)
-#define GPIO5_I2C4_SDA		PIN_CFG(5, ALT_B)
-#define GPIO5_IP_GPIO6		PIN_CFG(5, ALT_C)
-
-#define GPIO6_GPIO		PIN_CFG(6, GPIO)
-#define GPIO6_U1_CTSn		PIN_CFG(6, ALT_A)
-#define GPIO6_I2C1_SCL		PIN_CFG(6, ALT_B)
-#define GPIO6_IP_GPIO0		PIN_CFG(6, ALT_C)
-
-#define GPIO7_GPIO		PIN_CFG(7, GPIO)
-#define GPIO7_U1_RTSn		PIN_CFG(7, ALT_A)
-#define GPIO7_I2C1_SDA		PIN_CFG(7, ALT_B)
-#define GPIO7_IP_GPIO1		PIN_CFG(7, ALT_C)
-
-#define GPIO8_GPIO		PIN_CFG(8, GPIO)
-#define GPIO8_IPI2C_SDA		PIN_CFG(8, ALT_A)
-#define GPIO8_I2C2_SDA		PIN_CFG(8, ALT_B)
-
-#define GPIO9_GPIO		PIN_CFG(9, GPIO)
-#define GPIO9_IPI2C_SCL		PIN_CFG(9, ALT_A)
-#define GPIO9_I2C2_SCL		PIN_CFG(9, ALT_B)
-
-#define GPIO10_GPIO		PIN_CFG(10, GPIO)
-#define GPIO10_IPI2C_SDA	PIN_CFG(10, ALT_A)
-#define GPIO10_I2C2_SDA		PIN_CFG(10, ALT_B)
-#define GPIO10_IP_GPIO3		PIN_CFG(10, ALT_C)
-
-#define GPIO11_GPIO		PIN_CFG(11, GPIO)
-#define GPIO11_IPI2C_SCL	PIN_CFG(11, ALT_A)
-#define GPIO11_I2C2_SCL		PIN_CFG(11, ALT_B)
-#define GPIO11_IP_GPIO2		PIN_CFG(11, ALT_C)
-
-#define GPIO12_GPIO		PIN_CFG(12, GPIO)
-#define GPIO12_MSP0_TXD		PIN_CFG(12, ALT_A)
-#define GPIO12_MSP0_RXD		PIN_CFG(12, ALT_B)
-
-#define GPIO13_GPIO		PIN_CFG(13, GPIO)
-#define GPIO13_MSP0_TFS		PIN_CFG(13, ALT_A)
-
-#define GPIO14_GPIO		PIN_CFG(14, GPIO)
-#define GPIO14_MSP0_TCK		PIN_CFG(14, ALT_A)
-
-#define GPIO15_GPIO		PIN_CFG(15, GPIO)
-#define GPIO15_MSP0_RXD		PIN_CFG(15, ALT_A)
-#define GPIO15_MSP0_TXD		PIN_CFG(15, ALT_B)
-
-#define GPIO16_GPIO		PIN_CFG(16, GPIO)
-#define GPIO16_MSP0_RFS		PIN_CFG(16, ALT_A)
-#define GPIO16_I2C1_SCL		PIN_CFG(16, ALT_B)
-#define GPIO16_SLIM0_DAT	PIN_CFG(16, ALT_C)
-
-#define GPIO17_GPIO		PIN_CFG(17, GPIO)
-#define GPIO17_MSP0_RCK		PIN_CFG(17, ALT_A)
-#define GPIO17_I2C1_SDA		PIN_CFG(17, ALT_B)
-#define GPIO17_SLIM0_CLK	PIN_CFG(17, ALT_C)
-
-#define GPIO18_GPIO		PIN_CFG(18, GPIO)
-#define GPIO18_MC0_CMDDIR	PIN_CFG_INPUT(18, ALT_A, PULLUP)
-#define GPIO18_U2_RXD		PIN_CFG(18, ALT_B)
-#define GPIO18_MS_IEP		PIN_CFG(18, ALT_C)
-
-#define GPIO19_GPIO		PIN_CFG(19, GPIO)
-#define GPIO19_MC0_DAT0DIR	PIN_CFG_INPUT(19, ALT_A, PULLUP)
-#define GPIO19_U2_TXD		PIN_CFG(19, ALT_B)
-#define GPIO19_MS_DAT0DIR	PIN_CFG(19, ALT_C)
-
-#define GPIO20_GPIO		PIN_CFG(20, GPIO)
-#define GPIO20_MC0_DAT2DIR	PIN_CFG_INPUT(20, ALT_A, PULLUP)
-#define GPIO20_UARTMOD_TXD	PIN_CFG(20, ALT_B)
-#define GPIO20_IP_TRIGOUT	PIN_CFG(20, ALT_C)
-
-#define GPIO21_GPIO		PIN_CFG(21, GPIO)
-#define GPIO21_MC0_DAT31DIR	PIN_CFG_INPUT(21, ALT_A, PULLUP)
-#define GPIO21_MSP0_SCK		PIN_CFG(21, ALT_B)
-#define GPIO21_MS_DAT31DIR	PIN_CFG(21, ALT_C)
-
-#define GPIO22_GPIO		PIN_CFG(22, GPIO)
-#define GPIO22_MC0_FBCLK	PIN_CFG_INPUT(22, ALT_A, PULLUP)
-#define GPIO22_UARTMOD_RXD	PIN_CFG(22, ALT_B)
-#define GPIO22_MS_FBCLK		PIN_CFG(22, ALT_C)
-
-#define GPIO23_GPIO		PIN_CFG(23, GPIO)
-#define GPIO23_MC0_CLK		PIN_CFG_INPUT(23, ALT_A, PULLUP)
-#define GPIO23_STMMOD_CLK	PIN_CFG(23, ALT_B)
-#define GPIO23_MS_CLK		PIN_CFG(23, ALT_C)
-
-#define GPIO24_GPIO		PIN_CFG(24, GPIO)
-#define GPIO24_MC0_CMD		PIN_CFG_INPUT(24, ALT_A, PULLUP)
-#define GPIO24_UARTMOD_RXD	PIN_CFG(24, ALT_B)
-#define GPIO24_MS_BS		PIN_CFG(24, ALT_C)
-
-#define GPIO25_GPIO		PIN_CFG(25, GPIO)
-#define GPIO25_MC0_DAT0		PIN_CFG_INPUT(25, ALT_A, PULLUP)
-#define GPIO25_STMMOD_DAT0	PIN_CFG(25, ALT_B)
-#define GPIO25_MS_DAT0		PIN_CFG(25, ALT_C)
-
-#define GPIO26_GPIO		PIN_CFG(26, GPIO)
-#define GPIO26_MC0_DAT1		PIN_CFG_INPUT(26, ALT_A, PULLUP)
-#define GPIO26_STMMOD_DAT1	PIN_CFG(26, ALT_B)
-#define GPIO26_MS_DAT1		PIN_CFG(26, ALT_C)
-
-#define GPIO27_GPIO		PIN_CFG(27, GPIO)
-#define GPIO27_MC0_DAT2		PIN_CFG_INPUT(27, ALT_A, PULLUP)
-#define GPIO27_STMMOD_DAT2	PIN_CFG(27, ALT_B)
-#define GPIO27_MS_DAT2		PIN_CFG(27, ALT_C)
-
-#define GPIO28_GPIO		PIN_CFG(28, GPIO)
-#define GPIO28_MC0_DAT3		PIN_CFG_INPUT(28, ALT_A, PULLUP)
-#define GPIO28_STMMOD_DAT3	PIN_CFG(28, ALT_B)
-#define GPIO28_MS_DAT3		PIN_CFG(28, ALT_C)
-
-#define GPIO29_GPIO		PIN_CFG(29, GPIO)
-#define GPIO29_MC0_DAT4		PIN_CFG(29, ALT_A)
-#define GPIO29_SPI3_CLK		PIN_CFG(29, ALT_B)
-#define GPIO29_U2_RXD		PIN_CFG(29, ALT_C)
-
-#define GPIO30_GPIO		PIN_CFG(30, GPIO)
-#define GPIO30_MC0_DAT5		PIN_CFG(30, ALT_A)
-#define GPIO30_SPI3_RXD		PIN_CFG(30, ALT_B)
-#define GPIO30_U2_TXD		PIN_CFG(30, ALT_C)
-
-#define GPIO31_GPIO		PIN_CFG(31, GPIO)
-#define GPIO31_MC0_DAT6		PIN_CFG(31, ALT_A)
-#define GPIO31_SPI3_FRM		PIN_CFG(31, ALT_B)
-#define GPIO31_U2_CTSn		PIN_CFG(31, ALT_C)
-
-#define GPIO32_GPIO		PIN_CFG(32, GPIO)
-#define GPIO32_MC0_DAT7		PIN_CFG(32, ALT_A)
-#define GPIO32_SPI3_TXD		PIN_CFG(32, ALT_B)
-#define GPIO32_U2_RTSn		PIN_CFG(32, ALT_C)
-
-#define GPIO33_GPIO		PIN_CFG(33, GPIO)
-#define GPIO33_MSP1_TXD		PIN_CFG(33, ALT_A)
-#define GPIO33_MSP1_RXD		PIN_CFG(33, ALT_B)
-#define GPIO33_U0_DTRn		PIN_CFG(33, ALT_C)
-
-#define GPIO34_GPIO		PIN_CFG(34, GPIO)
-#define GPIO34_MSP1_TFS		PIN_CFG(34, ALT_A)
-#define GPIO34_NONE		PIN_CFG(34, ALT_B)
-#define GPIO34_U0_DCDn		PIN_CFG(34, ALT_C)
-
-#define GPIO35_GPIO		PIN_CFG(35, GPIO)
-#define GPIO35_MSP1_TCK		PIN_CFG(35, ALT_A)
-#define GPIO35_NONE		PIN_CFG(35, ALT_B)
-#define GPIO35_U0_DSRn		PIN_CFG(35, ALT_C)
-
-#define GPIO36_GPIO		PIN_CFG(36, GPIO)
-#define GPIO36_MSP1_RXD		PIN_CFG(36, ALT_A)
-#define GPIO36_MSP1_TXD		PIN_CFG(36, ALT_B)
-#define GPIO36_U0_RIn		PIN_CFG(36, ALT_C)
-
-#define GPIO64_GPIO		PIN_CFG(64, GPIO)
-#define GPIO64_LCDB_DE		PIN_CFG(64, ALT_A)
-#define GPIO64_KP_O1		PIN_CFG(64, ALT_B)
-#define GPIO64_IP_GPIO4		PIN_CFG(64, ALT_C)
-
-#define GPIO65_GPIO		PIN_CFG(65, GPIO)
-#define GPIO65_LCDB_HSO		PIN_CFG(65, ALT_A)
-#define GPIO65_KP_O0		PIN_CFG(65, ALT_B)
-#define GPIO65_IP_GPIO5		PIN_CFG(65, ALT_C)
-
-#define GPIO66_GPIO		PIN_CFG(66, GPIO)
-#define GPIO66_LCDB_VSO		PIN_CFG(66, ALT_A)
-#define GPIO66_KP_I1		PIN_CFG(66, ALT_B)
-#define GPIO66_IP_GPIO6		PIN_CFG(66, ALT_C)
-
-#define GPIO67_GPIO		PIN_CFG(67, GPIO)
-#define GPIO67_LCDB_CLK		PIN_CFG(67, ALT_A)
-#define GPIO67_KP_I0		PIN_CFG(67, ALT_B)
-#define GPIO67_IP_GPIO7		PIN_CFG(67, ALT_C)
-
-#define GPIO68_GPIO		PIN_CFG(68, GPIO)
-#define GPIO68_LCD_VSI0		PIN_CFG(68, ALT_A)
-#define GPIO68_KP_O7		PIN_CFG(68, ALT_B)
-#define GPIO68_SM_CLE		PIN_CFG(68, ALT_C)
-
-#define GPIO69_GPIO		PIN_CFG(69, GPIO)
-#define GPIO69_LCD_VSI1		PIN_CFG(69, ALT_A)
-#define GPIO69_KP_I7		PIN_CFG(69, ALT_B)
-#define GPIO69_SM_ALE		PIN_CFG(69, ALT_C)
-
-#define GPIO70_GPIO		PIN_CFG(70, GPIO)
-#define GPIO70_LCD_D0		PIN_CFG(70, ALT_A)
-#define GPIO70_KP_O5		PIN_CFG(70, ALT_B)
-#define GPIO70_STMAPE_CLK	PIN_CFG(70, ALT_C)
-
-#define GPIO71_GPIO		PIN_CFG(71, GPIO)
-#define GPIO71_LCD_D1		PIN_CFG(71, ALT_A)
-#define GPIO71_KP_O4		PIN_CFG(71, ALT_B)
-#define GPIO71_STMAPE_DAT3	PIN_CFG(71, ALT_C)
-
-#define GPIO72_GPIO		PIN_CFG(72, GPIO)
-#define GPIO72_LCD_D2		PIN_CFG(72, ALT_A)
-#define GPIO72_KP_O3		PIN_CFG(72, ALT_B)
-#define GPIO72_STMAPE_DAT2	PIN_CFG(72, ALT_C)
-
-#define GPIO73_GPIO		PIN_CFG(73, GPIO)
-#define GPIO73_LCD_D3		PIN_CFG(73, ALT_A)
-#define GPIO73_KP_O2		PIN_CFG(73, ALT_B)
-#define GPIO73_STMAPE_DAT1	PIN_CFG(73, ALT_C)
-
-#define GPIO74_GPIO		PIN_CFG(74, GPIO)
-#define GPIO74_LCD_D4		PIN_CFG(74, ALT_A)
-#define GPIO74_KP_I5		PIN_CFG(74, ALT_B)
-#define GPIO74_STMAPE_DAT0	PIN_CFG(74, ALT_C)
-
-#define GPIO75_GPIO		PIN_CFG(75, GPIO)
-#define GPIO75_LCD_D5		PIN_CFG(75, ALT_A)
-#define GPIO75_KP_I4		PIN_CFG(75, ALT_B)
-#define GPIO75_U2_RXD		PIN_CFG(75, ALT_C)
-
-#define GPIO76_GPIO		PIN_CFG(76, GPIO)
-#define GPIO76_LCD_D6		PIN_CFG(76, ALT_A)
-#define GPIO76_KP_I3		PIN_CFG(76, ALT_B)
-#define GPIO76_U2_TXD		PIN_CFG(76, ALT_C)
-
-#define GPIO77_GPIO		PIN_CFG(77, GPIO)
-#define GPIO77_LCD_D7		PIN_CFG(77, ALT_A)
-#define GPIO77_KP_I2		PIN_CFG(77, ALT_B)
-#define GPIO77_NONE		PIN_CFG(77, ALT_C)
-
-#define GPIO78_GPIO		PIN_CFG(78, GPIO)
-#define GPIO78_LCD_D8		PIN_CFG(78, ALT_A)
-#define GPIO78_KP_O6		PIN_CFG(78, ALT_B)
-#define GPIO78_IP_GPIO2		PIN_CFG(78, ALT_C)
-
-#define GPIO79_GPIO		PIN_CFG(79, GPIO)
-#define GPIO79_LCD_D9		PIN_CFG(79, ALT_A)
-#define GPIO79_KP_I6		PIN_CFG(79, ALT_B)
-#define GPIO79_IP_GPIO3		PIN_CFG(79, ALT_C)
-
-#define GPIO80_GPIO		PIN_CFG(80, GPIO)
-#define GPIO80_LCD_D10		PIN_CFG(80, ALT_A)
-#define GPIO80_KP_SKA0		PIN_CFG(80, ALT_B)
-#define GPIO80_IP_GPIO4		PIN_CFG(80, ALT_C)
-
-#define GPIO81_GPIO		PIN_CFG(81, GPIO)
-#define GPIO81_LCD_D11		PIN_CFG(81, ALT_A)
-#define GPIO81_KP_SKB0		PIN_CFG(81, ALT_B)
-#define GPIO81_IP_GPIO5		PIN_CFG(81, ALT_C)
-
-#define GPIO82_GPIO		PIN_CFG(82, GPIO)
-#define GPIO82_LCD_D12		PIN_CFG(82, ALT_A)
-#define GPIO82_KP_O5		PIN_CFG(82, ALT_B)
-
-#define GPIO83_GPIO		PIN_CFG(83, GPIO)
-#define GPIO83_LCD_D13		PIN_CFG(83, ALT_A)
-#define GPIO83_KP_O4		PIN_CFG(83, ALT_B)
-
-#define GPIO84_GPIO		PIN_CFG(84, GPIO)
-#define GPIO84_LCD_D14		PIN_CFG(84, ALT_A)
-#define GPIO84_KP_I5		PIN_CFG(84, ALT_B)
-
-#define GPIO85_GPIO		PIN_CFG(85, GPIO)
-#define GPIO85_LCD_D15		PIN_CFG(85, ALT_A)
-#define GPIO85_KP_I4		PIN_CFG(85, ALT_B)
-
-#define GPIO86_GPIO		PIN_CFG(86, GPIO)
-#define GPIO86_LCD_D16		PIN_CFG(86, ALT_A)
-#define GPIO86_SM_ADQ0		PIN_CFG(86, ALT_B)
-#define GPIO86_MC5_DAT0		PIN_CFG(86, ALT_C)
-
-#define GPIO87_GPIO		PIN_CFG(87, GPIO)
-#define GPIO87_LCD_D17		PIN_CFG(87, ALT_A)
-#define GPIO87_SM_ADQ1		PIN_CFG(87, ALT_B)
-#define GPIO87_MC5_DAT1		PIN_CFG(87, ALT_C)
-
-#define GPIO88_GPIO		PIN_CFG(88, GPIO)
-#define GPIO88_LCD_D18		PIN_CFG(88, ALT_A)
-#define GPIO88_SM_ADQ2		PIN_CFG(88, ALT_B)
-#define GPIO88_MC5_DAT2		PIN_CFG(88, ALT_C)
-
-#define GPIO89_GPIO		PIN_CFG(89, GPIO)
-#define GPIO89_LCD_D19		PIN_CFG(89, ALT_A)
-#define GPIO89_SM_ADQ3		PIN_CFG(89, ALT_B)
-#define GPIO89_MC5_DAT3		PIN_CFG(89, ALT_C)
-
-#define GPIO90_GPIO		PIN_CFG(90, GPIO)
-#define GPIO90_LCD_D20		PIN_CFG(90, ALT_A)
-#define GPIO90_SM_ADQ4		PIN_CFG(90, ALT_B)
-#define GPIO90_MC5_CMD		PIN_CFG(90, ALT_C)
-
-#define GPIO91_GPIO		PIN_CFG(91, GPIO)
-#define GPIO91_LCD_D21		PIN_CFG(91, ALT_A)
-#define GPIO91_SM_ADQ5		PIN_CFG(91, ALT_B)
-#define GPIO91_MC5_FBCLK	PIN_CFG(91, ALT_C)
-
-#define GPIO92_GPIO		PIN_CFG(92, GPIO)
-#define GPIO92_LCD_D22		PIN_CFG(92, ALT_A)
-#define GPIO92_SM_ADQ6		PIN_CFG(92, ALT_B)
-#define GPIO92_MC5_CLK		PIN_CFG(92, ALT_C)
-
-#define GPIO93_GPIO		PIN_CFG(93, GPIO)
-#define GPIO93_LCD_D23		PIN_CFG(93, ALT_A)
-#define GPIO93_SM_ADQ7		PIN_CFG(93, ALT_B)
-#define GPIO93_MC5_DAT4		PIN_CFG(93, ALT_C)
-
-#define GPIO94_GPIO		PIN_CFG(94, GPIO)
-#define GPIO94_KP_O7		PIN_CFG(94, ALT_A)
-#define GPIO94_SM_ADVn		PIN_CFG(94, ALT_B)
-#define GPIO94_MC5_DAT5		PIN_CFG(94, ALT_C)
-
-#define GPIO95_GPIO		PIN_CFG(95, GPIO)
-#define GPIO95_KP_I7		PIN_CFG(95, ALT_A)
-#define GPIO95_SM_CS0n		PIN_CFG(95, ALT_B)
-#define GPIO95_SM_PS0n		PIN_CFG(95, ALT_C)
-
-#define GPIO96_GPIO		PIN_CFG(96, GPIO)
-#define GPIO96_KP_O6		PIN_CFG(96, ALT_A)
-#define GPIO96_SM_OEn		PIN_CFG(96, ALT_B)
-#define GPIO96_MC5_DAT6		PIN_CFG(96, ALT_C)
-
-#define GPIO97_GPIO		PIN_CFG(97, GPIO)
-#define GPIO97_KP_I6		PIN_CFG(97, ALT_A)
-#define GPIO97_SM_WEn		PIN_CFG(97, ALT_B)
-#define GPIO97_MC5_DAT7		PIN_CFG(97, ALT_C)
-
-#define GPIO128_GPIO		PIN_CFG(128, GPIO)
-#define GPIO128_MC2_CLK		PIN_CFG_INPUT(128, ALT_A, PULLUP)
-#define GPIO128_SM_CKO		PIN_CFG(128, ALT_B)
-
-#define GPIO129_GPIO		PIN_CFG(129, GPIO)
-#define GPIO129_MC2_CMD		PIN_CFG_INPUT(129, ALT_A, PULLUP)
-#define GPIO129_SM_WAIT0n	PIN_CFG(129, ALT_B)
-
-#define GPIO130_GPIO		PIN_CFG(130, GPIO)
-#define GPIO130_MC2_FBCLK	PIN_CFG_INPUT(130, ALT_A, PULLUP)
-#define GPIO130_SM_FBCLK	PIN_CFG(130, ALT_B)
-#define GPIO130_MC2_RSTN	PIN_CFG(130, ALT_C)
-
-#define GPIO131_GPIO		PIN_CFG(131, GPIO)
-#define GPIO131_MC2_DAT0	PIN_CFG_INPUT(131, ALT_A, PULLUP)
-#define GPIO131_SM_ADQ8		PIN_CFG(131, ALT_B)
-
-#define GPIO132_GPIO		PIN_CFG(132, GPIO)
-#define GPIO132_MC2_DAT1	PIN_CFG_INPUT(132, ALT_A, PULLUP)
-#define GPIO132_SM_ADQ9		PIN_CFG(132, ALT_B)
-
-#define GPIO133_GPIO		PIN_CFG(133, GPIO)
-#define GPIO133_MC2_DAT2	PIN_CFG_INPUT(133, ALT_A, PULLUP)
-#define GPIO133_SM_ADQ10	PIN_CFG(133, ALT_B)
-
-#define GPIO134_GPIO		PIN_CFG(134, GPIO)
-#define GPIO134_MC2_DAT3	PIN_CFG_INPUT(134, ALT_A, PULLUP)
-#define GPIO134_SM_ADQ11	PIN_CFG(134, ALT_B)
-
-#define GPIO135_GPIO		PIN_CFG(135, GPIO)
-#define GPIO135_MC2_DAT4	PIN_CFG_INPUT(135, ALT_A, PULLUP)
-#define GPIO135_SM_ADQ12	PIN_CFG(135, ALT_B)
-
-#define GPIO136_GPIO		PIN_CFG(136, GPIO)
-#define GPIO136_MC2_DAT5	PIN_CFG_INPUT(136, ALT_A, PULLUP)
-#define GPIO136_SM_ADQ13	PIN_CFG(136, ALT_B)
-
-#define GPIO137_GPIO		PIN_CFG(137, GPIO)
-#define GPIO137_MC2_DAT6	PIN_CFG_INPUT(137, ALT_A, PULLUP)
-#define GPIO137_SM_ADQ14	PIN_CFG(137, ALT_B)
-
-#define GPIO138_GPIO		PIN_CFG(138, GPIO)
-#define GPIO138_MC2_DAT7	PIN_CFG_INPUT(138, ALT_A, PULLUP)
-#define GPIO138_SM_ADQ15	PIN_CFG(138, ALT_B)
-
-#define GPIO139_GPIO		PIN_CFG(139, GPIO)
-#define GPIO139_SSP1_RXD	PIN_CFG(139, ALT_A)
-#define GPIO139_SM_WAIT1n	PIN_CFG(139, ALT_B)
-#define GPIO139_KP_O8		PIN_CFG(139, ALT_C)
-
-#define GPIO140_GPIO		PIN_CFG(140, GPIO)
-#define GPIO140_SSP1_TXD	PIN_CFG(140, ALT_A)
-#define GPIO140_IP_GPIO7	PIN_CFG(140, ALT_B)
-#define GPIO140_KP_SKA1		PIN_CFG(140, ALT_C)
-
-#define GPIO141_GPIO		PIN_CFG(141, GPIO)
-#define GPIO141_SSP1_CLK	PIN_CFG(141, ALT_A)
-#define GPIO141_IP_GPIO2	PIN_CFG(141, ALT_B)
-#define GPIO141_KP_O9		PIN_CFG(141, ALT_C)
-
-#define GPIO142_GPIO		PIN_CFG(142, GPIO)
-#define GPIO142_SSP1_FRM	PIN_CFG(142, ALT_A)
-#define GPIO142_IP_GPIO3	PIN_CFG(142, ALT_B)
-#define GPIO142_KP_SKB1		PIN_CFG(142, ALT_C)
-
-#define GPIO143_GPIO		PIN_CFG(143, GPIO)
-#define GPIO143_SSP0_CLK	PIN_CFG(143, ALT_A)
-
-#define GPIO144_GPIO		PIN_CFG(144, GPIO)
-#define GPIO144_SSP0_FRM	PIN_CFG(144, ALT_A)
-
-#define GPIO145_GPIO		PIN_CFG(145, GPIO)
-#define GPIO145_SSP0_RXD	PIN_CFG(145, ALT_A)
-
-#define GPIO146_GPIO		PIN_CFG(146, GPIO)
-#define GPIO146_SSP0_TXD	PIN_CFG(146, ALT_A)
-
-#define GPIO147_GPIO		PIN_CFG(147, GPIO)
-#define GPIO147_I2C0_SCL	PIN_CFG(147, ALT_A)
-
-#define GPIO148_GPIO		PIN_CFG(148, GPIO)
-#define GPIO148_I2C0_SDA	PIN_CFG(148, ALT_A)
-
-#define GPIO149_GPIO		PIN_CFG(149, GPIO)
-#define GPIO149_IP_GPIO0	PIN_CFG(149, ALT_A)
-#define GPIO149_SM_CS1n		PIN_CFG(149, ALT_B)
-#define GPIO149_SM_PS1n		PIN_CFG(149, ALT_C)
-
-#define GPIO150_GPIO		PIN_CFG(150, GPIO)
-#define GPIO150_IP_GPIO1	PIN_CFG(150, ALT_A)
-#define GPIO150_LCDA_CLK	PIN_CFG(150, ALT_B)
-
-#define GPIO151_GPIO		PIN_CFG(151, GPIO)
-#define GPIO151_KP_SKA0		PIN_CFG(151, ALT_A)
-#define GPIO151_LCD_VSI0	PIN_CFG(151, ALT_B)
-#define GPIO151_KP_O8		PIN_CFG(151, ALT_C)
-
-#define GPIO152_GPIO		PIN_CFG(152, GPIO)
-#define GPIO152_KP_SKB0		PIN_CFG(152, ALT_A)
-#define GPIO152_LCD_VSI1	PIN_CFG(152, ALT_B)
-#define GPIO152_KP_O9		PIN_CFG(152, ALT_C)
-
-#define GPIO153_GPIO		PIN_CFG(153, GPIO)
-#define GPIO153_KP_I7		PIN_CFG(153, ALT_A)
-#define GPIO153_LCD_D24		PIN_CFG(153, ALT_B)
-#define GPIO153_U2_RXD		PIN_CFG(153, ALT_C)
-
-#define GPIO154_GPIO		PIN_CFG(154, GPIO)
-#define GPIO154_KP_I6		PIN_CFG(154, ALT_A)
-#define GPIO154_LCD_D25		PIN_CFG(154, ALT_B)
-#define GPIO154_U2_TXD		PIN_CFG(154, ALT_C)
-
-#define GPIO155_GPIO		PIN_CFG(155, GPIO)
-#define GPIO155_KP_I5		PIN_CFG(155, ALT_A)
-#define GPIO155_LCD_D26		PIN_CFG(155, ALT_B)
-#define GPIO155_STMAPE_CLK	PIN_CFG(155, ALT_C)
-
-#define GPIO156_GPIO		PIN_CFG(156, GPIO)
-#define GPIO156_KP_I4		PIN_CFG(156, ALT_A)
-#define GPIO156_LCD_D27		PIN_CFG(156, ALT_B)
-#define GPIO156_STMAPE_DAT3	PIN_CFG(156, ALT_C)
-
-#define GPIO157_GPIO		PIN_CFG(157, GPIO)
-#define GPIO157_KP_O7		PIN_CFG(157, ALT_A)
-#define GPIO157_LCD_D28		PIN_CFG(157, ALT_B)
-#define GPIO157_STMAPE_DAT2	PIN_CFG(157, ALT_C)
-
-#define GPIO158_GPIO		PIN_CFG(158, GPIO)
-#define GPIO158_KP_O6		PIN_CFG(158, ALT_A)
-#define GPIO158_LCD_D29		PIN_CFG(158, ALT_B)
-#define GPIO158_STMAPE_DAT1	PIN_CFG(158, ALT_C)
-
-#define GPIO159_GPIO		PIN_CFG(159, GPIO)
-#define GPIO159_KP_O5		PIN_CFG(159, ALT_A)
-#define GPIO159_LCD_D30		PIN_CFG(159, ALT_B)
-#define GPIO159_STMAPE_DAT0	PIN_CFG(159, ALT_C)
-
-#define GPIO160_GPIO		PIN_CFG(160, GPIO)
-#define GPIO160_KP_O4		PIN_CFG(160, ALT_A)
-#define GPIO160_LCD_D31		PIN_CFG(160, ALT_B)
-#define GPIO160_NONE		PIN_CFG(160, ALT_C)
-
-#define GPIO161_GPIO		PIN_CFG(161, GPIO)
-#define GPIO161_KP_I3		PIN_CFG(161, ALT_A)
-#define GPIO161_LCD_D32		PIN_CFG(161, ALT_B)
-#define GPIO161_UARTMOD_RXD	PIN_CFG(161, ALT_C)
-
-#define GPIO162_GPIO		PIN_CFG(162, GPIO)
-#define GPIO162_KP_I2		PIN_CFG(162, ALT_A)
-#define GPIO162_LCD_D33		PIN_CFG(162, ALT_B)
-#define GPIO162_UARTMOD_TXD	PIN_CFG(162, ALT_C)
-
-#define GPIO163_GPIO		PIN_CFG(163, GPIO)
-#define GPIO163_KP_I1		PIN_CFG(163, ALT_A)
-#define GPIO163_LCD_D34		PIN_CFG(163, ALT_B)
-#define GPIO163_STMMOD_CLK	PIN_CFG(163, ALT_C)
-
-#define GPIO164_GPIO		PIN_CFG(164, GPIO)
-#define GPIO164_KP_I0		PIN_CFG(164, ALT_A)
-#define GPIO164_LCD_D35		PIN_CFG(164, ALT_B)
-#define GPIO164_STMMOD_DAT3	PIN_CFG(164, ALT_C)
-
-#define GPIO165_GPIO		PIN_CFG(165, GPIO)
-#define GPIO165_KP_O3		PIN_CFG(165, ALT_A)
-#define GPIO165_LCD_D36		PIN_CFG(165, ALT_B)
-#define GPIO165_STMMOD_DAT2	PIN_CFG(165, ALT_C)
-
-#define GPIO166_GPIO		PIN_CFG(166, GPIO)
-#define GPIO166_KP_O2		PIN_CFG(166, ALT_A)
-#define GPIO166_LCD_D37		PIN_CFG(166, ALT_B)
-#define GPIO166_STMMOD_DAT1	PIN_CFG(166, ALT_C)
-
-#define GPIO167_GPIO		PIN_CFG(167, GPIO)
-#define GPIO167_KP_O1		PIN_CFG(167, ALT_A)
-#define GPIO167_LCD_D38		PIN_CFG(167, ALT_B)
-#define GPIO167_STMMOD_DAT0	PIN_CFG(167, ALT_C)
-
-#define GPIO168_GPIO		PIN_CFG(168, GPIO)
-#define GPIO168_KP_O0		PIN_CFG(168, ALT_A)
-#define GPIO168_LCD_D39		PIN_CFG(168, ALT_B)
-#define GPIO168_NONE		PIN_CFG(168, ALT_C)
-
-#define GPIO169_GPIO		PIN_CFG(169, GPIO)
-#define GPIO169_RF_PURn		PIN_CFG(169, ALT_A)
-#define GPIO169_LCDA_DE		PIN_CFG(169, ALT_B)
-#define GPIO169_USBSIM_PDC	PIN_CFG(169, ALT_C)
-
-#define GPIO170_GPIO		PIN_CFG(170, GPIO)
-#define GPIO170_MODEM_STATE	PIN_CFG(170, ALT_A)
-#define GPIO170_LCDA_VSO	PIN_CFG(170, ALT_B)
-#define GPIO170_KP_SKA1		PIN_CFG(170, ALT_C)
-
-#define GPIO171_GPIO		PIN_CFG(171, GPIO)
-#define GPIO171_MODEM_PWREN	PIN_CFG(171, ALT_A)
-#define GPIO171_LCDA_HSO	PIN_CFG(171, ALT_B)
-#define GPIO171_KP_SKB1		PIN_CFG(171, ALT_C)
-
-#define GPIO192_GPIO		PIN_CFG(192, GPIO)
-#define GPIO192_MSP2_SCK	PIN_CFG(192, ALT_A)
-
-#define GPIO193_GPIO		PIN_CFG(193, GPIO)
-#define GPIO193_MSP2_TXD	PIN_CFG(193, ALT_A)
-
-#define GPIO194_GPIO		PIN_CFG(194, GPIO)
-#define GPIO194_MSP2_TCK	PIN_CFG(194, ALT_A)
-
-#define GPIO195_GPIO		PIN_CFG(195, GPIO)
-#define GPIO195_MSP2_TFS	PIN_CFG(195, ALT_A)
-
-#define GPIO196_GPIO		PIN_CFG(196, GPIO)
-#define GPIO196_MSP2_RXD	PIN_CFG(196, ALT_A)
-
-#define GPIO197_GPIO		PIN_CFG(197, GPIO)
-#define GPIO197_MC4_DAT3	PIN_CFG_INPUT(197, ALT_A, PULLUP)
-
-#define GPIO198_GPIO		PIN_CFG(198, GPIO)
-#define GPIO198_MC4_DAT2	PIN_CFG_INPUT(198, ALT_A, PULLUP)
-
-#define GPIO199_GPIO		PIN_CFG(199, GPIO)
-#define GPIO199_MC4_DAT1	PIN_CFG_INPUT(199, ALT_A, PULLUP)
-
-#define GPIO200_GPIO		PIN_CFG(200, GPIO)
-#define GPIO200_MC4_DAT0	PIN_CFG_INPUT(200, ALT_A, PULLUP)
-
-#define GPIO201_GPIO		PIN_CFG(201, GPIO)
-#define GPIO201_MC4_CMD		PIN_CFG_INPUT(201, ALT_A, PULLUP)
-
-#define GPIO202_GPIO		PIN_CFG(202, GPIO)
-#define GPIO202_MC4_FBCLK	PIN_CFG_INPUT(202, ALT_A, PULLUP)
-#define GPIO202_PWL		PIN_CFG(202, ALT_B)
-#define GPIO202_MC4_RSTN	PIN_CFG(202, ALT_C)
-
-#define GPIO203_GPIO		PIN_CFG(203, GPIO)
-#define GPIO203_MC4_CLK		PIN_CFG_INPUT(203, ALT_A, PULLUP)
-
-#define GPIO204_GPIO		PIN_CFG(204, GPIO)
-#define GPIO204_MC4_DAT7	PIN_CFG_INPUT(204, ALT_A, PULLUP)
-
-#define GPIO205_GPIO		PIN_CFG(205, GPIO)
-#define GPIO205_MC4_DAT6	PIN_CFG_INPUT(205, ALT_A, PULLUP)
-
-#define GPIO206_GPIO		PIN_CFG(206, GPIO)
-#define GPIO206_MC4_DAT5	PIN_CFG_INPUT(206, ALT_A, PULLUP)
-
-#define GPIO207_GPIO		PIN_CFG(207, GPIO)
-#define GPIO207_MC4_DAT4	PIN_CFG_INPUT(207, ALT_A, PULLUP)
-
-#define GPIO208_GPIO		PIN_CFG(208, GPIO)
-#define GPIO208_MC1_CLK		PIN_CFG(208, ALT_A)
-
-#define GPIO209_GPIO		PIN_CFG(209, GPIO)
-#define GPIO209_MC1_FBCLK	PIN_CFG(209, ALT_A)
-#define GPIO209_SPI1_CLK	PIN_CFG(209, ALT_B)
-
-#define GPIO210_GPIO		PIN_CFG(210, GPIO)
-#define GPIO210_MC1_CMD		PIN_CFG(210, ALT_A)
-
-#define GPIO211_GPIO		PIN_CFG(211, GPIO)
-#define GPIO211_MC1_DAT0	PIN_CFG(211, ALT_A)
-
-#define GPIO212_GPIO		PIN_CFG(212, GPIO)
-#define GPIO212_MC1_DAT1	PIN_CFG(212, ALT_A)
-#define GPIO212_SPI1_FRM	PIN_CFG(212, ALT_B)
-
-#define GPIO213_GPIO		PIN_CFG(213, GPIO)
-#define GPIO213_MC1_DAT2	PIN_CFG(213, ALT_A)
-#define GPIO213_SPI1_TXD	PIN_CFG(213, ALT_B)
-
-#define GPIO214_GPIO		PIN_CFG(214, GPIO)
-#define GPIO214_MC1_DAT3	PIN_CFG(214, ALT_A)
-#define GPIO214_SPI1_RXD	PIN_CFG(214, ALT_B)
-
-#define GPIO215_GPIO		PIN_CFG(215, GPIO)
-#define GPIO215_MC1_CMDDIR	PIN_CFG(215, ALT_A)
-#define GPIO215_MC3_DAT2DIR	PIN_CFG(215, ALT_B)
-#define GPIO215_CLKOUT1		PIN_CFG(215, ALT_C)
-#define GPIO215_SPI2_TXD	PIN_CFG(215, ALT_C)
-
-#define GPIO216_GPIO		PIN_CFG(216, GPIO)
-#define GPIO216_MC1_DAT2DIR	PIN_CFG(216, ALT_A)
-#define GPIO216_MC3_CMDDIR	PIN_CFG(216, ALT_B)
-#define GPIO216_I2C3_SDA	PIN_CFG(216, ALT_C)
-#define GPIO216_SPI2_FRM	PIN_CFG(216, ALT_C)
-
-#define GPIO217_GPIO		PIN_CFG(217, GPIO)
-#define GPIO217_MC1_DAT0DIR	PIN_CFG(217, ALT_A)
-#define GPIO217_MC3_DAT31DIR	PIN_CFG(217, ALT_B)
-#define GPIO217_CLKOUT2		PIN_CFG(217, ALT_C)
-#define GPIO217_SPI2_CLK	PIN_CFG(217, ALT_C)
-
-#define GPIO218_GPIO		PIN_CFG(218, GPIO)
-#define GPIO218_MC1_DAT31DIR	PIN_CFG(218, ALT_A)
-#define GPIO218_MC3_DAT0DIR	PIN_CFG(218, ALT_B)
-#define GPIO218_I2C3_SCL	PIN_CFG(218, ALT_C)
-#define GPIO218_SPI2_RXD	PIN_CFG(218, ALT_C)
-
-#define GPIO219_GPIO		PIN_CFG(219, GPIO)
-#define GPIO219_HSIR_FLA0	PIN_CFG(219, ALT_A)
-#define GPIO219_MC3_CLK		PIN_CFG(219, ALT_B)
-
-#define GPIO220_GPIO		PIN_CFG(220, GPIO)
-#define GPIO220_HSIR_DAT0	PIN_CFG(220, ALT_A)
-#define GPIO220_MC3_FBCLK	PIN_CFG(220, ALT_B)
-#define GPIO220_SPI0_CLK	PIN_CFG(220, ALT_C)
-
-#define GPIO221_GPIO		PIN_CFG(221, GPIO)
-#define GPIO221_HSIR_RDY0	PIN_CFG(221, ALT_A)
-#define GPIO221_MC3_CMD		PIN_CFG(221, ALT_B)
-
-#define GPIO222_GPIO		PIN_CFG(222, GPIO)
-#define GPIO222_HSIT_FLA0	PIN_CFG(222, ALT_A)
-#define GPIO222_MC3_DAT0	PIN_CFG(222, ALT_B)
-
-#define GPIO223_GPIO		PIN_CFG(223, GPIO)
-#define GPIO223_HSIT_DAT0	PIN_CFG(223, ALT_A)
-#define GPIO223_MC3_DAT1	PIN_CFG(223, ALT_B)
-#define GPIO223_SPI0_FRM	PIN_CFG(223, ALT_C)
-
-#define GPIO224_GPIO		PIN_CFG(224, GPIO)
-#define GPIO224_HSIT_RDY0	PIN_CFG(224, ALT_A)
-#define GPIO224_MC3_DAT2	PIN_CFG(224, ALT_B)
-#define GPIO224_SPI0_TXD	PIN_CFG(224, ALT_C)
-
-#define GPIO225_GPIO		PIN_CFG(225, GPIO)
-#define GPIO225_HSIT_CAWAKE0	PIN_CFG(225, ALT_A)
-#define GPIO225_MC3_DAT3	PIN_CFG(225, ALT_B)
-#define GPIO225_SPI0_RXD	PIN_CFG(225, ALT_C)
-
-#define GPIO226_GPIO		PIN_CFG(226, GPIO)
-#define GPIO226_HSIT_ACWAKE0	PIN_CFG(226, ALT_A)
-#define GPIO226_PWL		PIN_CFG(226, ALT_B)
-#define GPIO226_USBSIM_PDC	PIN_CFG(226, ALT_C)
-
-#define GPIO227_GPIO		PIN_CFG(227, GPIO)
-#define GPIO227_CLKOUT1		PIN_CFG(227, ALT_A)
-
-#define GPIO228_GPIO		PIN_CFG(228, GPIO)
-#define GPIO228_CLKOUT2		PIN_CFG(228, ALT_A)
-
-#define GPIO229_GPIO		PIN_CFG(229, GPIO)
-#define GPIO229_CLKOUT1		PIN_CFG(229, ALT_A)
-#define GPIO229_PWL		PIN_CFG(229, ALT_B)
-#define GPIO229_I2C3_SDA	PIN_CFG(229, ALT_C)
-
-#define GPIO230_GPIO		PIN_CFG(230, GPIO)
-#define GPIO230_CLKOUT2		PIN_CFG(230, ALT_A)
-#define GPIO230_PWL		PIN_CFG(230, ALT_B)
-#define GPIO230_I2C3_SCL	PIN_CFG(230, ALT_C)
-
-#define GPIO256_GPIO		PIN_CFG(256, GPIO)
-#define GPIO256_USB_NXT		PIN_CFG(256, ALT_A)
-
-#define GPIO257_GPIO		PIN_CFG(257, GPIO)
-#define GPIO257_USB_STP		PIN_CFG(257, ALT_A)
-
-#define GPIO258_GPIO		PIN_CFG(258, GPIO)
-#define GPIO258_USB_XCLK	PIN_CFG(258, ALT_A)
-#define GPIO258_NONE		PIN_CFG(258, ALT_B)
-#define GPIO258_DDR_TRIG	PIN_CFG(258, ALT_C)
-
-#define GPIO259_GPIO		PIN_CFG(259, GPIO)
-#define GPIO259_USB_DIR		PIN_CFG(259, ALT_A)
-
-#define GPIO260_GPIO		PIN_CFG(260, GPIO)
-#define GPIO260_USB_DAT7	PIN_CFG(260, ALT_A)
-
-#define GPIO261_GPIO		PIN_CFG(261, GPIO)
-#define GPIO261_USB_DAT6	PIN_CFG(261, ALT_A)
-
-#define GPIO262_GPIO		PIN_CFG(262, GPIO)
-#define GPIO262_USB_DAT5	PIN_CFG(262, ALT_A)
-
-#define GPIO263_GPIO		PIN_CFG(263, GPIO)
-#define GPIO263_USB_DAT4	PIN_CFG(263, ALT_A)
-
-#define GPIO264_GPIO		PIN_CFG(264, GPIO)
-#define GPIO264_USB_DAT3	PIN_CFG(264, ALT_A)
-
-#define GPIO265_GPIO		PIN_CFG(265, GPIO)
-#define GPIO265_USB_DAT2	PIN_CFG(265, ALT_A)
-
-#define GPIO266_GPIO		PIN_CFG(266, GPIO)
-#define GPIO266_USB_DAT1	PIN_CFG(266, ALT_A)
-
-#define GPIO267_GPIO		PIN_CFG(267, GPIO)
-#define GPIO267_USB_DAT0	PIN_CFG(267, ALT_A)
-
-#endif
diff --git a/arch/arm/mach-vexpress/tc2_pm.c b/arch/arm/mach-vexpress/tc2_pm.c
index 2b7c93a..7aeb5d6 100644
--- a/arch/arm/mach-vexpress/tc2_pm.c
+++ b/arch/arm/mach-vexpress/tc2_pm.c
@@ -18,6 +18,7 @@
 #include <linux/of_address.h>
 #include <linux/spinlock.h>
 #include <linux/errno.h>
+#include <linux/irqchip/arm-gic.h>
 
 #include <asm/mcpm.h>
 #include <asm/proc-fns.h>
@@ -230,6 +231,7 @@
 	cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
 	cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
 	ve_spc_set_resume_addr(cluster, cpu, virt_to_phys(mcpm_entry_point));
+	gic_cpu_if_down();
 	tc2_pm_down(residency);
 }
 
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 2958e74..febaee7 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -17,6 +17,7 @@
 #include <linux/nodemask.h>
 #include <linux/initrd.h>
 #include <linux/of_fdt.h>
+#include <linux/of_reserved_mem.h>
 #include <linux/highmem.h>
 #include <linux/gfp.h>
 #include <linux/memblock.h>
@@ -77,7 +78,7 @@
 __tagtable(ATAG_INITRD2, parse_tag_initrd2);
 
 #ifdef CONFIG_OF_FLATTREE
-void __init early_init_dt_setup_initrd_arch(unsigned long start, unsigned long end)
+void __init early_init_dt_setup_initrd_arch(u64 start, u64 end)
 {
 	phys_initrd_start = start;
 	phys_initrd_size = end - start;
@@ -207,7 +208,7 @@
 
 #ifdef CONFIG_ZONE_DMA
 
-unsigned long arm_dma_zone_size __read_mostly;
+phys_addr_t arm_dma_zone_size __read_mostly;
 EXPORT_SYMBOL(arm_dma_zone_size);
 
 /*
@@ -378,6 +379,8 @@
 	if (mdesc->reserve)
 		mdesc->reserve();
 
+	early_init_dt_scan_reserved_mem();
+
 	/*
 	 * reserve memory for DMA contigouos allocations,
 	 * must come from DMA area inside low memory
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index bca4c1c..12ad8f3 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -190,11 +190,6 @@
 	memblock_add(base, size);
 }
 
-void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align)
-{
-	return __va(memblock_alloc(size, align));
-}
-
 /*
  * Limit the memory size that was specified via FDT.
  */
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 67e8d7c..de2de5d 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -44,8 +44,7 @@
 
 phys_addr_t memstart_addr __read_mostly = 0;
 
-void __init early_init_dt_setup_initrd_arch(unsigned long start,
-					    unsigned long end)
+void __init early_init_dt_setup_initrd_arch(u64 start, u64 end)
 {
 	phys_initrd_start = start;
 	phys_initrd_size = end - start;
diff --git a/arch/avr32/mach-at32ap/at32ap700x.c b/arch/avr32/mach-at32ap/at32ap700x.c
index 7f8759a..a68f3cf 100644
--- a/arch/avr32/mach-at32ap/at32ap700x.c
+++ b/arch/avr32/mach-at32ap/at32ap700x.c
@@ -1983,6 +1983,9 @@
 				ARRAY_SIZE(smc_cs3_resource)))
 		goto fail;
 
+	/* For at32ap7000, we use the reset workaround for nand driver */
+	data->need_reset_workaround = true;
+
 	if (platform_device_add_data(pdev, data,
 				sizeof(struct atmel_nand_data)))
 		goto fail;
diff --git a/arch/c6x/kernel/devicetree.c b/arch/c6x/kernel/devicetree.c
index bdb56f0..9e15ab9 100644
--- a/arch/c6x/kernel/devicetree.c
+++ b/arch/c6x/kernel/devicetree.c
@@ -33,8 +33,7 @@
 
 
 #ifdef CONFIG_BLK_DEV_INITRD
-void __init early_init_dt_setup_initrd_arch(unsigned long start,
-		unsigned long end)
+void __init early_init_dt_setup_initrd_arch(u64 start, u64 end)
 {
 	initrd_start = (unsigned long)__va(start);
 	initrd_end = (unsigned long)__va(end);
@@ -46,8 +45,3 @@
 {
 	c6x_add_memory(base, size);
 }
-
-void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align)
-{
-	return __va(memblock_alloc(size, align));
-}
diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig
index 3201ddb..c699d32 100644
--- a/arch/cris/Kconfig
+++ b/arch/cris/Kconfig
@@ -99,9 +99,6 @@
 	help
 	  Enable module allocation with kmalloc instead of vmalloc.
 
-config OOM_REBOOT
-       bool "Enable reboot at out of memory"
-
 source "kernel/Kconfig.preempt"
 
 source mm/Kconfig
@@ -175,12 +172,6 @@
 	help
 	  Width in bytes of the NOR Flash bus (1, 2 or 4). Is usually 2.
 
-config ETRAX_NANDFLASH_BUSWIDTH
-	int "Buswidth of NAND flash in bytes"
-	default "1"
-	help
-	  Width in bytes of the NAND flash (1 or 2).
-
 config ETRAX_FLASH1_SIZE
        int "FLASH1 size (dec, in MB. 0 = Unknown)"
        default "0"
@@ -272,38 +263,6 @@
 	  This option enables MTD mapping of flash devices.  Needed to use
 	  flash memories.  If unsure, say Y.
 
-config ETRAX_RTC
-	bool "Real Time Clock support"
-	depends on ETRAX_I2C
-	help
-	  Enables drivers for the Real-Time Clock battery-backed chips on
-	  some products. The kernel reads the time when booting, and
-	  the date can be set using ioctl(fd, RTC_SET_TIME, &rt) with rt a
-	  rtc_time struct (see <file:arch/cris/include/asm/rtc.h>) on the
-	  /dev/rtc device.  You can check the time with cat /proc/rtc, but
-	  normal time reading should be done using libc function time and
-	  friends.
-
-choice
-	prompt "RTC chip"
-	depends on ETRAX_RTC
-	default ETRAX_DS1302
-
-config ETRAX_DS1302
-	depends on ETRAX_ARCH_V10
-	bool "DS1302"
-	help
-	  Enables the driver for the DS1302 Real-Time Clock battery-backed
-	  chip on some products.
-
-config ETRAX_PCF8563
-	bool "PCF8563"
-	help
-	  Enables the driver for the PCF8563 Real-Time Clock battery-backed
-	  chip on some products.
-
-endchoice
-
 config ETRAX_SYNCHRONOUS_SERIAL
 	bool "Synchronous serial-port support"
 	help
@@ -578,26 +537,6 @@
 	depends on ETRAX_ARCH_V10
 	bool "DMA 5"
 
-config ETRAX_SERIAL_PORT3_DMA9_IN
-	bool "Ser3 uses DMA9 for input"
-	depends on ETRAXFS
-	help
-	  Enables the DMA9 input channel for ser3 (ttyS3).
-	  If you do not enable DMA, an interrupt for each character will be
-	  used when receiving data.
-	  Normally you want to use DMA, unless you use the DMA channel for
-	  something else.
-
-config ETRAX_SERIAL_PORT3_DMA3_IN
-	bool "Ser3 uses DMA3 for input"
-	depends on CRIS_MACH_ARTPEC3
-	help
-	  Enables the DMA3 input channel for ser3 (ttyS3).
-	  If you do not enable DMA, an interrupt for each character will be
-	  used when receiving data.
-	  Normally you want to use DMA, unless you use the DMA channel for
-	  something else.
-
 endchoice
 
 choice
@@ -615,26 +554,6 @@
 	depends on ETRAX_ARCH_V10
 	bool "DMA 4"
 
-config ETRAX_SERIAL_PORT3_DMA8_OUT
-	bool "Ser3 uses DMA8 for output"
-	depends on ETRAXFS
-	help
-	  Enables the DMA8 output channel for ser3 (ttyS3).
-	  If you do not enable DMA, an interrupt for each character will be
-	  used when transmitting data.
-	  Normally you want to use DMA, unless you use the DMA channel for
-	  something else.
-
-config ETRAX_SERIAL_PORT3_DMA2_OUT
-	bool "Ser3 uses DMA2 for output"
-	depends on CRIS_MACH_ARTPEC3
-	help
-	  Enables the DMA2 output channel for ser3 (ttyS3).
-	  If you do not enable DMA, an interrupt for each character will be
-	  used when transmitting data.
-	  Normally you want to use DMA, unless you use the DMA channel for
-	  something else.
-
 endchoice
 
 endmenu
diff --git a/arch/cris/arch-v10/drivers/Kconfig b/arch/cris/arch-v10/drivers/Kconfig
index daf5f19..239dab0 100644
--- a/arch/cris/arch-v10/drivers/Kconfig
+++ b/arch/cris/arch-v10/drivers/Kconfig
@@ -417,16 +417,6 @@
 	   for CTRL and BULK traffic only, INTR traffic may work as well
 	   however (depending on the requirements of timeliness).
 
-config ETRAX_USB_HOST_PORT1
-	bool "USB port 1 enabled"
-	depends on ETRAX_USB_HOST
-	default n
-
-config ETRAX_USB_HOST_PORT2
-	bool "USB port 2 enabled"
-	depends on ETRAX_USB_HOST
-	default n
-
 config ETRAX_PTABLE_SECTOR
 	int "Byte-offset of partition table sector"
 	depends on ETRAX_AXISFLASHMAP
@@ -527,19 +517,6 @@
 	  Remember that you need to setup the port directions appropriately in
 	  the General configuration.
 
-config ETRAX_PA_BUTTON_BITMASK
-	hex "PA-buttons bitmask"
-	depends on ETRAX_GPIO
-	default "02"
-	help
-	  This is a bitmask with information about what bits on PA that
-	  are used for buttons.
-	  Most products has a so called TEST button on PA1, if that's true
-	  use 02 here.
-	  Use 00 if there are no buttons on PA.
-	  If the bitmask is <> 00 a button driver will be included in the gpio
-	  driver. ETRAX general I/O support must be enabled.
-
 config ETRAX_PA_CHANGEABLE_DIR
 	hex "PA user changeable dir mask"
 	depends on ETRAX_GPIO
@@ -580,51 +557,4 @@
 	  Bit set = changeable.
 	  You probably want 00 here.
 
-config ETRAX_DS1302_RST_ON_GENERIC_PORT
-	bool "DS1302 RST on Generic Port"
-	depends on ETRAX_DS1302
-	help
-	  If your product has the RST signal line for the DS1302 RTC on the
-	  Generic Port then say Y here, otherwise leave it as N in which
-	  case the RST signal line is assumed to be connected to Port PB
-	  (just like the SCL and SDA lines).
-
-config ETRAX_DS1302_RSTBIT
-	int "DS1302 RST bit number"
-	depends on ETRAX_DS1302
-	default "2"
-	help
-	  This is the bit number for the RST signal line of the DS1302 RTC on
-	  the selected port. If you have selected the generic port then it
-	  should be bit 27, otherwise your best bet is bit 5.
-
-config ETRAX_DS1302_SCLBIT
-	int "DS1302 SCL bit number"
-	depends on ETRAX_DS1302
-	default "1"
-	help
-	  This is the bit number for the SCL signal line of the DS1302 RTC on
-	  Port PB. This is probably best left at 3.
-
-config ETRAX_DS1302_SDABIT
-	int "DS1302 SDA bit number"
-	depends on ETRAX_DS1302
-	default "0"
-	help
-	  This is the bit number for the SDA signal line of the DS1302 RTC on
-	  Port PB. This is probably best left at 2.
-
-config ETRAX_DS1302_TRICKLE_CHARGE
-	int "DS1302 Trickle charger value"
-	depends on ETRAX_DS1302
-	default "0"
-	help
-	  This controls the initial value of the trickle charge register.
-	  0 = disabled (use this if you are unsure or have a non rechargeable battery)
-	  Otherwise the following values can be OR:ed together to control the
-	  charge current:
-	  1 = 2kohm, 2 = 4kohm, 3 = 4kohm
-	  4 = 1 diode, 8 = 2 diodes
-	  Allowed values are (increasing current): 0, 11, 10, 9, 7, 6, 5
-
 endif
diff --git a/arch/cris/arch-v10/drivers/Makefile b/arch/cris/arch-v10/drivers/Makefile
index 44bf2e8..e5c1318 100644
--- a/arch/cris/arch-v10/drivers/Makefile
+++ b/arch/cris/arch-v10/drivers/Makefile
@@ -6,7 +6,5 @@
 obj-$(CONFIG_ETRAX_I2C)			+= i2c.o
 obj-$(CONFIG_ETRAX_I2C_EEPROM)		+= eeprom.o
 obj-$(CONFIG_ETRAX_GPIO)		+= gpio.o
-obj-$(CONFIG_ETRAX_DS1302)		+= ds1302.o
-obj-$(CONFIG_ETRAX_PCF8563)		+= pcf8563.o
 obj-$(CONFIG_ETRAX_SYNCHRONOUS_SERIAL)	+= sync_serial.o
 
diff --git a/arch/cris/arch-v32/drivers/Kconfig b/arch/cris/arch-v32/drivers/Kconfig
index 1d866d3..6792503 100644
--- a/arch/cris/arch-v32/drivers/Kconfig
+++ b/arch/cris/arch-v32/drivers/Kconfig
@@ -19,64 +19,6 @@
 	  switch. This option should normally be disabled. If enabled,
 	  speed and duplex will be locked to 100 Mbit and full duplex.
 
-config ETRAX_ETHERNET_IFACE0
-	depends on ETRAX_ETHERNET
-	bool "Enable network interface 0"
-
-config ETRAX_ETHERNET_IFACE1
-	depends on (ETRAX_ETHERNET && ETRAXFS)
-	bool "Enable network interface 1 (uses DMA6 and DMA7)"
-
-config ETRAX_ETHERNET_GBIT
-	depends on (ETRAX_ETHERNET && CRIS_MACH_ARTPEC3)
-	bool "Enable gigabit Ethernet support"
-
-choice
-	prompt "Eth0 led group"
-	depends on ETRAX_ETHERNET_IFACE0
-	default ETRAX_ETH0_USE_LEDGRP0
-
-config ETRAX_ETH0_USE_LEDGRP0
-	bool "Use LED grp 0"
-	depends on ETRAX_NBR_LED_GRP_ONE || ETRAX_NBR_LED_GRP_TWO
-	help
-	  Use LED grp 0 for eth0
-
-config ETRAX_ETH0_USE_LEDGRP1
-	bool "Use LED grp 1"
-	depends on ETRAX_NBR_LED_GRP_TWO
-	help
-	  Use LED grp 1 for eth0
-
-config ETRAX_ETH0_USE_LEDGRPNULL
-	bool "Use no LEDs for eth0"
-	help
-	  Use no LEDs for eth0
-endchoice
-
-choice
-	prompt "Eth1 led group"
-	depends on ETRAX_ETHERNET_IFACE1
-	default ETRAX_ETH1_USE_LEDGRP1
-
-config ETRAX_ETH1_USE_LEDGRP0
-	bool "Use LED grp 0"
-	depends on ETRAX_NBR_LED_GRP_ONE || ETRAX_NBR_LED_GRP_TWO
-	help
-	  Use LED grp 0 for eth1
-
-config ETRAX_ETH1_USE_LEDGRP1
-	bool "Use LED grp 1"
-	depends on ETRAX_NBR_LED_GRP_TWO
-	help
-	  Use LED grp 1 for eth1
-
-config ETRAX_ETH1_USE_LEDGRPNULL
-	bool "Use no LEDs for eth1"
-	help
-	  Use no LEDs for eth1
-endchoice
-
 config ETRAXFS_SERIAL
 	bool "Serial-port support"
 	depends on ETRAX_ARCH_V32
@@ -108,261 +50,24 @@
 	  if you do not need DMA to something else.
 	  ser0 can use dma4 or dma6 for output and dma5 or dma7 for input.
 
-choice
-	prompt "Ser0 default port type "
-	depends on ETRAX_SERIAL_PORT0
-	default ETRAX_SERIAL_PORT0_TYPE_232
-	help
-	  Type of serial port.
-
-config ETRAX_SERIAL_PORT0_TYPE_232
-	bool "Ser0 is a RS-232 port"
-	help
-	  Configure serial port 0 to be a RS-232 port.
-
-config ETRAX_SERIAL_PORT0_TYPE_485HD
-	bool "Ser0 is a half duplex RS-485 port"
-	depends on ETRAX_RS485
-	help
-	  Configure serial port 0 to be a half duplex (two wires) RS-485 port.
-
-config ETRAX_SERIAL_PORT0_TYPE_485FD
-	bool "Ser0 is a full duplex RS-485 port"
-	depends on ETRAX_RS485
-	help
-	  Configure serial port 0 to be a full duplex (four wires) RS-485 port.
-endchoice
-
-config ETRAX_SER0_DTR_BIT
-	string "Ser 0 DTR bit (empty = not used)"
-	depends on ETRAX_SERIAL_PORT0
-
-config ETRAX_SER0_RI_BIT
-	string "Ser 0 RI bit (empty = not used)"
-	depends on ETRAX_SERIAL_PORT0
-
-config ETRAX_SER0_DSR_BIT
-	string "Ser 0 DSR bit (empty = not used)"
-	depends on ETRAX_SERIAL_PORT0
-
-config ETRAX_SER0_CD_BIT
-	string "Ser 0 CD bit (empty = not used)"
-	depends on ETRAX_SERIAL_PORT0
-
 config ETRAX_SERIAL_PORT1
 	bool "Serial port 1 enabled"
 	depends on ETRAXFS_SERIAL
 	help
 	  Enables the ETRAX FS serial driver for ser1 (ttyS1).
 
-choice
-	prompt "Ser1 default port type"
-	depends on ETRAX_SERIAL_PORT1
-	default ETRAX_SERIAL_PORT1_TYPE_232
-	help
-	  Type of serial port.
-
-config ETRAX_SERIAL_PORT1_TYPE_232
-	bool "Ser1 is a RS-232 port"
-	help
-	  Configure serial port 1 to be a RS-232 port.
-
-config ETRAX_SERIAL_PORT1_TYPE_485HD
-	bool "Ser1 is a half duplex RS-485 port"
-	depends on ETRAX_RS485
-	help
-	  Configure serial port 1 to be a half duplex (two wires) RS-485 port.
-
-config ETRAX_SERIAL_PORT1_TYPE_485FD
-	bool "Ser1 is a full duplex RS-485 port"
-	depends on ETRAX_RS485
-	help
-	  Configure serial port 1 to be a full duplex (four wires) RS-485 port.
-endchoice
-
-config ETRAX_SER1_DTR_BIT
-	string "Ser 1 DTR bit (empty = not used)"
-	depends on ETRAX_SERIAL_PORT1
-
-config ETRAX_SER1_RI_BIT
-	string "Ser 1 RI bit (empty = not used)"
-	depends on ETRAX_SERIAL_PORT1
-
-config ETRAX_SER1_DSR_BIT
-	string "Ser 1 DSR bit (empty = not used)"
-	depends on ETRAX_SERIAL_PORT1
-
-config ETRAX_SER1_CD_BIT
-	string "Ser 1 CD bit (empty = not used)"
-	depends on ETRAX_SERIAL_PORT1
-
 config ETRAX_SERIAL_PORT2
 	bool "Serial port 2 enabled"
 	depends on ETRAXFS_SERIAL
 	help
 	  Enables the ETRAX FS serial driver for ser2 (ttyS2).
 
-choice
-	prompt "Ser2 default port type"
-	depends on ETRAX_SERIAL_PORT2
-	default ETRAX_SERIAL_PORT2_TYPE_232
-	help
-	  What DMA channel to use for ser2
-
-config ETRAX_SERIAL_PORT2_TYPE_232
-	bool "Ser2 is a RS-232 port"
-	help
-	  Configure serial port 2 to be a RS-232 port.
-
-config ETRAX_SERIAL_PORT2_TYPE_485HD
-	bool "Ser2 is a half duplex RS-485 port"
-	depends on ETRAX_RS485
-	help
-	  Configure serial port 2 to be a half duplex (two wires) RS-485 port.
-
-config ETRAX_SERIAL_PORT2_TYPE_485FD
-	bool "Ser2 is a full duplex RS-485 port"
-	depends on ETRAX_RS485
-	help
-	  Configure serial port 2 to be a full duplex (four wires) RS-485 port.
-endchoice
-
-
-config ETRAX_SER2_DTR_BIT
-	string "Ser 2 DTR bit (empty = not used)"
-	depends on ETRAX_SERIAL_PORT2
-
-config ETRAX_SER2_RI_BIT
-	string "Ser 2 RI bit (empty = not used)"
-	depends on ETRAX_SERIAL_PORT2
-
-config ETRAX_SER2_DSR_BIT
-	string "Ser 2 DSR bit (empty = not used)"
-	depends on ETRAX_SERIAL_PORT2
-
-config ETRAX_SER2_CD_BIT
-	string "Ser 2 CD bit (empty = not used)"
-	depends on ETRAX_SERIAL_PORT2
-
 config ETRAX_SERIAL_PORT3
 	bool "Serial port 3 enabled"
 	depends on ETRAXFS_SERIAL
 	help
 	  Enables the ETRAX FS serial driver for ser3 (ttyS3).
 
-choice
-	prompt "Ser3 default port type"
-	depends on ETRAX_SERIAL_PORT3
-	default ETRAX_SERIAL_PORT3_TYPE_232
-	help
-	  What DMA channel to use for ser3.
-
-config ETRAX_SERIAL_PORT3_TYPE_232
-	bool "Ser3 is a RS-232 port"
-	help
-	  Configure serial port 3 to be a RS-232 port.
-
-config ETRAX_SERIAL_PORT3_TYPE_485HD
-	bool "Ser3 is a half duplex RS-485 port"
-	depends on ETRAX_RS485
-	help
-	  Configure serial port 3 to be a half duplex (two wires) RS-485 port.
-
-config ETRAX_SERIAL_PORT3_TYPE_485FD
-	bool "Ser3 is a full duplex RS-485 port"
-	depends on ETRAX_RS485
-	help
-	  Configure serial port 3 to be a full duplex (four wires) RS-485 port.
-endchoice
-
-config ETRAX_SER3_DTR_BIT
-	string "Ser 3 DTR bit (empty = not used)"
-	depends on ETRAX_SERIAL_PORT3
-
-config ETRAX_SER3_RI_BIT
-	string "Ser 3 RI bit (empty = not used)"
-	depends on ETRAX_SERIAL_PORT3
-
-config ETRAX_SER3_DSR_BIT
-	string "Ser 3 DSR bit (empty = not used)"
-	depends on ETRAX_SERIAL_PORT3
-
-config ETRAX_SER3_CD_BIT
-	string "Ser 3 CD bit (empty = not used)"
-	depends on ETRAX_SERIAL_PORT3
-
-config ETRAX_SERIAL_PORT4
-	bool "Serial port 4 enabled"
-	depends on ETRAXFS_SERIAL && CRIS_MACH_ARTPEC3
-	help
-	  Enables the ETRAX FS serial driver for ser4 (ttyS4).
-
-choice
-	prompt "Ser4 default port type"
-	depends on ETRAX_SERIAL_PORT4
-	default ETRAX_SERIAL_PORT4_TYPE_232
-	help
-	  What DMA channel to use for ser4.
-
-config ETRAX_SERIAL_PORT4_TYPE_232
-	bool "Ser4 is a RS-232 port"
-	help
-	  Configure serial port 4 to be a RS-232 port.
-
-config ETRAX_SERIAL_PORT4_TYPE_485HD
-	bool "Ser4 is a half duplex RS-485 port"
-	depends on ETRAX_RS485
-	help
-	  Configure serial port 4 to be a half duplex (two wires) RS-485 port.
-
-config ETRAX_SERIAL_PORT4_TYPE_485FD
-	bool "Ser4 is a full duplex RS-485 port"
-	depends on ETRAX_RS485
-	help
-	  Configure serial port 4 to be a full duplex (four wires) RS-485 port.
-endchoice
-
-choice
-	prompt "Ser4 DMA in channel "
-	depends on ETRAX_SERIAL_PORT4
-	default ETRAX_SERIAL_PORT4_NO_DMA_IN
-	help
-	  What DMA channel to use for ser4.
-
-
-config ETRAX_SERIAL_PORT4_NO_DMA_IN
-	bool "Ser4 uses no DMA for input"
-	help
-	  Do not use DMA for ser4 input.
-
-config ETRAX_SERIAL_PORT4_DMA9_IN
-	bool "Ser4 uses DMA9 for input"
-	depends on ETRAX_SERIAL_PORT4
-	help
-	  Enables the DMA9 input channel for ser4 (ttyS4).
-	  If you do not enable DMA, an interrupt for each character will be
-	  used when receiving data.
-	  Normally you want to use DMA, unless you use the DMA channel for
-	  something else.
-
-endchoice
-
-config ETRAX_SER4_DTR_BIT
-	string "Ser 4 DTR bit (empty = not used)"
-	depends on ETRAX_SERIAL_PORT4
-
-config ETRAX_SER4_RI_BIT
-	string "Ser 4 RI bit (empty = not used)"
-	depends on ETRAX_SERIAL_PORT4
-
-config ETRAX_SER4_DSR_BIT
-	string "Ser 4 DSR bit (empty = not used)"
-	depends on ETRAX_SERIAL_PORT4
-
-config ETRAX_SER4_CD_BIT
-	string "Ser 4 CD bit (empty = not used)"
-	depends on ETRAX_SERIAL_PORT4
-
 config ETRAX_SYNCHRONOUS_SERIAL
 	bool "Synchronous serial-port support"
 	depends on ETRAX_ARCH_V32
@@ -703,32 +408,6 @@
 	  want to build it as a module, which will be named spi_crisv32_sser.
 	  (You need to select MMC separately.)
 
-config ETRAX_SPI_SSER0_DMA
-	bool "DMA for SPI on sser0 enabled"
-	depends on ETRAX_SPI_SSER0
-	depends on !ETRAX_SERIAL_PORT1_DMA4_OUT && !ETRAX_SERIAL_PORT1_DMA5_IN
-	default y
-	help
-	  Say Y if using DMA (dma4/dma5) for SPI on synchronous serial port 0.
-
-config ETRAX_SPI_MMC_CD_SSER0_PIN
-	string "MMC/SD card detect pin for SPI on sser0"
-	depends on ETRAX_SPI_SSER0 && MMC_SPI
-	default "pd11"
-	help
-	  The pin to use for SD/MMC card detect.  This pin should be pulled up
-	  and grounded when a card is present.  If defined as " " (space), no
-	  pin is selected.  A card must then always be inserted for proper
-	  action.
-
-config ETRAX_SPI_MMC_WP_SSER0_PIN
-	string "MMC/SD card write-protect pin for SPI on sser0"
-	depends on ETRAX_SPI_SSER0 && MMC_SPI
-	default "pd10"
-	help
-	  The pin to use for the SD/MMC write-protect signal for a memory
-	  card.  If defined as " " (space), the card is considered writable.
-
 config ETRAX_SPI_SSER1
 	tristate "SPI using synchronous serial port 1 (sser1)"
 	depends on ETRAX_SPI_MMC
@@ -742,32 +421,6 @@
 	  want to build it as a module, which will be named spi_crisv32_sser.
 	  (You need to select MMC separately.)
 
-config ETRAX_SPI_SSER1_DMA
-	bool "DMA for SPI on sser1 enabled"
-	depends on ETRAX_SPI_SSER1 && !ETRAX_ETHERNET_IFACE1
-	depends on !ETRAX_SERIAL_PORT0_DMA6_OUT && !ETRAX_SERIAL_PORT0_DMA7_IN
-	default y
-	help
-	  Say Y if using DMA (dma6/dma7) for SPI on synchronous serial port 1.
-
-config ETRAX_SPI_MMC_CD_SSER1_PIN
-	string "MMC/SD card detect pin for SPI on sser1"
-	depends on ETRAX_SPI_SSER1 && MMC_SPI
-	default "pd12"
-	help
-	  The pin to use for SD/MMC card detect.  This pin should be pulled up
-	  and grounded when a card is present.  If defined as " " (space), no
-	  pin is selected.  A card must then always be inserted for proper
-	  action.
-
-config ETRAX_SPI_MMC_WP_SSER1_PIN
-	string "MMC/SD card write-protect pin for SPI on sser1"
-	depends on ETRAX_SPI_SSER1 && MMC_SPI
-	default "pd9"
-	help
-	  The pin to use for the SD/MMC write-protect signal for a memory
-	  card.  If defined as " " (space), the card is considered writable.
-
 config ETRAX_SPI_GPIO
 	tristate "Bitbanged SPI using gpio pins"
 	depends on ETRAX_SPI_MMC
@@ -782,51 +435,4 @@
 	  Say m to build it as a module, which will be called spi_crisv32_gpio.
 	  (You need to select MMC separately.)
 
-# The default match that of sser0, only because that's how it was tested.
-config ETRAX_SPI_CS_PIN
-	string "SPI chip select pin"
-	depends on ETRAX_SPI_GPIO
-	default "pc3"
-	help
-	  The pin to use for SPI chip select.
-
-config ETRAX_SPI_CLK_PIN
-	string "SPI clock pin"
-	depends on ETRAX_SPI_GPIO
-	default "pc1"
-	help
-	  The pin to use for the SPI clock.
-
-config ETRAX_SPI_DATAIN_PIN
-	string "SPI MISO (data in) pin"
-	depends on ETRAX_SPI_GPIO
-	default "pc16"
-	help
-	  The pin to use for SPI data in from the device.
-
-config ETRAX_SPI_DATAOUT_PIN
-	string "SPI MOSI (data out) pin"
-	depends on ETRAX_SPI_GPIO
-	default "pc0"
-	help
-	  The pin to use for SPI data out to the device.
-
-config ETRAX_SPI_MMC_CD_GPIO_PIN
-	string "MMC/SD card detect pin for SPI using gpio (space for none)"
-	depends on ETRAX_SPI_GPIO && MMC_SPI
-	default "pd11"
-	help
-	  The pin to use for SD/MMC card detect.  This pin should be pulled up
-	  and grounded when a card is present.  If defined as " " (space), no
-	  pin is selected.  A card must then always be inserted for proper
-	  action.
-
-config ETRAX_SPI_MMC_WP_GPIO_PIN
-	string "MMC/SD card write-protect pin for SPI using gpio (space for none)"
-	depends on ETRAX_SPI_GPIO && MMC_SPI
-	default "pd10"
-	help
-	  The pin to use for the SD/MMC write-protect signal for a memory
-	  card.  If defined as " " (space), the card is considered writable.
-
 endif
diff --git a/arch/cris/arch-v32/mach-a3/Kconfig b/arch/cris/arch-v32/mach-a3/Kconfig
index 7796aaf..8754727 100644
--- a/arch/cris/arch-v32/mach-a3/Kconfig
+++ b/arch/cris/arch-v32/mach-a3/Kconfig
@@ -15,10 +15,6 @@
        int
        default 5
 
-config ETRAX_DDR
-       bool
-       default y
-
 config ETRAX_DDR2_MRS
 	hex "DDR2 MRS"
 	default "0"
diff --git a/arch/cris/include/asm/processor.h b/arch/cris/include/asm/processor.h
index c0a29b9..15b815d 100644
--- a/arch/cris/include/asm/processor.h
+++ b/arch/cris/include/asm/processor.h
@@ -47,7 +47,6 @@
  */
 
 #define task_pt_regs(task) user_regs(task_thread_info(task))
-#define current_regs() task_pt_regs(current)
 
 unsigned long get_wchan(struct task_struct *p);
 
diff --git a/arch/cris/include/uapi/asm/kvm_para.h b/arch/cris/include/uapi/asm/kvm_para.h
new file mode 100644
index 0000000..14fab8f
--- /dev/null
+++ b/arch/cris/include/uapi/asm/kvm_para.h
@@ -0,0 +1 @@
+#include <asm-generic/kvm_para.h>
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 821170e..c3cda41 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -11,6 +11,7 @@
 	select VIRT_TO_BUS
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG if RMW_INSNS
 	select GENERIC_CPU_DEVICES
+	select GENERIC_IOMAP
 	select GENERIC_STRNCPY_FROM_USER if MMU
 	select GENERIC_STRNLEN_USER if MMU
 	select FPU if MMU
@@ -72,7 +73,6 @@
 config MMU
 	bool "MMU-based Paged Memory Management Support"
 	default y
-	select GENERIC_IOMAP
 	help
 	  Select if you want MMU-based virtualised addressing space
 	  support by paged memory management. If unsure, say 'Y'.
diff --git a/arch/m68k/Kconfig.machine b/arch/m68k/Kconfig.machine
index b9ab0a6..61dc643 100644
--- a/arch/m68k/Kconfig.machine
+++ b/arch/m68k/Kconfig.machine
@@ -150,18 +150,6 @@
 	help
 	  Support the bugs of Xcopilot.
 
-config UC5272
-	bool "Arcturus Networks uC5272 dimm board support"
-	depends on M5272
-	help
-	  Support for the Arcturus Networks uC5272 dimm board.
-
-config UC5282
-	bool "Arcturus Networks uC5282 board support"
-	depends on M528x
-	help
-	  Support for the Arcturus Networks uC5282 dimm board.
-
 config UCSIMM
 	bool "uCsimm module support"
 	depends on M68EZ328
@@ -205,23 +193,15 @@
 	help
 	  Support for the Lineo uCquicc board.
 
-config ARNEWSH
-	bool
-
 config ARN5206
 	bool "Arnewsh 5206 board support"
 	depends on M5206
-	select ARNEWSH
 	help
 	  Support for the Arnewsh 5206 board.
 
-config FREESCALE
-	bool
-
 config M5206eC3
 	bool "Motorola M5206eC3 board support"
 	depends on M5206e
-	select FREESCALE
 	help
 	  Support for the Motorola M5206eC3 board.
 
@@ -231,88 +211,24 @@
 	help
 	  Support for the Motorola M5206eLITE board.
 
-config M5208EVB
-	bool "Freescale M5208EVB board support"
-	depends on M520x
-	select FREESCALE
-	help
-	  Support for the Freescale Coldfire M5208EVB.
-
 config M5235EVB
 	bool "Freescale M5235EVB support"
 	depends on M523x
-	select FREESCALE
 	help
 	  Support for the Freescale M5235EVB board.
 
 config M5249C3
 	bool "Motorola M5249C3 board support"
 	depends on M5249
-	select FREESCALE
 	help
 	  Support for the Motorola M5249C3 board.
 
-config M5271EVB
-	bool "Freescale (Motorola) M5271EVB board support"
-	depends on M5271
-	select FREESCALE
-	help
-	  Support for the Freescale (Motorola) M5271EVB board.
-
-config M5275EVB
-	bool "Freescale (Motorola) M5275EVB board support"
-	depends on M5275
-	select FREESCALE
-	help
-	  Support for the Freescale (Motorola) M5275EVB board.
-
 config M5272C3
 	bool "Motorola M5272C3 board support"
 	depends on M5272
-	select FREESCALE
 	help
 	  Support for the Motorola M5272C3 board.
 
-config senTec
-	bool
-
-config COBRA5272
-	bool "senTec COBRA5272 board support"
-	depends on M5272
-	select senTec
-	help
-	  Support for the senTec COBRA5272 board.
-
-config AVNET
-	bool
-
-config AVNET5282
-	bool "Avnet 5282 board support"
-	depends on M528x
-	select AVNET
-	help
-	  Support for the Avnet 5282 board.
-
-config M5282EVB
-	bool "Motorola M5282EVB board support"
-	depends on M528x
-	select FREESCALE
-	help
-	  Support for the Motorola M5282EVB board.
-
-config COBRA5282
-	bool "senTec COBRA5282 board support"
-	depends on M528x
-	select senTec
-	help
-	  Support for the senTec COBRA5282 board.
-
-config SOM5282EM
-	bool "EMAC.Inc SOM5282EM board support"
-	depends on M528x
-	help
-	  Support for the EMAC.Inc SOM5282EM module.
-
 config WILDFIRE
 	bool "Intec Automation Inc. WildFire board support"
 	depends on M528x
@@ -328,14 +244,12 @@
 config ARN5307
 	bool "Arnewsh 5307 board support"
 	depends on M5307
-	select ARNEWSH
 	help
 	  Support for the Arnewsh 5307 board.
 
 config M5307C3
 	bool "Motorola M5307C3 board support"
 	depends on M5307
-	select FREESCALE
 	help
 	  Support for the Motorola M5307C3 board.
 
@@ -345,30 +259,9 @@
 	help
 	  Support for the SnapGear SecureEdge/MP3 platform.
 
-config M5329EVB
-	bool "Freescale (Motorola) M5329EVB board support"
-	depends on M532x
-	select FREESCALE
-	help
-	  Support for the Freescale (Motorola) M5329EVB board.
-
-config COBRA5329
-	bool "senTec COBRA5329 board support"
-	depends on M532x
-	help
-	  Support for the senTec COBRA5329 board.
-
-config M5373EVB
-	bool "Freescale M5373EVB board support"
-	depends on M537x
-	select FREESCALE
-	help
-	  Support for the Freescale M5373EVB board.
-
 config M5407C3
 	bool "Motorola M5407C3 board support"
 	depends on M5407
-	select FREESCALE
 	help
 	  Support for the Motorola M5407C3 board.
 
@@ -402,39 +295,12 @@
 	help
 	  Support for the SnapGear NETtel/SecureEdge/SnapGear boards.
 
-config SNAPGEAR
-	bool "SnapGear router board support"
-	depends on NETtel
-	help
-	  Special additional support for SnapGear router boards.
-
-config SNEHA
-	bool
-
-config CPU16B
-	bool "Sneha Technologies S.L. Sarasvati board support"
-	depends on M5272
-	select SNEHA
-	help
-	  Support for the SNEHA CPU16B board.
-
 config MOD5272
 	bool "Netburner MOD-5272 board support"
 	depends on M5272
 	help
 	  Support for the Netburner MOD-5272 board.
 
-config SAVANT
-	bool
-
-config SAVANTrosie1
-	bool "Savant Rosie1 board support"
-	depends on M523x
-	select SAVANT
-	help
-	  Support for the Savant Rosie1 board.
-
-
 if !MMU || COLDFIRE
 
 comment "Machine Options"
diff --git a/arch/m68k/include/asm/io_no.h b/arch/m68k/include/asm/io_no.h
index 353bf75..e153478 100644
--- a/arch/m68k/include/asm/io_no.h
+++ b/arch/m68k/include/asm/io_no.h
@@ -4,6 +4,7 @@
 #ifdef __KERNEL__
 
 #include <asm/virtconvert.h>
+#include <asm-generic/iomap.h>
 
 /*
  * These are for ISA/PCI shared memory _only_ and should never be used
diff --git a/arch/m68k/include/asm/page.h b/arch/m68k/include/asm/page.h
index 7c360da..38b024a 100644
--- a/arch/m68k/include/asm/page.h
+++ b/arch/m68k/include/asm/page.h
@@ -48,6 +48,9 @@
 #include <asm/page_no.h>
 #endif
 
+#define VM_DATA_DEFAULT_FLAGS	(VM_READ | VM_WRITE | VM_EXEC | \
+				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
 #include <asm-generic/getorder.h>
 
 #endif /* _M68K_PAGE_H */
diff --git a/arch/m68k/include/asm/page_mm.h b/arch/m68k/include/asm/page_mm.h
index 89f2014..5029f73 100644
--- a/arch/m68k/include/asm/page_mm.h
+++ b/arch/m68k/include/asm/page_mm.h
@@ -173,7 +173,4 @@
 
 #endif /* __ASSEMBLY__ */
 
-#define VM_DATA_DEFAULT_FLAGS	(VM_READ | VM_WRITE | VM_EXEC | \
-				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
-
 #endif /* _M68K_PAGE_MM_H */
diff --git a/arch/m68k/kernel/setup_no.c b/arch/m68k/kernel/setup_no.c
index 911ba47..5b16f5d 100644
--- a/arch/m68k/kernel/setup_no.c
+++ b/arch/m68k/kernel/setup_no.c
@@ -118,7 +118,7 @@
  *
  * Returns:
  */
-void parse_uboot_commandline(char *commandp, int size)
+static void __init parse_uboot_commandline(char *commandp, int size)
 {
 	extern unsigned long _init_sp;
 	unsigned long *sp;
diff --git a/arch/m68k/kernel/signal.c b/arch/m68k/kernel/signal.c
index 2a16df3..57fd286 100644
--- a/arch/m68k/kernel/signal.c
+++ b/arch/m68k/kernel/signal.c
@@ -50,6 +50,7 @@
 #include <asm/pgtable.h>
 #include <asm/traps.h>
 #include <asm/ucontext.h>
+#include <asm/cacheflush.h>
 
 #ifdef CONFIG_MMU
 
@@ -181,6 +182,13 @@
 		asm volatile ("movec %0,%%caar\n\t"
 			      "movec %1,%%cacr"
 			      : : "r" (vaddr + 4), "r" (temp));
+	} else {
+		/* CPU_IS_COLDFIRE */
+#if defined(CONFIG_CACHE_COPYBACK)
+		flush_cf_dcache(0, DCACHE_MAX_ADDR);
+#endif
+		/* Invalidate instruction cache for the pushed bytes */
+		clear_cf_icache(vaddr, vaddr + 8);
 	}
 }
 
diff --git a/arch/m68k/platform/68000/m68328.c b/arch/m68k/platform/68000/m68328.c
index a86eb66..e53caf4 100644
--- a/arch/m68k/platform/68000/m68328.c
+++ b/arch/m68k/platform/68000/m68328.c
@@ -15,6 +15,7 @@
 
 /***************************************************************************/
 
+#include <linux/init.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/rtc.h>
@@ -42,7 +43,7 @@
 
 /***************************************************************************/
 
-void config_BSP(char *command, int len)
+void __init config_BSP(char *command, int len)
 {
   printk(KERN_INFO "\n68328 support D. Jeff Dionne <jeff@uclinux.org>\n");
   printk(KERN_INFO "68328 support Kenneth Albanowski <kjahds@kjshds.com>\n");
diff --git a/arch/m68k/platform/68000/m68EZ328.c b/arch/m68k/platform/68000/m68EZ328.c
index a6eb72d..332b5e8 100644
--- a/arch/m68k/platform/68000/m68EZ328.c
+++ b/arch/m68k/platform/68000/m68EZ328.c
@@ -13,6 +13,7 @@
 
 /***************************************************************************/
 
+#include <linux/init.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/rtc.h>
@@ -52,7 +53,7 @@
 _bsc1(char *, getbenv, char *, a)
 #endif
 
-void config_BSP(char *command, int len)
+void __init config_BSP(char *command, int len)
 {
   unsigned char *p;
 
diff --git a/arch/m68k/platform/68000/m68VZ328.c b/arch/m68k/platform/68000/m68VZ328.c
index eb6964f..fd66583 100644
--- a/arch/m68k/platform/68000/m68VZ328.c
+++ b/arch/m68k/platform/68000/m68VZ328.c
@@ -14,6 +14,7 @@
 
 /***************************************************************************/
 
+#include <linux/init.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/kd.h>
@@ -59,7 +60,7 @@
 	);
 }
 
-static void init_hardware(char *command, int size)
+static void __init init_hardware(char *command, int size)
 {
 #ifdef CONFIG_DIRECT_IO_ACCESS
 	SCR = 0x10;					/* allow user access to internal registers */
@@ -145,7 +146,7 @@
 _bsc1(unsigned char *, gethwaddr, int, a)
 _bsc1(char *, getbenv, char *, a)
 
-static void init_hardware(char *command, int size)
+static void __init init_hardware(char *command, int size)
 {
 	char *p;
 
@@ -167,7 +168,7 @@
 {
 }
 
-static void init_hardware(char *command, int size)
+static void __init init_hardware(char *command, int size)
 {
 }
 
@@ -175,7 +176,7 @@
 #endif
 /***************************************************************************/
 
-void config_BSP(char *command, int size)
+void __init config_BSP(char *command, int size)
 {
 	printk(KERN_INFO "68VZ328 DragonBallVZ support (c) 2001 Lineo, Inc.\n");
 
diff --git a/arch/m68k/platform/68360/commproc.c b/arch/m68k/platform/68360/commproc.c
index 8e4e10c..315727b 100644
--- a/arch/m68k/platform/68360/commproc.c
+++ b/arch/m68k/platform/68360/commproc.c
@@ -31,6 +31,7 @@
  */
 
 #include <linux/errno.h>
+#include <linux/init.h>
 #include <linux/sched.h>
 #include <linux/kernel.h>
 #include <linux/param.h>
@@ -77,7 +78,7 @@
 
 
 
-void m360_cpm_reset()
+void __init m360_cpm_reset()
 {
 /* 	pte_t		   *pte; */
 
diff --git a/arch/m68k/platform/68360/config.c b/arch/m68k/platform/68360/config.c
index 9877cef..0570741 100644
--- a/arch/m68k/platform/68360/config.c
+++ b/arch/m68k/platform/68360/config.c
@@ -11,6 +11,7 @@
  */
 
 #include <stdarg.h>
+#include <linux/init.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
@@ -140,7 +141,7 @@
 #endif
 
 
-void config_BSP(char *command, int len)
+void __init config_BSP(char *command, int len)
 {
   unsigned char *p;
 
diff --git a/arch/metag/Kconfig.soc b/arch/metag/Kconfig.soc
index 2a3c860..973640f 100644
--- a/arch/metag/Kconfig.soc
+++ b/arch/metag/Kconfig.soc
@@ -16,6 +16,8 @@
 
 config SOC_TZ1090
 	bool "Toumaz Xenif TZ1090 SoC (Comet)"
+	select ARCH_WANT_OPTIONAL_GPIOLIB
+	select IMGPDC_IRQ
 	select METAG_LNKGET_AROUND_CACHE
 	select METAG_META21
 	select METAG_SMP_WRITE_REORDERING
diff --git a/arch/metag/boot/dts/tz1090.dtsi b/arch/metag/boot/dts/tz1090.dtsi
index 8537446..24ea7d2 100644
--- a/arch/metag/boot/dts/tz1090.dtsi
+++ b/arch/metag/boot/dts/tz1090.dtsi
@@ -8,6 +8,8 @@
 
 #include "skeleton.dtsi"
 
+#include <dt-bindings/interrupt-controller/irq.h>
+
 / {
 	compatible = "toumaz,tz1090", "img,meta";
 
@@ -26,6 +28,22 @@
 		#size-cells = <1>;
 		ranges;
 
+		pdc: pdc@0x02006000 {
+			interrupt-controller;
+			#interrupt-cells = <2>;
+
+			reg = <0x02006000 0x1000>;
+			compatible = "img,pdc-intc";
+
+			num-perips = <3>;
+			num-syswakes = <3>;
+
+			interrupts = <18 IRQ_TYPE_LEVEL_HIGH>, /* Syswakes */
+			             <30 IRQ_TYPE_LEVEL_HIGH>, /* Perip 0 (RTC) */
+			             <29 IRQ_TYPE_LEVEL_HIGH>, /* Perip 1 (IR) */
+			             <31 IRQ_TYPE_LEVEL_HIGH>; /* Perip 2 (WDT) */
+		};
+
 		pinctrl: pinctrl@02005800 {
 			#gpio-range-cells = <3>;
 			compatible = "img,tz1090-pinctrl";
@@ -37,5 +55,54 @@
 			compatible = "img,tz1090-pdc-pinctrl";
 			reg = <0x02006500 0x100>;
 		};
+
+		gpios: gpios@02005800 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "img,tz1090-gpio";
+			reg = <0x02005800 0x90>;
+
+			gpios0: bank@0 {
+				gpio-controller;
+				interrupt-controller;
+				#gpio-cells = <2>;
+				#interrupt-cells = <2>;
+				reg = <0>;
+				interrupts = <13 IRQ_TYPE_LEVEL_HIGH>;
+				gpio-ranges = <&pinctrl 0 0 30>;
+			};
+			gpios1: bank@1 {
+				gpio-controller;
+				interrupt-controller;
+				#gpio-cells = <2>;
+				#interrupt-cells = <2>;
+				reg = <1>;
+				interrupts = <14 IRQ_TYPE_LEVEL_HIGH>;
+				gpio-ranges = <&pinctrl 0 30 30>;
+			};
+			gpios2: bank@2 {
+				gpio-controller;
+				interrupt-controller;
+				#gpio-cells = <2>;
+				#interrupt-cells = <2>;
+				reg = <2>;
+				interrupts = <15 IRQ_TYPE_LEVEL_HIGH>;
+				gpio-ranges = <&pinctrl 0 60 30>;
+			};
+		};
+
+		pdc_gpios: gpios@02006500 {
+			gpio-controller;
+			#gpio-cells = <2>;
+
+			compatible = "img,tz1090-pdc-gpio";
+			reg = <0x02006500 0x100>;
+
+			interrupt-parent = <&pdc>;
+			interrupts =	<8  IRQ_TYPE_NONE>,
+					<9  IRQ_TYPE_NONE>,
+					<10 IRQ_TYPE_NONE>;
+			gpio-ranges = <&pdc_pinctrl 0 0 7>;
+		};
 	};
 };
diff --git a/arch/metag/mm/init.c b/arch/metag/mm/init.c
index 28813f1..1239195 100644
--- a/arch/metag/mm/init.c
+++ b/arch/metag/mm/init.c
@@ -407,10 +407,9 @@
 #endif
 
 #ifdef CONFIG_OF_FLATTREE
-void __init early_init_dt_setup_initrd_arch(unsigned long start,
-					    unsigned long end)
+void __init early_init_dt_setup_initrd_arch(u64 start, u64 end)
 {
-	pr_err("%s(%lx, %lx)\n",
+	pr_err("%s(%llx, %llx)\n",
 	       __func__, start, end);
 }
 #endif /* CONFIG_OF_FLATTREE */
diff --git a/arch/microblaze/kernel/prom.c b/arch/microblaze/kernel/prom.c
index 0a2c68f..0c4453f 100644
--- a/arch/microblaze/kernel/prom.c
+++ b/arch/microblaze/kernel/prom.c
@@ -46,11 +46,6 @@
 	memblock_add(base, size);
 }
 
-void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align)
-{
-	return __va(memblock_alloc(size, align));
-}
-
 #ifdef CONFIG_EARLY_PRINTK
 static char *stdout;
 
@@ -136,8 +131,7 @@
 }
 
 #ifdef CONFIG_BLK_DEV_INITRD
-void __init early_init_dt_setup_initrd_arch(unsigned long start,
-		unsigned long end)
+void __init early_init_dt_setup_initrd_arch(u64 start, u64 end)
 {
 	initrd_start = (unsigned long)__va(start);
 	initrd_end = (unsigned long)__va(end);
diff --git a/arch/mips/bcm63xx/nvram.c b/arch/mips/bcm63xx/nvram.c
index e652e57..4b50d40 100644
--- a/arch/mips/bcm63xx/nvram.c
+++ b/arch/mips/bcm63xx/nvram.c
@@ -35,6 +35,8 @@
 	u32	checksum_high;
 };
 
+#define BCM63XX_DEFAULT_PSI_SIZE	64
+
 static struct bcm963xx_nvram nvram;
 static int mac_addr_used;
 
@@ -114,3 +116,12 @@
 	return 0;
 }
 EXPORT_SYMBOL(bcm63xx_nvram_get_mac_address);
+
+int bcm63xx_nvram_get_psi_size(void)
+{
+	if (nvram.psi_size > 0)
+		return nvram.psi_size;
+
+	return BCM63XX_DEFAULT_PSI_SIZE;
+}
+EXPORT_SYMBOL(bcm63xx_nvram_get_psi_size);
diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_nvram.h b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_nvram.h
index 4e0b6bc..348df49 100644
--- a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_nvram.h
+++ b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_nvram.h
@@ -30,4 +30,6 @@
  */
 int bcm63xx_nvram_get_mac_address(u8 *mac);
 
+int bcm63xx_nvram_get_psi_size(void);
+
 #endif /* BCM63XX_NVRAM_H */
diff --git a/arch/mips/kernel/prom.c b/arch/mips/kernel/prom.c
index 7e95404..0fa0b69 100644
--- a/arch/mips/kernel/prom.c
+++ b/arch/mips/kernel/prom.c
@@ -58,8 +58,7 @@
 }
 
 #ifdef CONFIG_BLK_DEV_INITRD
-void __init early_init_dt_setup_initrd_arch(unsigned long start,
-					    unsigned long end)
+void __init early_init_dt_setup_initrd_arch(u64 start, u64 end)
 {
 	initrd_start = (unsigned long)__va(start);
 	initrd_end = (unsigned long)__va(end);
diff --git a/arch/mn10300/kernel/entry.S b/arch/mn10300/kernel/entry.S
index 222152a..177d61d 100644
--- a/arch/mn10300/kernel/entry.S
+++ b/arch/mn10300/kernel/entry.S
@@ -171,10 +171,10 @@
 	mov	(REG_EPSW,fp),d0	# need to deliver signals before
 					# returning to userspace
 	and	EPSW_nSL,d0
-	beq	resume_kernel		# returning to supervisor mode
+	bne	resume_userspace	# returning to userspace
 
 #ifdef CONFIG_PREEMPT
-ENTRY(resume_kernel)
+resume_kernel:
 	LOCAL_IRQ_DISABLE
 	mov	(TI_preempt_count,a2),d0	# non-zero preempt_count ?
 	cmp	0,d0
@@ -189,6 +189,8 @@
 	bne	restore_all
 	call	preempt_schedule_irq[],0
 	jmp	need_resched
+#else
+	jmp	resume_kernel
 #endif
 
 
diff --git a/arch/openrisc/kernel/prom.c b/arch/openrisc/kernel/prom.c
index 5869e3f..a63e768 100644
--- a/arch/openrisc/kernel/prom.c
+++ b/arch/openrisc/kernel/prom.c
@@ -55,11 +55,6 @@
 	memblock_add(base, size);
 }
 
-void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align)
-{
-	return __va(memblock_alloc(size, align));
-}
-
 void __init early_init_devtree(void *params)
 {
 	void *alloc;
@@ -96,8 +91,7 @@
 }
 
 #ifdef CONFIG_BLK_DEV_INITRD
-void __init early_init_dt_setup_initrd_arch(unsigned long start,
-		unsigned long end)
+void __init early_init_dt_setup_initrd_arch(u64 start, u64 end)
 {
 	initrd_start = (unsigned long)__va(start);
 	initrd_end = (unsigned long)__va(end);
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 6bfcab97..b7634ce 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -546,14 +546,8 @@
 	memblock_add(base, size);
 }
 
-void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align)
-{
-	return __va(memblock_alloc(size, align));
-}
-
 #ifdef CONFIG_BLK_DEV_INITRD
-void __init early_init_dt_setup_initrd_arch(unsigned long start,
-		unsigned long end)
+void __init early_init_dt_setup_initrd_arch(u64 start, u64 end)
 {
 	initrd_start = (unsigned long)__va(start);
 	initrd_end = (unsigned long)__va(end);
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index f390042..87ba7cf 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -620,12 +620,16 @@
 		case Opt_uid:
 			if (match_int(&args[0], &option))
 				return 0;
-			root->i_uid = option;
+			root->i_uid = make_kuid(current_user_ns(), option);
+			if (!uid_valid(root->i_uid))
+				return 0;
 			break;
 		case Opt_gid:
 			if (match_int(&args[0], &option))
 				return 0;
-			root->i_gid = option;
+			root->i_gid = make_kgid(current_user_ns(), option);
+			if (!gid_valid(root->i_gid))
+				return 0;
 			break;
 		case Opt_mode:
 			if (match_octal(&args[0], &option))
diff --git a/arch/sh/boards/mach-ecovec24/setup.c b/arch/sh/boards/mach-ecovec24/setup.c
index 65dd81b..1fa8be4 100644
--- a/arch/sh/boards/mach-ecovec24/setup.c
+++ b/arch/sh/boards/mach-ecovec24/setup.c
@@ -600,37 +600,13 @@
 	},
 };
 
-static void sdhi0_set_pwr(struct platform_device *pdev, int state)
-{
-	static int power_gpio = -EINVAL;
-
-	if (power_gpio < 0) {
-		int ret = gpio_request(GPIO_PTB6, NULL);
-		if (!ret) {
-			power_gpio = GPIO_PTB6;
-			gpio_direction_output(power_gpio, 0);
-		}
-	}
-
-	/*
-	 * Toggle the GPIO regardless, whether we managed to grab it above or
-	 * the fixed regulator driver did.
-	 */
-	gpio_set_value(GPIO_PTB6, state);
-}
-
-static int sdhi0_get_cd(struct platform_device *pdev)
-{
-	return !gpio_get_value(GPIO_PTY7);
-}
-
 static struct sh_mobile_sdhi_info sdhi0_info = {
 	.dma_slave_tx	= SHDMA_SLAVE_SDHI0_TX,
 	.dma_slave_rx	= SHDMA_SLAVE_SDHI0_RX,
-	.set_pwr	= sdhi0_set_pwr,
 	.tmio_caps      = MMC_CAP_SDIO_IRQ | MMC_CAP_POWER_OFF_CARD |
 			  MMC_CAP_NEEDS_POLL,
-	.get_cd		= sdhi0_get_cd,
+	.tmio_flags	= TMIO_MMC_USE_GPIO_CD,
+	.cd_gpio	= GPIO_PTY7,
 };
 
 static struct resource sdhi0_resources[] = {
@@ -656,39 +632,15 @@
 	},
 };
 
-static void cn12_set_pwr(struct platform_device *pdev, int state)
-{
-	static int power_gpio = -EINVAL;
-
-	if (power_gpio < 0) {
-		int ret = gpio_request(GPIO_PTB7, NULL);
-		if (!ret) {
-			power_gpio = GPIO_PTB7;
-			gpio_direction_output(power_gpio, 0);
-		}
-	}
-
-	/*
-	 * Toggle the GPIO regardless, whether we managed to grab it above or
-	 * the fixed regulator driver did.
-	 */
-	gpio_set_value(GPIO_PTB7, state);
-}
-
 #if !defined(CONFIG_MMC_SH_MMCIF) && !defined(CONFIG_MMC_SH_MMCIF_MODULE)
 /* SDHI1 */
-static int sdhi1_get_cd(struct platform_device *pdev)
-{
-	return !gpio_get_value(GPIO_PTW7);
-}
-
 static struct sh_mobile_sdhi_info sdhi1_info = {
 	.dma_slave_tx	= SHDMA_SLAVE_SDHI1_TX,
 	.dma_slave_rx	= SHDMA_SLAVE_SDHI1_RX,
 	.tmio_caps      = MMC_CAP_SDIO_IRQ | MMC_CAP_POWER_OFF_CARD |
 			  MMC_CAP_NEEDS_POLL,
-	.set_pwr	= cn12_set_pwr,
-	.get_cd		= sdhi1_get_cd,
+	.tmio_flags	= TMIO_MMC_USE_GPIO_CD,
+	.cd_gpio	= GPIO_PTW7,
 };
 
 static struct resource sdhi1_resources[] = {
@@ -718,27 +670,19 @@
 #else
 
 /* MMC SPI */
-static int mmc_spi_get_ro(struct device *dev)
-{
-	return gpio_get_value(GPIO_PTY6);
-}
-
-static int mmc_spi_get_cd(struct device *dev)
-{
-	return !gpio_get_value(GPIO_PTY7);
-}
-
 static void mmc_spi_setpower(struct device *dev, unsigned int maskval)
 {
 	gpio_set_value(GPIO_PTB6, maskval ? 1 : 0);
 }
 
 static struct mmc_spi_platform_data mmc_spi_info = {
-	.get_ro = mmc_spi_get_ro,
-	.get_cd = mmc_spi_get_cd,
 	.caps = MMC_CAP_NEEDS_POLL,
+	.caps2 = MMC_CAP2_RO_ACTIVE_HIGH,
 	.ocr_mask = MMC_VDD_32_33 | MMC_VDD_33_34, /* 3.3V only */
 	.setpower = mmc_spi_setpower,
+	.flags = MMC_SPI_USE_CD_GPIO | MMC_SPI_USE_RO_GPIO,
+	.cd_gpio = GPIO_PTY7,
+	.ro_gpio = GPIO_PTY6,
 };
 
 static struct spi_board_info spi_bus[] = {
@@ -998,11 +942,6 @@
 
 #if defined(CONFIG_MMC_SH_MMCIF) || defined(CONFIG_MMC_SH_MMCIF_MODULE)
 /* SH_MMCIF */
-static void mmcif_down_pwr(struct platform_device *pdev)
-{
-	cn12_set_pwr(pdev, 0);
-}
-
 static struct resource sh_mmcif_resources[] = {
 	[0] = {
 		.name	= "SH_MMCIF",
@@ -1023,8 +962,6 @@
 };
 
 static struct sh_mmcif_plat_data sh_mmcif_plat = {
-	.set_pwr	= cn12_set_pwr,
-	.down_pwr	= mmcif_down_pwr,
 	.sup_pclk	= 0, /* SH7724: Max Pclk/2 */
 	.caps		= MMC_CAP_4_BIT_DATA |
 			  MMC_CAP_8_BIT_DATA |
@@ -1341,10 +1278,6 @@
 	gpio_direction_input(GPIO_PTR6);
 
 	/* SD-card slot CN11 */
-	/* Card-detect, used on CN11, either with SDHI0 or with SPI */
-	gpio_request(GPIO_PTY7, NULL);
-	gpio_direction_input(GPIO_PTY7);
-
 #if defined(CONFIG_MMC_SDHI) || defined(CONFIG_MMC_SDHI_MODULE)
 	/* enable SDHI0 on CN11 (needs DS2.4 set to ON) */
 	gpio_request(GPIO_FN_SDHI0WP,  NULL);
@@ -1363,8 +1296,6 @@
 	gpio_direction_output(GPIO_PTM4, 1); /* active low CS */
 	gpio_request(GPIO_PTB6, NULL); /* 3.3V power control */
 	gpio_direction_output(GPIO_PTB6, 0); /* disable power by default */
-	gpio_request(GPIO_PTY6, NULL); /* write protect */
-	gpio_direction_input(GPIO_PTY6);
 
 	spi_register_board_info(spi_bus, ARRAY_SIZE(spi_bus));
 #endif
@@ -1394,10 +1325,6 @@
 	gpio_request(GPIO_FN_SDHI1D1,  NULL);
 	gpio_request(GPIO_FN_SDHI1D0,  NULL);
 
-	/* Card-detect, used on CN12 with SDHI1 */
-	gpio_request(GPIO_PTW7, NULL);
-	gpio_direction_input(GPIO_PTW7);
-
 	cn12_enabled = true;
 #endif
 
diff --git a/arch/um/drivers/ubd.h b/arch/um/drivers/ubd.h
index 3845051..3b48cd2 100644
--- a/arch/um/drivers/ubd.h
+++ b/arch/um/drivers/ubd.h
@@ -7,7 +7,6 @@
 #ifndef __UM_UBD_USER_H
 #define __UM_UBD_USER_H
 
-extern void ignore_sigwinch_sig(void);
 extern int start_io_thread(unsigned long sp, int *fds_out);
 extern int io_thread(void *arg);
 extern int kernel_fd;
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 879990c..3716e69 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -41,7 +41,7 @@
 #include <os.h>
 #include "cow.h"
 
-enum ubd_req { UBD_READ, UBD_WRITE };
+enum ubd_req { UBD_READ, UBD_WRITE, UBD_FLUSH };
 
 struct io_thread_req {
 	struct request *req;
@@ -866,6 +866,7 @@
 		goto out;
 	}
 	ubd_dev->queue->queuedata = ubd_dev;
+	blk_queue_flush(ubd_dev->queue, REQ_FLUSH);
 
 	blk_queue_max_segments(ubd_dev->queue, MAX_SG);
 	err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, &ubd_gendisk[n]);
@@ -1239,11 +1240,40 @@
 }
 
 /* Called with dev->lock held */
+static void prepare_flush_request(struct request *req,
+				  struct io_thread_req *io_req)
+{
+	struct gendisk *disk = req->rq_disk;
+	struct ubd *ubd_dev = disk->private_data;
+
+	io_req->req = req;
+	io_req->fds[0] = (ubd_dev->cow.file != NULL) ? ubd_dev->cow.fd :
+		ubd_dev->fd;
+	io_req->op = UBD_FLUSH;
+}
+
+static bool submit_request(struct io_thread_req *io_req, struct ubd *dev)
+{
+	int n = os_write_file(thread_fd, &io_req,
+			     sizeof(io_req));
+	if (n != sizeof(io_req)) {
+		if (n != -EAGAIN)
+			printk("write to io thread failed, "
+			       "errno = %d\n", -n);
+		else if (list_empty(&dev->restart))
+			list_add(&dev->restart, &restart);
+
+		kfree(io_req);
+		return false;
+	}
+	return true;
+}
+
+/* Called with dev->lock held */
 static void do_ubd_request(struct request_queue *q)
 {
 	struct io_thread_req *io_req;
 	struct request *req;
-	int n;
 
 	while(1){
 		struct ubd *dev = q->queuedata;
@@ -1259,6 +1289,19 @@
 		}
 
 		req = dev->request;
+
+		if (req->cmd_flags & REQ_FLUSH) {
+			io_req = kmalloc(sizeof(struct io_thread_req),
+					 GFP_ATOMIC);
+			if (io_req == NULL) {
+				if (list_empty(&dev->restart))
+					list_add(&dev->restart, &restart);
+				return;
+			}
+			prepare_flush_request(req, io_req);
+			submit_request(io_req, dev);
+		}
+
 		while(dev->start_sg < dev->end_sg){
 			struct scatterlist *sg = &dev->sg[dev->start_sg];
 
@@ -1273,17 +1316,8 @@
 					(unsigned long long)dev->rq_pos << 9,
 					sg->offset, sg->length, sg_page(sg));
 
-			n = os_write_file(thread_fd, &io_req,
-					  sizeof(struct io_thread_req *));
-			if(n != sizeof(struct io_thread_req *)){
-				if(n != -EAGAIN)
-					printk("write to io thread failed, "
-					       "errno = %d\n", -n);
-				else if(list_empty(&dev->restart))
-					list_add(&dev->restart, &restart);
-				kfree(io_req);
+			if (submit_request(io_req, dev) == false)
 				return;
-			}
 
 			dev->rq_pos += sg->length >> 9;
 			dev->start_sg++;
@@ -1367,6 +1401,17 @@
 	int err;
 	__u64 off;
 
+	if (req->op == UBD_FLUSH) {
+		/* fds[0] is always either the rw image or our cow file */
+		n = os_sync_file(req->fds[0]);
+		if (n != 0) {
+			printk("do_io - sync failed err = %d "
+			       "fd = %d\n", -n, req->fds[0]);
+			req->error = 1;
+		}
+		return;
+	}
+
 	nsectors = req->length / req->sectorsize;
 	start = 0;
 	do {
@@ -1431,7 +1476,8 @@
 	struct io_thread_req *req;
 	int n;
 
-	ignore_sigwinch_sig();
+	os_fix_helper_signals();
+
 	while(1){
 		n = os_read_file(kernel_fd, &req,
 				 sizeof(struct io_thread_req *));
diff --git a/arch/um/drivers/ubd_user.c b/arch/um/drivers/ubd_user.c
index a703e45..e376f9b 100644
--- a/arch/um/drivers/ubd_user.c
+++ b/arch/um/drivers/ubd_user.c
@@ -21,11 +21,6 @@
 #include "ubd.h"
 #include <os.h>
 
-void ignore_sigwinch_sig(void)
-{
-	signal(SIGWINCH, SIG_IGN);
-}
-
 int start_io_thread(unsigned long sp, int *fd_out)
 {
 	int pid, fds[2], err;
diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
index 95feaa4..021104d 100644
--- a/arch/um/include/shared/os.h
+++ b/arch/um/include/shared/os.h
@@ -141,6 +141,7 @@
 extern int os_open_file(const char *file, struct openflags flags, int mode);
 extern int os_read_file(int fd, void *buf, int len);
 extern int os_write_file(int fd, const void *buf, int count);
+extern int os_sync_file(int fd);
 extern int os_file_size(const char *file, unsigned long long *size_out);
 extern int os_file_modtime(const char *file, unsigned long *modtime);
 extern int os_pipe(int *fd, int stream, int close_on_exec);
@@ -200,6 +201,7 @@
 extern int os_drop_memory(void *addr, int length);
 extern int can_drop_memory(void);
 extern void os_flush_stdout(void);
+extern int os_mincore(void *addr, unsigned long len);
 
 /* execvp.c */
 extern int execvp_noalloc(char *buf, const char *file, char *const argv[]);
@@ -233,6 +235,7 @@
 extern void setup_hostinfo(char *buf, int len);
 extern void os_dump_core(void) __attribute__ ((noreturn));
 extern void um_early_printk(const char *s, unsigned int n);
+extern void os_fix_helper_signals(void);
 
 /* time.c */
 extern void idle_sleep(unsigned long long nsecs);
diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile
index babe218..d8b78a0 100644
--- a/arch/um/kernel/Makefile
+++ b/arch/um/kernel/Makefile
@@ -13,7 +13,7 @@
 obj-y = config.o exec.o exitcode.o irq.o ksyms.o mem.o \
 	physmem.o process.o ptrace.o reboot.o sigio.o \
 	signal.o smp.o syscall.o sysrq.o time.o tlb.o trap.o \
-	um_arch.o umid.o skas/
+	um_arch.o umid.o maccess.o skas/
 
 obj-$(CONFIG_BLK_DEV_INITRD) += initrd.o
 obj-$(CONFIG_GPROF)	+= gprof_syms.o
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index 36e12f0..1d8505b 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -337,6 +337,8 @@
 	.irq_disable = dummy,
 	.irq_enable = dummy,
 	.irq_ack = dummy,
+	.irq_mask = dummy,
+	.irq_unmask = dummy,
 };
 
 static struct irq_chip SIGVTALRM_irq_type = {
@@ -344,6 +346,8 @@
 	.irq_disable = dummy,
 	.irq_enable = dummy,
 	.irq_ack = dummy,
+	.irq_mask = dummy,
+	.irq_unmask = dummy,
 };
 
 void __init init_IRQ(void)
diff --git a/arch/um/kernel/maccess.c b/arch/um/kernel/maccess.c
new file mode 100644
index 0000000..1f3d5c4
--- /dev/null
+++ b/arch/um/kernel/maccess.c
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2013 Richard Weinberger <richrd@nod.at>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/uaccess.h>
+#include <linux/kernel.h>
+#include <os.h>
+
+long probe_kernel_read(void *dst, const void *src, size_t size)
+{
+	void *psrc = (void *)rounddown((unsigned long)src, PAGE_SIZE);
+
+	if ((unsigned long)src < PAGE_SIZE || size <= 0)
+		return -EFAULT;
+
+	if (os_mincore(psrc, size + src - psrc) <= 0)
+		return -EFAULT;
+
+	return __probe_kernel_read(dst, src, size);
+}
diff --git a/arch/um/os-Linux/aio.c b/arch/um/os-Linux/aio.c
index 3a6bc2a..014eb35 100644
--- a/arch/um/os-Linux/aio.c
+++ b/arch/um/os-Linux/aio.c
@@ -104,8 +104,7 @@
 	struct io_event event;
 	int err, n, reply_fd;
 
-	signal(SIGWINCH, SIG_IGN);
-
+	os_fix_helper_signals();
 	while (1) {
 		n = io_getevents(ctx, 1, 1, &event, NULL);
 		if (n < 0) {
@@ -173,7 +172,7 @@
 	struct aio_thread_reply reply;
 	int err;
 
-	signal(SIGWINCH, SIG_IGN);
+	os_fix_helper_signals();
 	while (1) {
 		err = read(aio_req_fd_r, &req, sizeof(req));
 		if (err != sizeof(req)) {
diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c
index c17bd6f..07a7501 100644
--- a/arch/um/os-Linux/file.c
+++ b/arch/um/os-Linux/file.c
@@ -266,6 +266,15 @@
 	return n;
 }
 
+int os_sync_file(int fd)
+{
+	int n = fsync(fd);
+
+	if (n < 0)
+		return -errno;
+	return n;
+}
+
 int os_file_size(const char *file, unsigned long long *size_out)
 {
 	struct uml_stat buf;
diff --git a/arch/um/os-Linux/main.c b/arch/um/os-Linux/main.c
index 749c96d..e1704ff 100644
--- a/arch/um/os-Linux/main.c
+++ b/arch/um/os-Linux/main.c
@@ -123,6 +123,8 @@
 
 	setup_env_path();
 
+	setsid();
+
 	new_argv = malloc((argc + 1) * sizeof(char *));
 	if (new_argv == NULL) {
 		perror("Mallocing argv");
diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c
index b8f34c9..33496fe 100644
--- a/arch/um/os-Linux/process.c
+++ b/arch/um/os-Linux/process.c
@@ -4,6 +4,7 @@
  */
 
 #include <stdio.h>
+#include <stdlib.h>
 #include <unistd.h>
 #include <errno.h>
 #include <signal.h>
@@ -232,6 +233,57 @@
 	return ok;
 }
 
+static int os_page_mincore(void *addr)
+{
+	char vec[2];
+	int ret;
+
+	ret = mincore(addr, UM_KERN_PAGE_SIZE, vec);
+	if (ret < 0) {
+		if (errno == ENOMEM || errno == EINVAL)
+			return 0;
+		else
+			return -errno;
+	}
+
+	return vec[0] & 1;
+}
+
+int os_mincore(void *addr, unsigned long len)
+{
+	char *vec;
+	int ret, i;
+
+	if (len <= UM_KERN_PAGE_SIZE)
+		return os_page_mincore(addr);
+
+	vec = calloc(1, (len + UM_KERN_PAGE_SIZE - 1) / UM_KERN_PAGE_SIZE);
+	if (!vec)
+		return -ENOMEM;
+
+	ret = mincore(addr, UM_KERN_PAGE_SIZE, vec);
+	if (ret < 0) {
+		if (errno == ENOMEM || errno == EINVAL)
+			ret = 0;
+		else
+			ret = -errno;
+
+		goto out;
+	}
+
+	for (i = 0; i < ((len + UM_KERN_PAGE_SIZE - 1) / UM_KERN_PAGE_SIZE); i++) {
+		if (!(vec[i] & 1)) {
+			ret = 0;
+			goto out;
+		}
+	}
+
+	ret = 1;
+out:
+	free(vec);
+	return ret;
+}
+
 void init_new_thread_signals(void)
 {
 	set_handler(SIGSEGV);
@@ -242,5 +294,4 @@
 	signal(SIGHUP, SIG_IGN);
 	set_handler(SIGIO);
 	signal(SIGWINCH, SIG_IGN);
-	signal(SIGTERM, SIG_DFL);
 }
diff --git a/arch/um/os-Linux/sigio.c b/arch/um/os-Linux/sigio.c
index 8b61cc0..46e762f 100644
--- a/arch/um/os-Linux/sigio.c
+++ b/arch/um/os-Linux/sigio.c
@@ -55,7 +55,7 @@
 	int i, n, respond_fd;
 	char c;
 
-	signal(SIGWINCH, SIG_IGN);
+	os_fix_helper_signals();
 	fds = &current_poll;
 	while (1) {
 		n = poll(fds->poll, fds->used, -1);
diff --git a/arch/um/os-Linux/util.c b/arch/um/os-Linux/util.c
index 492ef5e..faee55e 100644
--- a/arch/um/os-Linux/util.c
+++ b/arch/um/os-Linux/util.c
@@ -94,6 +94,16 @@
 			exit(127);
 }
 
+/*
+ * UML helper threads must not handle SIGWINCH/INT/TERM
+ */
+void os_fix_helper_signals(void)
+{
+	signal(SIGWINCH, SIG_IGN);
+	signal(SIGINT, SIG_DFL);
+	signal(SIGTERM, SIG_DFL);
+}
+
 void os_dump_core(void)
 {
 	int pid;
diff --git a/arch/x86/include/asm/dma-contiguous.h b/arch/x86/include/asm/dma-contiguous.h
index c092416..b4b38ba 100644
--- a/arch/x86/include/asm/dma-contiguous.h
+++ b/arch/x86/include/asm/dma-contiguous.h
@@ -4,7 +4,6 @@
 #ifdef __KERNEL__
 
 #include <linux/types.h>
-#include <asm-generic/dma-contiguous.h>
 
 static inline void
 dma_contiguous_early_fixup(phys_addr_t base, unsigned long size) { }
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index 3a16c14..64507f3 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -3,18 +3,23 @@
 
 #ifdef __KERNEL__
 
+#include <linux/stringify.h>
 #include <linux/types.h>
 #include <asm/nops.h>
 #include <asm/asm.h>
 
 #define JUMP_LABEL_NOP_SIZE 5
 
-#define STATIC_KEY_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t"
+#ifdef CONFIG_X86_64
+# define STATIC_KEY_INIT_NOP P6_NOP5_ATOMIC
+#else
+# define STATIC_KEY_INIT_NOP GENERIC_NOP5_ATOMIC
+#endif
 
 static __always_inline bool arch_static_branch(struct static_key *key)
 {
 	asm goto("1:"
-		STATIC_KEY_INITIAL_NOP
+		".byte " __stringify(STATIC_KEY_INIT_NOP) "\n\t"
 		".pushsection __jump_table,  \"aw\" \n\t"
 		_ASM_ALIGN "\n\t"
 		_ASM_PTR "1b, %l[l_yes], %c0 \n\t"
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index 69eb2fa..376dc78 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -52,8 +52,7 @@
 }
 
 #ifdef CONFIG_BLK_DEV_INITRD
-void __init early_init_dt_setup_initrd_arch(unsigned long start,
-					    unsigned long end)
+void __init early_init_dt_setup_initrd_arch(u64 start, u64 end)
 {
 	initrd_start = (unsigned long)__va(start);
 	initrd_end = (unsigned long)__va(end);
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 2cfbc3a..f0dcb0c 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1176,6 +1176,9 @@
 #else /* ! CONFIG_DYNAMIC_FTRACE */
 
 ENTRY(mcount)
+	cmpl $__PAGE_OFFSET, %esp
+	jb ftrace_stub		/* Paging not enabled yet? */
+
 	cmpl $0, function_trace_stop
 	jne  ftrace_stub
 
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index 460f5d9..ee11b7d 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -24,18 +24,57 @@
 	} __attribute__((packed));
 };
 
+static void bug_at(unsigned char *ip, int line)
+{
+	/*
+	 * The location is not an op that we were expecting.
+	 * Something went wrong. Crash the box, as something could be
+	 * corrupting the kernel.
+	 */
+	pr_warning("Unexpected op at %pS [%p] (%02x %02x %02x %02x %02x) %s:%d\n",
+	       ip, ip, ip[0], ip[1], ip[2], ip[3], ip[4], __FILE__, line);
+	BUG();
+}
+
 static void __jump_label_transform(struct jump_entry *entry,
 				   enum jump_label_type type,
-				   void *(*poker)(void *, const void *, size_t))
+				   void *(*poker)(void *, const void *, size_t),
+				   int init)
 {
 	union jump_code_union code;
+	const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5];
 
 	if (type == JUMP_LABEL_ENABLE) {
+		/*
+		 * We are enabling this jump label. If it is not a nop
+		 * then something must have gone wrong.
+		 */
+		if (unlikely(memcmp((void *)entry->code, ideal_nop, 5) != 0))
+			bug_at((void *)entry->code, __LINE__);
+
 		code.jump = 0xe9;
 		code.offset = entry->target -
 				(entry->code + JUMP_LABEL_NOP_SIZE);
-	} else
+	} else {
+		/*
+		 * We are disabling this jump label. If it is not what
+		 * we think it is, then something must have gone wrong.
+		 * If this is the first initialization call, then we
+		 * are converting the default nop to the ideal nop.
+		 */
+		if (init) {
+			const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP };
+			if (unlikely(memcmp((void *)entry->code, default_nop, 5) != 0))
+				bug_at((void *)entry->code, __LINE__);
+		} else {
+			code.jump = 0xe9;
+			code.offset = entry->target -
+				(entry->code + JUMP_LABEL_NOP_SIZE);
+			if (unlikely(memcmp((void *)entry->code, &code, 5) != 0))
+				bug_at((void *)entry->code, __LINE__);
+		}
 		memcpy(&code, ideal_nops[NOP_ATOMIC5], JUMP_LABEL_NOP_SIZE);
+	}
 
 	/*
 	 * Make text_poke_bp() a default fallback poker.
@@ -57,15 +96,38 @@
 {
 	get_online_cpus();
 	mutex_lock(&text_mutex);
-	__jump_label_transform(entry, type, NULL);
+	__jump_label_transform(entry, type, NULL, 0);
 	mutex_unlock(&text_mutex);
 	put_online_cpus();
 }
 
+static enum {
+	JL_STATE_START,
+	JL_STATE_NO_UPDATE,
+	JL_STATE_UPDATE,
+} jlstate __initdata_or_module = JL_STATE_START;
+
 __init_or_module void arch_jump_label_transform_static(struct jump_entry *entry,
 				      enum jump_label_type type)
 {
-	__jump_label_transform(entry, type, text_poke_early);
+	/*
+	 * This function is called at boot up and when modules are
+	 * first loaded. Check if the default nop, the one that is
+	 * inserted at compile time, is the ideal nop. If it is, then
+	 * we do not need to update the nop, and we can leave it as is.
+	 * If it is not, then we need to update the nop to the ideal nop.
+	 */
+	if (jlstate == JL_STATE_START) {
+		const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP };
+		const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5];
+
+		if (memcmp(ideal_nop, default_nop, 5) != 0)
+			jlstate = JL_STATE_UPDATE;
+		else
+			jlstate = JL_STATE_NO_UPDATE;
+	}
+	if (jlstate == JL_STATE_UPDATE)
+		__jump_label_transform(entry, type, text_poke_early, 1);
 }
 
 #endif
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 6a22c19..bdf8532 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -7,8 +7,7 @@
  * kernel and insert a module (lg.ko) which allows us to run other Linux
  * kernels the same way we'd run processes.  We call the first kernel the Host,
  * and the others the Guests.  The program which sets up and configures Guests
- * (such as the example in Documentation/virtual/lguest/lguest.c) is called the
- * Launcher.
+ * (such as the example in tools/lguest/lguest.c) is called the Launcher.
  *
  * Secondly, we only run specially modified Guests, not normal kernels: setting
  * CONFIG_LGUEST_GUEST to "y" compiles this file into the kernel so it knows
@@ -1057,6 +1056,12 @@
 }
 
 /* Let's just say, I wouldn't do debugging under a Guest. */
+static unsigned long lguest_get_debugreg(int regno)
+{
+	/* FIXME: Implement */
+	return 0;
+}
+
 static void lguest_set_debugreg(int regno, unsigned long value)
 {
 	/* FIXME: Implement */
@@ -1304,6 +1309,7 @@
 	pv_cpu_ops.load_tr_desc = lguest_load_tr_desc;
 	pv_cpu_ops.set_ldt = lguest_set_ldt;
 	pv_cpu_ops.load_tls = lguest_load_tls;
+	pv_cpu_ops.get_debugreg = lguest_get_debugreg;
 	pv_cpu_ops.set_debugreg = lguest_set_debugreg;
 	pv_cpu_ops.clts = lguest_clts;
 	pv_cpu_ops.read_cr0 = lguest_read_cr0;
diff --git a/arch/x86/um/os-Linux/prctl.c b/arch/x86/um/os-Linux/prctl.c
index 9d34edd..96eb2bd 100644
--- a/arch/x86/um/os-Linux/prctl.c
+++ b/arch/x86/um/os-Linux/prctl.c
@@ -4,7 +4,7 @@
  */
 
 #include <sys/ptrace.h>
-#include <linux/ptrace.h>
+#include <asm/ptrace.h>
 
 int os_arch_prctl(int pid, int code, unsigned long *addr)
 {
diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c
index 42a8bba..101012b 100644
--- a/arch/xtensa/kernel/setup.c
+++ b/arch/xtensa/kernel/setup.c
@@ -170,8 +170,7 @@
 
 __tagtable(BP_TAG_FDT, parse_tag_fdt);
 
-void __init early_init_dt_setup_initrd_arch(unsigned long start,
-		unsigned long end)
+void __init early_init_dt_setup_initrd_arch(u64 start, u64 end)
 {
 	initrd_start = (void *)__va(start);
 	initrd_end = (void *)__va(end);
diff --git a/drivers/base/dma-contiguous.c b/drivers/base/dma-contiguous.c
index 6c9cdaa..99802d6f 100644
--- a/drivers/base/dma-contiguous.c
+++ b/drivers/base/dma-contiguous.c
@@ -96,7 +96,7 @@
 #endif
 
 /**
- * dma_contiguous_reserve() - reserve area for contiguous memory handling
+ * dma_contiguous_reserve() - reserve area(s) for contiguous memory handling
  * @limit: End address of the reserved memory (optional, 0 for any).
  *
  * This function reserves memory from early allocator. It should be
@@ -124,22 +124,29 @@
 #endif
 	}
 
-	if (selected_size) {
+	if (selected_size && !dma_contiguous_default_area) {
 		pr_debug("%s: reserving %ld MiB for global area\n", __func__,
 			 (unsigned long)selected_size / SZ_1M);
 
-		dma_declare_contiguous(NULL, selected_size, 0, limit);
+		dma_contiguous_reserve_area(selected_size, 0, limit,
+					    &dma_contiguous_default_area);
 	}
 };
 
 static DEFINE_MUTEX(cma_mutex);
 
-static int __init cma_activate_area(unsigned long base_pfn, unsigned long count)
+static int __init cma_activate_area(struct cma *cma)
 {
-	unsigned long pfn = base_pfn;
-	unsigned i = count >> pageblock_order;
+	int bitmap_size = BITS_TO_LONGS(cma->count) * sizeof(long);
+	unsigned long base_pfn = cma->base_pfn, pfn = base_pfn;
+	unsigned i = cma->count >> pageblock_order;
 	struct zone *zone;
 
+	cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
+
+	if (!cma->bitmap)
+		return -ENOMEM;
+
 	WARN_ON_ONCE(!pfn_valid(pfn));
 	zone = page_zone(pfn_to_page(pfn));
 
@@ -153,92 +160,53 @@
 		}
 		init_cma_reserved_pageblock(pfn_to_page(base_pfn));
 	} while (--i);
+
 	return 0;
 }
 
-static struct cma * __init cma_create_area(unsigned long base_pfn,
-				     unsigned long count)
-{
-	int bitmap_size = BITS_TO_LONGS(count) * sizeof(long);
-	struct cma *cma;
-	int ret = -ENOMEM;
-
-	pr_debug("%s(base %08lx, count %lx)\n", __func__, base_pfn, count);
-
-	cma = kmalloc(sizeof *cma, GFP_KERNEL);
-	if (!cma)
-		return ERR_PTR(-ENOMEM);
-
-	cma->base_pfn = base_pfn;
-	cma->count = count;
-	cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
-
-	if (!cma->bitmap)
-		goto no_mem;
-
-	ret = cma_activate_area(base_pfn, count);
-	if (ret)
-		goto error;
-
-	pr_debug("%s: returned %p\n", __func__, (void *)cma);
-	return cma;
-
-error:
-	kfree(cma->bitmap);
-no_mem:
-	kfree(cma);
-	return ERR_PTR(ret);
-}
-
-static struct cma_reserved {
-	phys_addr_t start;
-	unsigned long size;
-	struct device *dev;
-} cma_reserved[MAX_CMA_AREAS] __initdata;
-static unsigned cma_reserved_count __initdata;
+static struct cma cma_areas[MAX_CMA_AREAS];
+static unsigned cma_area_count;
 
 static int __init cma_init_reserved_areas(void)
 {
-	struct cma_reserved *r = cma_reserved;
-	unsigned i = cma_reserved_count;
+	int i;
 
-	pr_debug("%s()\n", __func__);
-
-	for (; i; --i, ++r) {
-		struct cma *cma;
-		cma = cma_create_area(PFN_DOWN(r->start),
-				      r->size >> PAGE_SHIFT);
-		if (!IS_ERR(cma))
-			dev_set_cma_area(r->dev, cma);
+	for (i = 0; i < cma_area_count; i++) {
+		int ret = cma_activate_area(&cma_areas[i]);
+		if (ret)
+			return ret;
 	}
+
 	return 0;
 }
 core_initcall(cma_init_reserved_areas);
 
 /**
- * dma_declare_contiguous() - reserve area for contiguous memory handling
- *			      for particular device
- * @dev:   Pointer to device structure.
- * @size:  Size of the reserved memory.
- * @base:  Start address of the reserved memory (optional, 0 for any).
+ * dma_contiguous_reserve_area() - reserve custom contiguous area
+ * @size: Size of the reserved area (in bytes),
+ * @base: Base address of the reserved area optional, use 0 for any
  * @limit: End address of the reserved memory (optional, 0 for any).
+ * @res_cma: Pointer to store the created cma region.
  *
- * This function reserves memory for specified device. It should be
- * called by board specific code when early allocator (memblock or bootmem)
- * is still activate.
+ * This function reserves memory from early allocator. It should be
+ * called by arch specific code once the early allocator (memblock or bootmem)
+ * has been activated and all other subsystems have already allocated/reserved
+ * memory. This function allows to create custom reserved areas for specific
+ * devices.
  */
-int __init dma_declare_contiguous(struct device *dev, phys_addr_t size,
-				  phys_addr_t base, phys_addr_t limit)
+int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base,
+				       phys_addr_t limit, struct cma **res_cma)
 {
-	struct cma_reserved *r = &cma_reserved[cma_reserved_count];
+	struct cma *cma = &cma_areas[cma_area_count];
 	phys_addr_t alignment;
+	int ret = 0;
 
 	pr_debug("%s(size %lx, base %08lx, limit %08lx)\n", __func__,
 		 (unsigned long)size, (unsigned long)base,
 		 (unsigned long)limit);
 
 	/* Sanity checks */
-	if (cma_reserved_count == ARRAY_SIZE(cma_reserved)) {
+	if (cma_area_count == ARRAY_SIZE(cma_areas)) {
 		pr_err("Not enough slots for CMA reserved regions!\n");
 		return -ENOSPC;
 	}
@@ -256,7 +224,7 @@
 	if (base) {
 		if (memblock_is_region_reserved(base, size) ||
 		    memblock_reserve(base, size) < 0) {
-			base = -EBUSY;
+			ret = -EBUSY;
 			goto err;
 		}
 	} else {
@@ -266,7 +234,7 @@
 		 */
 		phys_addr_t addr = __memblock_alloc_base(size, alignment, limit);
 		if (!addr) {
-			base = -ENOMEM;
+			ret = -ENOMEM;
 			goto err;
 		} else {
 			base = addr;
@@ -277,10 +245,11 @@
 	 * Each reserved area must be initialised later, when more kernel
 	 * subsystems (like slab allocator) are available.
 	 */
-	r->start = base;
-	r->size = size;
-	r->dev = dev;
-	cma_reserved_count++;
+	cma->base_pfn = PFN_DOWN(base);
+	cma->count = size >> PAGE_SHIFT;
+	*res_cma = cma;
+	cma_area_count++;
+
 	pr_info("CMA: reserved %ld MiB at %08lx\n", (unsigned long)size / SZ_1M,
 		(unsigned long)base);
 
@@ -289,7 +258,7 @@
 	return 0;
 err:
 	pr_err("CMA: failed to reserve %ld MiB\n", (unsigned long)size / SZ_1M);
-	return base;
+	return ret;
 }
 
 /**
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 191cd17..39c51cc 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -1561,11 +1561,12 @@
 		obj_request, obj_request->img_request, obj_request->result,
 		xferred, length);
 	/*
-	 * ENOENT means a hole in the image.  We zero-fill the
-	 * entire length of the request.  A short read also implies
-	 * zero-fill to the end of the request.  Either way we
-	 * update the xferred count to indicate the whole request
-	 * was satisfied.
+	 * ENOENT means a hole in the image.  We zero-fill the entire
+	 * length of the request.  A short read also implies zero-fill
+	 * to the end of the request.  An error requires the whole
+	 * length of the request to be reported finished with an error
+	 * to the block layer.  In each case we update the xferred
+	 * count to indicate the whole request was satisfied.
 	 */
 	rbd_assert(obj_request->type != OBJ_REQUEST_NODATA);
 	if (obj_request->result == -ENOENT) {
@@ -1574,14 +1575,13 @@
 		else
 			zero_pages(obj_request->pages, 0, length);
 		obj_request->result = 0;
-		obj_request->xferred = length;
 	} else if (xferred < length && !obj_request->result) {
 		if (obj_request->type == OBJ_REQUEST_BIO)
 			zero_bio_chain(obj_request->bio_list, xferred);
 		else
 			zero_pages(obj_request->pages, xferred, length);
-		obj_request->xferred = length;
 	}
+	obj_request->xferred = length;
 	obj_request_done_set(obj_request);
 }
 
@@ -2167,9 +2167,9 @@
 	struct rbd_obj_request *obj_request = NULL;
 	struct rbd_obj_request *next_obj_request;
 	bool write_request = img_request_write_test(img_request);
-	struct bio *bio_list = 0;
+	struct bio *bio_list = NULL;
 	unsigned int bio_offset = 0;
-	struct page **pages = 0;
+	struct page **pages = NULL;
 	u64 img_offset;
 	u64 resid;
 	u16 opcode;
@@ -2207,6 +2207,11 @@
 		rbd_segment_name_free(object_name);
 		if (!obj_request)
 			goto out_unwind;
+		/*
+		 * set obj_request->img_request before creating the
+		 * osd_request so that it gets the right snapc
+		 */
+		rbd_img_obj_request_add(img_request, obj_request);
 
 		if (type == OBJ_REQUEST_BIO) {
 			unsigned int clone_size;
@@ -2248,11 +2253,6 @@
 					obj_request->pages, length,
 					offset & ~PAGE_MASK, false, false);
 
-		/*
-		 * set obj_request->img_request before formatting
-		 * the osd_request so that it gets the right snapc
-		 */
-		rbd_img_obj_request_add(img_request, obj_request);
 		if (write_request)
 			rbd_osd_req_format_write(obj_request);
 		else
@@ -3706,12 +3706,14 @@
 	if (ret < sizeof (size_buf))
 		return -ERANGE;
 
-	if (order)
+	if (order) {
 		*order = size_buf.order;
+		dout("  order %u", (unsigned int)*order);
+	}
 	*snap_size = le64_to_cpu(size_buf.size);
 
-	dout("  snap_id 0x%016llx order = %u, snap_size = %llu\n",
-		(unsigned long long)snap_id, (unsigned int)*order,
+	dout("  snap_id 0x%016llx snap_size = %llu\n",
+		(unsigned long long)snap_id,
 		(unsigned long long)*snap_size);
 
 	return 0;
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index fc45567..b79cf3e 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -1529,18 +1529,22 @@
 {
 	struct port_buffer *buf;
 
+	spin_lock_irq(&port->inbuf_lock);
 	/* Remove unused data this port might have received. */
 	discard_port_data(port);
 
-	reclaim_consumed_buffers(port);
-
 	/* Remove buffers we queued up for the Host to send us data in. */
 	while ((buf = virtqueue_detach_unused_buf(port->in_vq)))
 		free_buf(buf, true);
+	spin_unlock_irq(&port->inbuf_lock);
+
+	spin_lock_irq(&port->outvq_lock);
+	reclaim_consumed_buffers(port);
 
 	/* Free pending buffers from the out-queue. */
 	while ((buf = virtqueue_detach_unused_buf(port->out_vq)))
 		free_buf(buf, true);
+	spin_unlock_irq(&port->outvq_lock);
 }
 
 /*
@@ -1554,6 +1558,7 @@
 	list_del(&port->list);
 	spin_unlock_irq(&port->portdev->ports_lock);
 
+	spin_lock_irq(&port->inbuf_lock);
 	if (port->guest_connected) {
 		/* Let the app know the port is going down. */
 		send_sigio_to_port(port);
@@ -1564,6 +1569,7 @@
 
 		wake_up_interruptible(&port->waitqueue);
 	}
+	spin_unlock_irq(&port->inbuf_lock);
 
 	if (is_console_port(port)) {
 		spin_lock_irq(&pdrvdata_lock);
@@ -1585,9 +1591,8 @@
 	device_destroy(pdrvdata.class, port->dev->devt);
 	cdev_del(port->cdev);
 
-	kfree(port->name);
-
 	debugfs_remove(port->debugfs_file);
+	kfree(port->name);
 
 	/*
 	 * Locks around here are not necessary - a port can't be
@@ -1681,7 +1686,9 @@
 		 * If the guest is connected, it'll be interested in
 		 * knowing the host connection state changed.
 		 */
+		spin_lock_irq(&port->inbuf_lock);
 		send_sigio_to_port(port);
+		spin_unlock_irq(&port->inbuf_lock);
 		break;
 	case VIRTIO_CONSOLE_PORT_NAME:
 		/*
@@ -1801,13 +1808,13 @@
 	if (!port->guest_connected && !is_rproc_serial(port->portdev->vdev))
 		discard_port_data(port);
 
+	/* Send a SIGIO indicating new data in case the process asked for it */
+	send_sigio_to_port(port);
+
 	spin_unlock_irqrestore(&port->inbuf_lock, flags);
 
 	wake_up_interruptible(&port->waitqueue);
 
-	/* Send a SIGIO indicating new data in case the process asked for it */
-	send_sigio_to_port(port);
-
 	if (is_console_port(port) && hvc_poll(port->cons.hvc))
 		hvc_kick();
 }
@@ -2241,10 +2248,8 @@
 	}
 
 	pdrvdata.debugfs_dir = debugfs_create_dir("virtio-ports", NULL);
-	if (!pdrvdata.debugfs_dir) {
-		pr_warning("Error %ld creating debugfs dir for virtio-ports\n",
-			   PTR_ERR(pdrvdata.debugfs_dir));
-	}
+	if (!pdrvdata.debugfs_dir)
+		pr_warning("Error creating debugfs dir for virtio-ports\n");
 	INIT_LIST_HEAD(&pdrvdata.consoles);
 	INIT_LIST_HEAD(&pdrvdata.portdevs);
 
diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig
index 51380d6..279407a 100644
--- a/drivers/clk/Kconfig
+++ b/drivers/clk/Kconfig
@@ -27,7 +27,7 @@
 	bool "DebugFS representation of clock tree"
 	select DEBUG_FS
 	---help---
-	  Creates a directory hierchy in debugfs for visualizing the clk
+	  Creates a directory hierarchy in debugfs for visualizing the clk
 	  tree structure.  Each directory contains read-only members
 	  that export information specific to that clk node: clk_rate,
 	  clk_flags, clk_prepare_count, clk_enable_count &
@@ -64,6 +64,12 @@
 	  This driver supports Silicon Labs 5351A/B/C programmable clock
 	  generators.
 
+config COMMON_CLK_S2MPS11
+	tristate "Clock driver for S2MPS11 MFD"
+	depends on MFD_SEC_CORE
+	---help---
+	  This driver supports S2MPS11 crystal oscillator clock.
+
 config CLK_TWL6040
 	tristate "External McPDM functional clock from twl6040"
 	depends on TWL6040_CORE
diff --git a/drivers/clk/Makefile b/drivers/clk/Makefile
index 4038c2b..7b11106 100644
--- a/drivers/clk/Makefile
+++ b/drivers/clk/Makefile
@@ -40,5 +40,6 @@
 obj-$(CONFIG_COMMON_CLK_WM831X) += clk-wm831x.o
 obj-$(CONFIG_COMMON_CLK_MAX77686) += clk-max77686.o
 obj-$(CONFIG_COMMON_CLK_SI5351) += clk-si5351.o
+obj-$(CONFIG_COMMON_CLK_S2MPS11) += clk-s2mps11.o
 obj-$(CONFIG_CLK_TWL6040)	+= clk-twl6040.o
 obj-$(CONFIG_CLK_PPC_CORENET)	+= clk-ppc-corenet.o
diff --git a/drivers/clk/clk-bcm2835.c b/drivers/clk/clk-bcm2835.c
index 792bc57..5fb4ff5 100644
--- a/drivers/clk/clk-bcm2835.c
+++ b/drivers/clk/clk-bcm2835.c
@@ -23,7 +23,7 @@
 #include <linux/clk-provider.h>
 #include <linux/of.h>
 
-static const __initconst struct of_device_id clk_match[] = {
+static const struct of_device_id clk_match[] __initconst = {
 	{ .compatible = "fixed-clock", .data = of_fixed_clk_setup, },
 	{ }
 };
diff --git a/drivers/clk/clk-divider.c b/drivers/clk/clk-divider.c
index 6d55eb2..8d3009e 100644
--- a/drivers/clk/clk-divider.c
+++ b/drivers/clk/clk-divider.c
@@ -104,7 +104,7 @@
 	struct clk_divider *divider = to_clk_divider(hw);
 	unsigned int div, val;
 
-	val = readl(divider->reg) >> divider->shift;
+	val = clk_readl(divider->reg) >> divider->shift;
 	val &= div_mask(divider);
 
 	div = _get_div(divider, val);
@@ -230,11 +230,11 @@
 	if (divider->flags & CLK_DIVIDER_HIWORD_MASK) {
 		val = div_mask(divider) << (divider->shift + 16);
 	} else {
-		val = readl(divider->reg);
+		val = clk_readl(divider->reg);
 		val &= ~(div_mask(divider) << divider->shift);
 	}
 	val |= value << divider->shift;
-	writel(val, divider->reg);
+	clk_writel(val, divider->reg);
 
 	if (divider->lock)
 		spin_unlock_irqrestore(divider->lock, flags);
@@ -317,6 +317,7 @@
 	return _register_divider(dev, name, parent_name, flags, reg, shift,
 			width, clk_divider_flags, NULL, lock);
 }
+EXPORT_SYMBOL_GPL(clk_register_divider);
 
 /**
  * clk_register_divider_table - register a table based divider clock with
@@ -341,3 +342,4 @@
 	return _register_divider(dev, name, parent_name, flags, reg, shift,
 			width, clk_divider_flags, table, lock);
 }
+EXPORT_SYMBOL_GPL(clk_register_divider_table);
diff --git a/drivers/clk/clk-fixed-factor.c b/drivers/clk/clk-fixed-factor.c
index 9ff7d51..0e1d89b 100644
--- a/drivers/clk/clk-fixed-factor.c
+++ b/drivers/clk/clk-fixed-factor.c
@@ -97,6 +97,8 @@
 
 	return clk;
 }
+EXPORT_SYMBOL_GPL(clk_register_fixed_factor);
+
 #ifdef CONFIG_OF
 /**
  * of_fixed_factor_clk_setup() - Setup function for simple fixed factor clock
diff --git a/drivers/clk/clk-fixed-rate.c b/drivers/clk/clk-fixed-rate.c
index dc58fbd..1ed591a 100644
--- a/drivers/clk/clk-fixed-rate.c
+++ b/drivers/clk/clk-fixed-rate.c
@@ -80,6 +80,7 @@
 
 	return clk;
 }
+EXPORT_SYMBOL_GPL(clk_register_fixed_rate);
 
 #ifdef CONFIG_OF
 /**
diff --git a/drivers/clk/clk-gate.c b/drivers/clk/clk-gate.c
index 790306e..4a58c55 100644
--- a/drivers/clk/clk-gate.c
+++ b/drivers/clk/clk-gate.c
@@ -58,7 +58,7 @@
 		if (set)
 			reg |= BIT(gate->bit_idx);
 	} else {
-		reg = readl(gate->reg);
+		reg = clk_readl(gate->reg);
 
 		if (set)
 			reg |= BIT(gate->bit_idx);
@@ -66,7 +66,7 @@
 			reg &= ~BIT(gate->bit_idx);
 	}
 
-	writel(reg, gate->reg);
+	clk_writel(reg, gate->reg);
 
 	if (gate->lock)
 		spin_unlock_irqrestore(gate->lock, flags);
@@ -89,7 +89,7 @@
 	u32 reg;
 	struct clk_gate *gate = to_clk_gate(hw);
 
-	reg = readl(gate->reg);
+	reg = clk_readl(gate->reg);
 
 	/* if a set bit disables this clk, flip it before masking */
 	if (gate->flags & CLK_GATE_SET_TO_DISABLE)
@@ -161,3 +161,4 @@
 
 	return clk;
 }
+EXPORT_SYMBOL_GPL(clk_register_gate);
diff --git a/drivers/clk/clk-mux.c b/drivers/clk/clk-mux.c
index 614444c..4f96ff3 100644
--- a/drivers/clk/clk-mux.c
+++ b/drivers/clk/clk-mux.c
@@ -42,7 +42,7 @@
 	 * OTOH, pmd_trace_clk_mux_ck uses a separate bit for each clock, so
 	 * val = 0x4 really means "bit 2, index starts at bit 0"
 	 */
-	val = readl(mux->reg) >> mux->shift;
+	val = clk_readl(mux->reg) >> mux->shift;
 	val &= mux->mask;
 
 	if (mux->table) {
@@ -89,11 +89,11 @@
 	if (mux->flags & CLK_MUX_HIWORD_MASK) {
 		val = mux->mask << (mux->shift + 16);
 	} else {
-		val = readl(mux->reg);
+		val = clk_readl(mux->reg);
 		val &= ~(mux->mask << mux->shift);
 	}
 	val |= index << mux->shift;
-	writel(val, mux->reg);
+	clk_writel(val, mux->reg);
 
 	if (mux->lock)
 		spin_unlock_irqrestore(mux->lock, flags);
@@ -104,9 +104,15 @@
 const struct clk_ops clk_mux_ops = {
 	.get_parent = clk_mux_get_parent,
 	.set_parent = clk_mux_set_parent,
+	.determine_rate = __clk_mux_determine_rate,
 };
 EXPORT_SYMBOL_GPL(clk_mux_ops);
 
+const struct clk_ops clk_mux_ro_ops = {
+	.get_parent = clk_mux_get_parent,
+};
+EXPORT_SYMBOL_GPL(clk_mux_ro_ops);
+
 struct clk *clk_register_mux_table(struct device *dev, const char *name,
 		const char **parent_names, u8 num_parents, unsigned long flags,
 		void __iomem *reg, u8 shift, u32 mask,
@@ -133,7 +139,10 @@
 	}
 
 	init.name = name;
-	init.ops = &clk_mux_ops;
+	if (clk_mux_flags & CLK_MUX_READ_ONLY)
+		init.ops = &clk_mux_ro_ops;
+	else
+		init.ops = &clk_mux_ops;
 	init.flags = flags | CLK_IS_BASIC;
 	init.parent_names = parent_names;
 	init.num_parents = num_parents;
@@ -154,6 +163,7 @@
 
 	return clk;
 }
+EXPORT_SYMBOL_GPL(clk_register_mux_table);
 
 struct clk *clk_register_mux(struct device *dev, const char *name,
 		const char **parent_names, u8 num_parents, unsigned long flags,
@@ -166,3 +176,4 @@
 				      flags, reg, shift, mask, clk_mux_flags,
 				      NULL, lock);
 }
+EXPORT_SYMBOL_GPL(clk_register_mux);
diff --git a/drivers/clk/clk-nomadik.c b/drivers/clk/clk-nomadik.c
index 6d819a3..51410c2 100644
--- a/drivers/clk/clk-nomadik.c
+++ b/drivers/clk/clk-nomadik.c
@@ -479,12 +479,12 @@
 		of_clk_add_provider(np, of_clk_src_simple_get, clk);
 }
 
-static const __initconst struct of_device_id nomadik_src_match[] = {
+static const struct of_device_id nomadik_src_match[] __initconst = {
 	{ .compatible = "stericsson,nomadik-src" },
 	{ /* sentinel */ }
 };
 
-static const __initconst struct of_device_id nomadik_src_clk_match[] = {
+static const struct of_device_id nomadik_src_clk_match[] __initconst = {
 	{
 		.compatible = "fixed-clock",
 		.data = of_fixed_clk_setup,
diff --git a/drivers/clk/clk-prima2.c b/drivers/clk/clk-prima2.c
index 643ca65..5ab95f1 100644
--- a/drivers/clk/clk-prima2.c
+++ b/drivers/clk/clk-prima2.c
@@ -1034,7 +1034,7 @@
 	usb0,  usb1,  maxclk,
 };
 
-static __initdata struct clk_hw* prima2_clk_hw_array[maxclk] = {
+static struct clk_hw *prima2_clk_hw_array[maxclk] __initdata = {
 	NULL, /* dummy */
 	NULL,
 	&clk_pll1.hw,
diff --git a/drivers/clk/clk-s2mps11.c b/drivers/clk/clk-s2mps11.c
new file mode 100644
index 0000000..7be41e6
--- /dev/null
+++ b/drivers/clk/clk-s2mps11.c
@@ -0,0 +1,273 @@
+/*
+ * clk-s2mps11.c - Clock driver for S2MPS11.
+ *
+ * Copyright (C) 2013 Samsung Electornics
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/err.h>
+#include <linux/of.h>
+#include <linux/clkdev.h>
+#include <linux/regmap.h>
+#include <linux/clk-provider.h>
+#include <linux/platform_device.h>
+#include <linux/mfd/samsung/s2mps11.h>
+#include <linux/mfd/samsung/core.h>
+
+#define s2mps11_name(a) (a->hw.init->name)
+
+static struct clk **clk_table;
+static struct clk_onecell_data clk_data;
+
+enum {
+	S2MPS11_CLK_AP = 0,
+	S2MPS11_CLK_CP,
+	S2MPS11_CLK_BT,
+	S2MPS11_CLKS_NUM,
+};
+
+struct s2mps11_clk {
+	struct sec_pmic_dev *iodev;
+	struct clk_hw hw;
+	struct clk *clk;
+	struct clk_lookup *lookup;
+	u32 mask;
+	bool enabled;
+};
+
+static struct s2mps11_clk *to_s2mps11_clk(struct clk_hw *hw)
+{
+	return container_of(hw, struct s2mps11_clk, hw);
+}
+
+static int s2mps11_clk_prepare(struct clk_hw *hw)
+{
+	struct s2mps11_clk *s2mps11 = to_s2mps11_clk(hw);
+	int ret;
+
+	ret = regmap_update_bits(s2mps11->iodev->regmap,
+				S2MPS11_REG_RTC_CTRL,
+				 s2mps11->mask, s2mps11->mask);
+	if (!ret)
+		s2mps11->enabled = true;
+
+	return ret;
+}
+
+static void s2mps11_clk_unprepare(struct clk_hw *hw)
+{
+	struct s2mps11_clk *s2mps11 = to_s2mps11_clk(hw);
+	int ret;
+
+	ret = regmap_update_bits(s2mps11->iodev->regmap, S2MPS11_REG_RTC_CTRL,
+			   s2mps11->mask, ~s2mps11->mask);
+
+	if (!ret)
+		s2mps11->enabled = false;
+}
+
+static int s2mps11_clk_is_enabled(struct clk_hw *hw)
+{
+	struct s2mps11_clk *s2mps11 = to_s2mps11_clk(hw);
+
+	return s2mps11->enabled;
+}
+
+static unsigned long s2mps11_clk_recalc_rate(struct clk_hw *hw,
+					     unsigned long parent_rate)
+{
+	struct s2mps11_clk *s2mps11 = to_s2mps11_clk(hw);
+	if (s2mps11->enabled)
+		return 32768;
+	else
+		return 0;
+}
+
+static struct clk_ops s2mps11_clk_ops = {
+	.prepare	= s2mps11_clk_prepare,
+	.unprepare	= s2mps11_clk_unprepare,
+	.is_enabled	= s2mps11_clk_is_enabled,
+	.recalc_rate	= s2mps11_clk_recalc_rate,
+};
+
+static struct clk_init_data s2mps11_clks_init[S2MPS11_CLKS_NUM] = {
+	[S2MPS11_CLK_AP] = {
+		.name = "s2mps11_ap",
+		.ops = &s2mps11_clk_ops,
+		.flags = CLK_IS_ROOT,
+	},
+	[S2MPS11_CLK_CP] = {
+		.name = "s2mps11_cp",
+		.ops = &s2mps11_clk_ops,
+		.flags = CLK_IS_ROOT,
+	},
+	[S2MPS11_CLK_BT] = {
+		.name = "s2mps11_bt",
+		.ops = &s2mps11_clk_ops,
+		.flags = CLK_IS_ROOT,
+	},
+};
+
+static struct device_node *s2mps11_clk_parse_dt(struct platform_device *pdev)
+{
+	struct sec_pmic_dev *iodev = dev_get_drvdata(pdev->dev.parent);
+	struct device_node *clk_np;
+	int i;
+
+	if (!iodev->dev->of_node)
+		return NULL;
+
+	clk_np = of_find_node_by_name(iodev->dev->of_node, "clocks");
+	if (!clk_np) {
+		dev_err(&pdev->dev, "could not find clock sub-node\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	clk_table = devm_kzalloc(&pdev->dev, sizeof(struct clk *) *
+				 S2MPS11_CLKS_NUM, GFP_KERNEL);
+	if (!clk_table)
+		return ERR_PTR(-ENOMEM);
+
+	for (i = 0; i < S2MPS11_CLKS_NUM; i++)
+		of_property_read_string_index(clk_np, "clock-output-names", i,
+				&s2mps11_clks_init[i].name);
+
+	return clk_np;
+}
+
+static int s2mps11_clk_probe(struct platform_device *pdev)
+{
+	struct sec_pmic_dev *iodev = dev_get_drvdata(pdev->dev.parent);
+	struct s2mps11_clk *s2mps11_clks, *s2mps11_clk;
+	struct device_node *clk_np = NULL;
+	int i, ret = 0;
+	u32 val;
+
+	s2mps11_clks = devm_kzalloc(&pdev->dev, sizeof(*s2mps11_clk) *
+					S2MPS11_CLKS_NUM, GFP_KERNEL);
+	if (!s2mps11_clks)
+		return -ENOMEM;
+
+	s2mps11_clk = s2mps11_clks;
+
+	clk_np = s2mps11_clk_parse_dt(pdev);
+	if (IS_ERR(clk_np))
+		return PTR_ERR(clk_np);
+
+	for (i = 0; i < S2MPS11_CLKS_NUM; i++, s2mps11_clk++) {
+		s2mps11_clk->iodev = iodev;
+		s2mps11_clk->hw.init = &s2mps11_clks_init[i];
+		s2mps11_clk->mask = 1 << i;
+
+		ret = regmap_read(s2mps11_clk->iodev->regmap,
+				  S2MPS11_REG_RTC_CTRL, &val);
+		if (ret < 0)
+			goto err_reg;
+
+		s2mps11_clk->enabled = val & s2mps11_clk->mask;
+
+		s2mps11_clk->clk = devm_clk_register(&pdev->dev,
+							&s2mps11_clk->hw);
+		if (IS_ERR(s2mps11_clk->clk)) {
+			dev_err(&pdev->dev, "Fail to register : %s\n",
+						s2mps11_name(s2mps11_clk));
+			ret = PTR_ERR(s2mps11_clk->clk);
+			goto err_reg;
+		}
+
+		s2mps11_clk->lookup = devm_kzalloc(&pdev->dev,
+					sizeof(struct clk_lookup), GFP_KERNEL);
+		if (!s2mps11_clk->lookup) {
+			ret = -ENOMEM;
+			goto err_lup;
+		}
+
+		s2mps11_clk->lookup->con_id = s2mps11_name(s2mps11_clk);
+		s2mps11_clk->lookup->clk = s2mps11_clk->clk;
+
+		clkdev_add(s2mps11_clk->lookup);
+	}
+
+	if (clk_table) {
+		for (i = 0; i < S2MPS11_CLKS_NUM; i++)
+			clk_table[i] = s2mps11_clks[i].clk;
+
+		clk_data.clks = clk_table;
+		clk_data.clk_num = S2MPS11_CLKS_NUM;
+		of_clk_add_provider(clk_np, of_clk_src_onecell_get, &clk_data);
+	}
+
+	platform_set_drvdata(pdev, s2mps11_clks);
+
+	return ret;
+err_lup:
+	devm_clk_unregister(&pdev->dev, s2mps11_clk->clk);
+err_reg:
+	while (s2mps11_clk > s2mps11_clks) {
+		if (s2mps11_clk->lookup) {
+			clkdev_drop(s2mps11_clk->lookup);
+			devm_clk_unregister(&pdev->dev, s2mps11_clk->clk);
+		}
+		s2mps11_clk--;
+	}
+
+	return ret;
+}
+
+static int s2mps11_clk_remove(struct platform_device *pdev)
+{
+	struct s2mps11_clk *s2mps11_clks = platform_get_drvdata(pdev);
+	int i;
+
+	for (i = 0; i < S2MPS11_CLKS_NUM; i++)
+		clkdev_drop(s2mps11_clks[i].lookup);
+
+	return 0;
+}
+
+static const struct platform_device_id s2mps11_clk_id[] = {
+	{ "s2mps11-clk", 0},
+	{ },
+};
+MODULE_DEVICE_TABLE(platform, s2mps11_clk_id);
+
+static struct platform_driver s2mps11_clk_driver = {
+	.driver = {
+		.name  = "s2mps11-clk",
+		.owner = THIS_MODULE,
+	},
+	.probe = s2mps11_clk_probe,
+	.remove = s2mps11_clk_remove,
+	.id_table = s2mps11_clk_id,
+};
+
+static int __init s2mps11_clk_init(void)
+{
+	return platform_driver_register(&s2mps11_clk_driver);
+}
+subsys_initcall(s2mps11_clk_init);
+
+static void __init s2mps11_clk_cleanup(void)
+{
+	platform_driver_unregister(&s2mps11_clk_driver);
+}
+module_exit(s2mps11_clk_cleanup);
+
+MODULE_DESCRIPTION("S2MPS11 Clock Driver");
+MODULE_AUTHOR("Yadwinder Singh Brar <yadi.brar@samsung.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/clk/clk-u300.c b/drivers/clk/clk-u300.c
index 8774e05..3efbdd0 100644
--- a/drivers/clk/clk-u300.c
+++ b/drivers/clk/clk-u300.c
@@ -746,7 +746,7 @@
 	u16 clk_val;
 };
 
-struct u300_clock const __initconst u300_clk_lookup[] = {
+static struct u300_clock const u300_clk_lookup[] __initconst = {
 	{
 		.type = U300_CLK_TYPE_REST,
 		.id = 3,
@@ -1151,7 +1151,7 @@
 		of_clk_add_provider(np, of_clk_src_simple_get, clk);
 }
 
-static const __initconst struct of_device_id u300_clk_match[] = {
+static const struct of_device_id u300_clk_match[] __initconst = {
 	{
 		.compatible = "fixed-clock",
 		.data = of_fixed_clk_setup,
diff --git a/drivers/clk/clk-wm831x.c b/drivers/clk/clk-wm831x.c
index 1b3f8c9..805b4c3 100644
--- a/drivers/clk/clk-wm831x.c
+++ b/drivers/clk/clk-wm831x.c
@@ -31,7 +31,7 @@
 	bool xtal_ena;
 };
 
-static int wm831x_xtal_is_enabled(struct clk_hw *hw)
+static int wm831x_xtal_is_prepared(struct clk_hw *hw)
 {
 	struct wm831x_clk *clkdata = container_of(hw, struct wm831x_clk,
 						  xtal_hw);
@@ -52,7 +52,7 @@
 }
 
 static const struct clk_ops wm831x_xtal_ops = {
-	.is_enabled = wm831x_xtal_is_enabled,
+	.is_prepared = wm831x_xtal_is_prepared,
 	.recalc_rate = wm831x_xtal_recalc_rate,
 };
 
@@ -73,7 +73,7 @@
 	24576000,
 };
 
-static int wm831x_fll_is_enabled(struct clk_hw *hw)
+static int wm831x_fll_is_prepared(struct clk_hw *hw)
 {
 	struct wm831x_clk *clkdata = container_of(hw, struct wm831x_clk,
 						  fll_hw);
@@ -170,7 +170,7 @@
 	if (i == ARRAY_SIZE(wm831x_fll_auto_rates))
 		return -EINVAL;
 
-	if (wm831x_fll_is_enabled(hw))
+	if (wm831x_fll_is_prepared(hw))
 		return -EPERM;
 
 	return wm831x_set_bits(wm831x, WM831X_CLOCK_CONTROL_2,
@@ -220,7 +220,7 @@
 }
 
 static const struct clk_ops wm831x_fll_ops = {
-	.is_enabled = wm831x_fll_is_enabled,
+	.is_prepared = wm831x_fll_is_prepared,
 	.prepare = wm831x_fll_prepare,
 	.unprepare = wm831x_fll_unprepare,
 	.round_rate = wm831x_fll_round_rate,
@@ -237,7 +237,7 @@
 	.flags = CLK_SET_RATE_GATE,
 };
 
-static int wm831x_clkout_is_enabled(struct clk_hw *hw)
+static int wm831x_clkout_is_prepared(struct clk_hw *hw)
 {
 	struct wm831x_clk *clkdata = container_of(hw, struct wm831x_clk,
 						  clkout_hw);
@@ -335,7 +335,7 @@
 }
 
 static const struct clk_ops wm831x_clkout_ops = {
-	.is_enabled = wm831x_clkout_is_enabled,
+	.is_prepared = wm831x_clkout_is_prepared,
 	.prepare = wm831x_clkout_prepare,
 	.unprepare = wm831x_clkout_unprepare,
 	.get_parent = wm831x_clkout_get_parent,
@@ -360,6 +360,8 @@
 	if (!clkdata)
 		return -ENOMEM;
 
+	clkdata->wm831x = wm831x;
+
 	/* XTAL_ENA can only be set via OTP/InstantConfig so just read once */
 	ret = wm831x_reg_read(wm831x, WM831X_CLOCK_CONTROL_2);
 	if (ret < 0) {
diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index 54a191c..a004769 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -458,7 +458,6 @@
 			clk->ops->unprepare(clk->hw);
 	}
 }
-EXPORT_SYMBOL_GPL(__clk_get_flags);
 
 /* caller must hold prepare_lock */
 static void clk_disable_unused_subtree(struct clk *clk)
@@ -559,6 +558,19 @@
 	return !clk ? NULL : clk->parent;
 }
 
+struct clk *clk_get_parent_by_index(struct clk *clk, u8 index)
+{
+	if (!clk || index >= clk->num_parents)
+		return NULL;
+	else if (!clk->parents)
+		return __clk_lookup(clk->parent_names[index]);
+	else if (!clk->parents[index])
+		return clk->parents[index] =
+			__clk_lookup(clk->parent_names[index]);
+	else
+		return clk->parents[index];
+}
+
 unsigned int __clk_get_enable_count(struct clk *clk)
 {
 	return !clk ? 0 : clk->enable_count;
@@ -594,6 +606,7 @@
 {
 	return !clk ? 0 : clk->flags;
 }
+EXPORT_SYMBOL_GPL(__clk_get_flags);
 
 bool __clk_is_prepared(struct clk *clk)
 {
@@ -679,6 +692,55 @@
 	return NULL;
 }
 
+/*
+ * Helper for finding best parent to provide a given frequency. This can be used
+ * directly as a determine_rate callback (e.g. for a mux), or from a more
+ * complex clock that may combine a mux with other operations.
+ */
+long __clk_mux_determine_rate(struct clk_hw *hw, unsigned long rate,
+			      unsigned long *best_parent_rate,
+			      struct clk **best_parent_p)
+{
+	struct clk *clk = hw->clk, *parent, *best_parent = NULL;
+	int i, num_parents;
+	unsigned long parent_rate, best = 0;
+
+	/* if NO_REPARENT flag set, pass through to current parent */
+	if (clk->flags & CLK_SET_RATE_NO_REPARENT) {
+		parent = clk->parent;
+		if (clk->flags & CLK_SET_RATE_PARENT)
+			best = __clk_round_rate(parent, rate);
+		else if (parent)
+			best = __clk_get_rate(parent);
+		else
+			best = __clk_get_rate(clk);
+		goto out;
+	}
+
+	/* find the parent that can provide the fastest rate <= rate */
+	num_parents = clk->num_parents;
+	for (i = 0; i < num_parents; i++) {
+		parent = clk_get_parent_by_index(clk, i);
+		if (!parent)
+			continue;
+		if (clk->flags & CLK_SET_RATE_PARENT)
+			parent_rate = __clk_round_rate(parent, rate);
+		else
+			parent_rate = __clk_get_rate(parent);
+		if (parent_rate <= rate && parent_rate > best) {
+			best_parent = parent;
+			best = parent_rate;
+		}
+	}
+
+out:
+	if (best_parent)
+		*best_parent_p = best_parent;
+	*best_parent_rate = best;
+
+	return best;
+}
+
 /***        clk api        ***/
 
 void __clk_unprepare(struct clk *clk)
@@ -702,7 +764,7 @@
 
 /**
  * clk_unprepare - undo preparation of a clock source
- * @clk: the clk being unprepare
+ * @clk: the clk being unprepared
  *
  * clk_unprepare may sleep, which differentiates it from clk_disable.  In a
  * simple case, clk_unprepare can be used instead of clk_disable to gate a clk
@@ -869,27 +931,31 @@
 /**
  * __clk_round_rate - round the given rate for a clk
  * @clk: round the rate of this clock
+ * @rate: the rate which is to be rounded
  *
  * Caller must hold prepare_lock.  Useful for clk_ops such as .set_rate
  */
 unsigned long __clk_round_rate(struct clk *clk, unsigned long rate)
 {
 	unsigned long parent_rate = 0;
+	struct clk *parent;
 
 	if (!clk)
 		return 0;
 
-	if (!clk->ops->round_rate) {
-		if (clk->flags & CLK_SET_RATE_PARENT)
-			return __clk_round_rate(clk->parent, rate);
-		else
-			return clk->rate;
-	}
+	parent = clk->parent;
+	if (parent)
+		parent_rate = parent->rate;
 
-	if (clk->parent)
-		parent_rate = clk->parent->rate;
-
-	return clk->ops->round_rate(clk->hw, rate, &parent_rate);
+	if (clk->ops->determine_rate)
+		return clk->ops->determine_rate(clk->hw, rate, &parent_rate,
+						&parent);
+	else if (clk->ops->round_rate)
+		return clk->ops->round_rate(clk->hw, rate, &parent_rate);
+	else if (clk->flags & CLK_SET_RATE_PARENT)
+		return __clk_round_rate(clk->parent, rate);
+	else
+		return clk->rate;
 }
 
 /**
@@ -956,7 +1022,7 @@
  *
  * Walks the subtree of clks starting with clk and recalculates rates as it
  * goes.  Note that if a clk does not implement the .recalc_rate callback then
- * it is assumed that the clock will take on the rate of it's parent.
+ * it is assumed that the clock will take on the rate of its parent.
  *
  * clk_recalc_rates also propagates the POST_RATE_CHANGE notification,
  * if necessary.
@@ -1014,6 +1080,115 @@
 }
 EXPORT_SYMBOL_GPL(clk_get_rate);
 
+static u8 clk_fetch_parent_index(struct clk *clk, struct clk *parent)
+{
+	u8 i;
+
+	if (!clk->parents)
+		clk->parents = kzalloc((sizeof(struct clk*) * clk->num_parents),
+								GFP_KERNEL);
+
+	/*
+	 * find index of new parent clock using cached parent ptrs,
+	 * or if not yet cached, use string name comparison and cache
+	 * them now to avoid future calls to __clk_lookup.
+	 */
+	for (i = 0; i < clk->num_parents; i++) {
+		if (clk->parents && clk->parents[i] == parent)
+			break;
+		else if (!strcmp(clk->parent_names[i], parent->name)) {
+			if (clk->parents)
+				clk->parents[i] = __clk_lookup(parent->name);
+			break;
+		}
+	}
+
+	return i;
+}
+
+static void clk_reparent(struct clk *clk, struct clk *new_parent)
+{
+	hlist_del(&clk->child_node);
+
+	if (new_parent) {
+		/* avoid duplicate POST_RATE_CHANGE notifications */
+		if (new_parent->new_child == clk)
+			new_parent->new_child = NULL;
+
+		hlist_add_head(&clk->child_node, &new_parent->children);
+	} else {
+		hlist_add_head(&clk->child_node, &clk_orphan_list);
+	}
+
+	clk->parent = new_parent;
+}
+
+static int __clk_set_parent(struct clk *clk, struct clk *parent, u8 p_index)
+{
+	unsigned long flags;
+	int ret = 0;
+	struct clk *old_parent = clk->parent;
+
+	/*
+	 * Migrate prepare state between parents and prevent race with
+	 * clk_enable().
+	 *
+	 * If the clock is not prepared, then a race with
+	 * clk_enable/disable() is impossible since we already have the
+	 * prepare lock (future calls to clk_enable() need to be preceded by
+	 * a clk_prepare()).
+	 *
+	 * If the clock is prepared, migrate the prepared state to the new
+	 * parent and also protect against a race with clk_enable() by
+	 * forcing the clock and the new parent on.  This ensures that all
+	 * future calls to clk_enable() are practically NOPs with respect to
+	 * hardware and software states.
+	 *
+	 * See also: Comment for clk_set_parent() below.
+	 */
+	if (clk->prepare_count) {
+		__clk_prepare(parent);
+		clk_enable(parent);
+		clk_enable(clk);
+	}
+
+	/* update the clk tree topology */
+	flags = clk_enable_lock();
+	clk_reparent(clk, parent);
+	clk_enable_unlock(flags);
+
+	/* change clock input source */
+	if (parent && clk->ops->set_parent)
+		ret = clk->ops->set_parent(clk->hw, p_index);
+
+	if (ret) {
+		flags = clk_enable_lock();
+		clk_reparent(clk, old_parent);
+		clk_enable_unlock(flags);
+
+		if (clk->prepare_count) {
+			clk_disable(clk);
+			clk_disable(parent);
+			__clk_unprepare(parent);
+		}
+		return ret;
+	}
+
+	/*
+	 * Finish the migration of prepare state and undo the changes done
+	 * for preventing a race with clk_enable().
+	 */
+	if (clk->prepare_count) {
+		clk_disable(clk);
+		clk_disable(old_parent);
+		__clk_unprepare(old_parent);
+	}
+
+	/* update debugfs with new clk tree topology */
+	clk_debug_reparent(clk, parent);
+	return 0;
+}
+
 /**
  * __clk_speculate_rates
  * @clk: first clk in the subtree
@@ -1026,7 +1201,7 @@
  * pre-rate change notifications and returns early if no clks in the
  * subtree have subscribed to the notifications.  Note that if a clk does not
  * implement the .recalc_rate callback then it is assumed that the clock will
- * take on the rate of it's parent.
+ * take on the rate of its parent.
  *
  * Caller must hold prepare_lock.
  */
@@ -1058,18 +1233,25 @@
 	return ret;
 }
 
-static void clk_calc_subtree(struct clk *clk, unsigned long new_rate)
+static void clk_calc_subtree(struct clk *clk, unsigned long new_rate,
+			     struct clk *new_parent, u8 p_index)
 {
 	struct clk *child;
 
 	clk->new_rate = new_rate;
+	clk->new_parent = new_parent;
+	clk->new_parent_index = p_index;
+	/* include clk in new parent's PRE_RATE_CHANGE notifications */
+	clk->new_child = NULL;
+	if (new_parent && new_parent != clk->parent)
+		new_parent->new_child = clk;
 
 	hlist_for_each_entry(child, &clk->children, child_node) {
 		if (child->ops->recalc_rate)
 			child->new_rate = child->ops->recalc_rate(child->hw, new_rate);
 		else
 			child->new_rate = new_rate;
-		clk_calc_subtree(child, child->new_rate);
+		clk_calc_subtree(child, child->new_rate, NULL, 0);
 	}
 }
 
@@ -1080,50 +1262,63 @@
 static struct clk *clk_calc_new_rates(struct clk *clk, unsigned long rate)
 {
 	struct clk *top = clk;
+	struct clk *old_parent, *parent;
 	unsigned long best_parent_rate = 0;
 	unsigned long new_rate;
+	u8 p_index = 0;
 
 	/* sanity */
 	if (IS_ERR_OR_NULL(clk))
 		return NULL;
 
 	/* save parent rate, if it exists */
-	if (clk->parent)
-		best_parent_rate = clk->parent->rate;
+	parent = old_parent = clk->parent;
+	if (parent)
+		best_parent_rate = parent->rate;
 
-	/* never propagate up to the parent */
-	if (!(clk->flags & CLK_SET_RATE_PARENT)) {
-		if (!clk->ops->round_rate) {
-			clk->new_rate = clk->rate;
-			return NULL;
-		}
-		new_rate = clk->ops->round_rate(clk->hw, rate, &best_parent_rate);
+	/* find the closest rate and parent clk/rate */
+	if (clk->ops->determine_rate) {
+		new_rate = clk->ops->determine_rate(clk->hw, rate,
+						    &best_parent_rate,
+						    &parent);
+	} else if (clk->ops->round_rate) {
+		new_rate = clk->ops->round_rate(clk->hw, rate,
+						&best_parent_rate);
+	} else if (!parent || !(clk->flags & CLK_SET_RATE_PARENT)) {
+		/* pass-through clock without adjustable parent */
+		clk->new_rate = clk->rate;
+		return NULL;
+	} else {
+		/* pass-through clock with adjustable parent */
+		top = clk_calc_new_rates(parent, rate);
+		new_rate = parent->new_rate;
 		goto out;
 	}
 
-	/* need clk->parent from here on out */
-	if (!clk->parent) {
-		pr_debug("%s: %s has NULL parent\n", __func__, clk->name);
+	/* some clocks must be gated to change parent */
+	if (parent != old_parent &&
+	    (clk->flags & CLK_SET_PARENT_GATE) && clk->prepare_count) {
+		pr_debug("%s: %s not gated but wants to reparent\n",
+			 __func__, clk->name);
 		return NULL;
 	}
 
-	if (!clk->ops->round_rate) {
-		top = clk_calc_new_rates(clk->parent, rate);
-		new_rate = clk->parent->new_rate;
-
-		goto out;
+	/* try finding the new parent index */
+	if (parent) {
+		p_index = clk_fetch_parent_index(clk, parent);
+		if (p_index == clk->num_parents) {
+			pr_debug("%s: clk %s can not be parent of clk %s\n",
+				 __func__, parent->name, clk->name);
+			return NULL;
+		}
 	}
 
-	new_rate = clk->ops->round_rate(clk->hw, rate, &best_parent_rate);
-
-	if (best_parent_rate != clk->parent->rate) {
-		top = clk_calc_new_rates(clk->parent, best_parent_rate);
-
-		goto out;
-	}
+	if ((clk->flags & CLK_SET_RATE_PARENT) && parent &&
+	    best_parent_rate != parent->rate)
+		top = clk_calc_new_rates(parent, best_parent_rate);
 
 out:
-	clk_calc_subtree(clk, new_rate);
+	clk_calc_subtree(clk, new_rate, parent, p_index);
 
 	return top;
 }
@@ -1135,7 +1330,7 @@
  */
 static struct clk *clk_propagate_rate_change(struct clk *clk, unsigned long event)
 {
-	struct clk *child, *fail_clk = NULL;
+	struct clk *child, *tmp_clk, *fail_clk = NULL;
 	int ret = NOTIFY_DONE;
 
 	if (clk->rate == clk->new_rate)
@@ -1148,9 +1343,19 @@
 	}
 
 	hlist_for_each_entry(child, &clk->children, child_node) {
-		clk = clk_propagate_rate_change(child, event);
-		if (clk)
-			fail_clk = clk;
+		/* Skip children who will be reparented to another clock */
+		if (child->new_parent && child->new_parent != clk)
+			continue;
+		tmp_clk = clk_propagate_rate_change(child, event);
+		if (tmp_clk)
+			fail_clk = tmp_clk;
+	}
+
+	/* handle the new child who might not be in clk->children yet */
+	if (clk->new_child) {
+		tmp_clk = clk_propagate_rate_change(clk->new_child, event);
+		if (tmp_clk)
+			fail_clk = tmp_clk;
 	}
 
 	return fail_clk;
@@ -1168,6 +1373,10 @@
 
 	old_rate = clk->rate;
 
+	/* set parent */
+	if (clk->new_parent && clk->new_parent != clk->parent)
+		__clk_set_parent(clk, clk->new_parent, clk->new_parent_index);
+
 	if (clk->parent)
 		best_parent_rate = clk->parent->rate;
 
@@ -1182,8 +1391,16 @@
 	if (clk->notifier_count && old_rate != clk->rate)
 		__clk_notify(clk, POST_RATE_CHANGE, old_rate, clk->rate);
 
-	hlist_for_each_entry(child, &clk->children, child_node)
+	hlist_for_each_entry(child, &clk->children, child_node) {
+		/* Skip children who will be reparented to another clock */
+		if (child->new_parent && child->new_parent != clk)
+			continue;
 		clk_change_rate(child);
+	}
+
+	/* handle the new child who might not be in clk->children yet */
+	if (clk->new_child)
+		clk_change_rate(clk->new_child);
 }
 
 /**
@@ -1198,7 +1415,7 @@
  * outcome of clk's .round_rate implementation.  If *parent_rate is unchanged
  * after calling .round_rate then upstream parent propagation is ignored.  If
  * *parent_rate comes back with a new rate for clk's parent then we propagate
- * up to clk's parent and set it's rate.  Upward propagation will continue
+ * up to clk's parent and set its rate.  Upward propagation will continue
  * until either a clk does not support the CLK_SET_RATE_PARENT flag or
  * .round_rate stops requesting changes to clk's parent_rate.
  *
@@ -1212,6 +1429,9 @@
 	struct clk *top, *fail_clk;
 	int ret = 0;
 
+	if (!clk)
+		return 0;
+
 	/* prevent racing with updates to the clock topology */
 	clk_prepare_lock();
 
@@ -1315,30 +1535,12 @@
 			kzalloc((sizeof(struct clk*) * clk->num_parents),
 					GFP_KERNEL);
 
-	if (!clk->parents)
-		ret = __clk_lookup(clk->parent_names[index]);
-	else if (!clk->parents[index])
-		ret = clk->parents[index] =
-			__clk_lookup(clk->parent_names[index]);
-	else
-		ret = clk->parents[index];
+	ret = clk_get_parent_by_index(clk, index);
 
 out:
 	return ret;
 }
 
-static void clk_reparent(struct clk *clk, struct clk *new_parent)
-{
-	hlist_del(&clk->child_node);
-
-	if (new_parent)
-		hlist_add_head(&clk->child_node, &new_parent->children);
-	else
-		hlist_add_head(&clk->child_node, &clk_orphan_list);
-
-	clk->parent = new_parent;
-}
-
 void __clk_reparent(struct clk *clk, struct clk *new_parent)
 {
 	clk_reparent(clk, new_parent);
@@ -1346,98 +1548,6 @@
 	__clk_recalc_rates(clk, POST_RATE_CHANGE);
 }
 
-static u8 clk_fetch_parent_index(struct clk *clk, struct clk *parent)
-{
-	u8 i;
-
-	if (!clk->parents)
-		clk->parents = kzalloc((sizeof(struct clk*) * clk->num_parents),
-								GFP_KERNEL);
-
-	/*
-	 * find index of new parent clock using cached parent ptrs,
-	 * or if not yet cached, use string name comparison and cache
-	 * them now to avoid future calls to __clk_lookup.
-	 */
-	for (i = 0; i < clk->num_parents; i++) {
-		if (clk->parents && clk->parents[i] == parent)
-			break;
-		else if (!strcmp(clk->parent_names[i], parent->name)) {
-			if (clk->parents)
-				clk->parents[i] = __clk_lookup(parent->name);
-			break;
-		}
-	}
-
-	return i;
-}
-
-static int __clk_set_parent(struct clk *clk, struct clk *parent, u8 p_index)
-{
-	unsigned long flags;
-	int ret = 0;
-	struct clk *old_parent = clk->parent;
-
-	/*
-	 * Migrate prepare state between parents and prevent race with
-	 * clk_enable().
-	 *
-	 * If the clock is not prepared, then a race with
-	 * clk_enable/disable() is impossible since we already have the
-	 * prepare lock (future calls to clk_enable() need to be preceded by
-	 * a clk_prepare()).
-	 *
-	 * If the clock is prepared, migrate the prepared state to the new
-	 * parent and also protect against a race with clk_enable() by
-	 * forcing the clock and the new parent on.  This ensures that all
-	 * future calls to clk_enable() are practically NOPs with respect to
-	 * hardware and software states.
-	 *
-	 * See also: Comment for clk_set_parent() below.
-	 */
-	if (clk->prepare_count) {
-		__clk_prepare(parent);
-		clk_enable(parent);
-		clk_enable(clk);
-	}
-
-	/* update the clk tree topology */
-	flags = clk_enable_lock();
-	clk_reparent(clk, parent);
-	clk_enable_unlock(flags);
-
-	/* change clock input source */
-	if (parent && clk->ops->set_parent)
-		ret = clk->ops->set_parent(clk->hw, p_index);
-
-	if (ret) {
-		flags = clk_enable_lock();
-		clk_reparent(clk, old_parent);
-		clk_enable_unlock(flags);
-
-		if (clk->prepare_count) {
-			clk_disable(clk);
-			clk_disable(parent);
-			__clk_unprepare(parent);
-		}
-		return ret;
-	}
-
-	/*
-	 * Finish the migration of prepare state and undo the changes done
-	 * for preventing a race with clk_enable().
-	 */
-	if (clk->prepare_count) {
-		clk_disable(clk);
-		clk_disable(old_parent);
-		__clk_unprepare(old_parent);
-	}
-
-	/* update debugfs with new clk tree topology */
-	clk_debug_reparent(clk, parent);
-	return 0;
-}
-
 /**
  * clk_set_parent - switch the parent of a mux clk
  * @clk: the mux clk whose input we are switching
@@ -1461,7 +1571,10 @@
 	u8 p_index = 0;
 	unsigned long p_rate = 0;
 
-	if (!clk || !clk->ops)
+	if (!clk)
+		return 0;
+
+	if (!clk->ops)
 		return -EINVAL;
 
 	/* verify ops for for multi-parent clks */
@@ -1544,8 +1657,9 @@
 
 	/* check that clk_ops are sane.  See Documentation/clk.txt */
 	if (clk->ops->set_rate &&
-			!(clk->ops->round_rate && clk->ops->recalc_rate)) {
-		pr_warning("%s: %s must implement .round_rate & .recalc_rate\n",
+	    !((clk->ops->round_rate || clk->ops->determine_rate) &&
+	      clk->ops->recalc_rate)) {
+		pr_warning("%s: %s must implement .round_rate or .determine_rate in addition to .recalc_rate\n",
 				__func__, clk->name);
 		ret = -EINVAL;
 		goto out;
@@ -1628,7 +1742,7 @@
 	 * this clock
 	 */
 	hlist_for_each_entry_safe(orphan, tmp2, &clk_orphan_list, child_node) {
-		if (orphan->ops->get_parent) {
+		if (orphan->num_parents && orphan->ops->get_parent) {
 			i = orphan->ops->get_parent(orphan->hw);
 			if (!strcmp(clk->name, orphan->parent_names[i]))
 				__clk_reparent(orphan, clk);
@@ -1648,7 +1762,7 @@
 	 * The .init callback is not used by any of the basic clock types, but
 	 * exists for weird hardware that must perform initialization magic.
 	 * Please consider other ways of solving initialization problems before
-	 * using this callback, as it's use is discouraged.
+	 * using this callback, as its use is discouraged.
 	 */
 	if (clk->ops->init)
 		clk->ops->init(clk->hw);
@@ -1675,7 +1789,7 @@
  * very large numbers of clocks that need to be statically initialized.  It is
  * a layering violation to include clk-private.h from any code which implements
  * a clock's .ops; as such any statically initialized clock data MUST be in a
- * separate C file from the logic that implements it's operations.  Returns 0
+ * separate C file from the logic that implements its operations.  Returns 0
  * on success, otherwise an error code.
  */
 struct clk *__clk_register(struct device *dev, struct clk_hw *hw)
@@ -2115,13 +2229,13 @@
  */
 void __init of_clk_init(const struct of_device_id *matches)
 {
+	const struct of_device_id *match;
 	struct device_node *np;
 
 	if (!matches)
 		matches = __clk_of_table;
 
-	for_each_matching_node(np, matches) {
-		const struct of_device_id *match = of_match_node(matches, np);
+	for_each_matching_node_and_match(np, matches, &match) {
 		of_clk_init_cb_t clk_init_cb = match->data;
 		clk_init_cb(np);
 	}
diff --git a/drivers/clk/mmp/clk-mmp2.c b/drivers/clk/mmp/clk-mmp2.c
index d1f1a19..b2721ca 100644
--- a/drivers/clk/mmp/clk-mmp2.c
+++ b/drivers/clk/mmp/clk-mmp2.c
@@ -248,7 +248,8 @@
 	clk_register_clkdev(clk, NULL, "mmp2-pwm.3");
 
 	clk = clk_register_mux(NULL, "uart0_mux", uart_parent,
-				ARRAY_SIZE(uart_parent), CLK_SET_RATE_PARENT,
+				ARRAY_SIZE(uart_parent),
+				CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
 				apbc_base + APBC_UART0, 4, 3, 0, &clk_lock);
 	clk_set_parent(clk, vctcxo);
 	clk_register_clkdev(clk, "uart_mux.0", NULL);
@@ -258,7 +259,8 @@
 	clk_register_clkdev(clk, NULL, "pxa2xx-uart.0");
 
 	clk = clk_register_mux(NULL, "uart1_mux", uart_parent,
-				ARRAY_SIZE(uart_parent), CLK_SET_RATE_PARENT,
+				ARRAY_SIZE(uart_parent),
+				CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
 				apbc_base + APBC_UART1, 4, 3, 0, &clk_lock);
 	clk_set_parent(clk, vctcxo);
 	clk_register_clkdev(clk, "uart_mux.1", NULL);
@@ -268,7 +270,8 @@
 	clk_register_clkdev(clk, NULL, "pxa2xx-uart.1");
 
 	clk = clk_register_mux(NULL, "uart2_mux", uart_parent,
-				ARRAY_SIZE(uart_parent), CLK_SET_RATE_PARENT,
+				ARRAY_SIZE(uart_parent),
+				CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
 				apbc_base + APBC_UART2, 4, 3, 0, &clk_lock);
 	clk_set_parent(clk, vctcxo);
 	clk_register_clkdev(clk, "uart_mux.2", NULL);
@@ -278,7 +281,8 @@
 	clk_register_clkdev(clk, NULL, "pxa2xx-uart.2");
 
 	clk = clk_register_mux(NULL, "uart3_mux", uart_parent,
-				ARRAY_SIZE(uart_parent), CLK_SET_RATE_PARENT,
+				ARRAY_SIZE(uart_parent),
+				CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
 				apbc_base + APBC_UART3, 4, 3, 0, &clk_lock);
 	clk_set_parent(clk, vctcxo);
 	clk_register_clkdev(clk, "uart_mux.3", NULL);
@@ -288,7 +292,8 @@
 	clk_register_clkdev(clk, NULL, "pxa2xx-uart.3");
 
 	clk = clk_register_mux(NULL, "ssp0_mux", ssp_parent,
-				ARRAY_SIZE(ssp_parent), CLK_SET_RATE_PARENT,
+				ARRAY_SIZE(ssp_parent),
+				CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
 				apbc_base + APBC_SSP0, 4, 3, 0, &clk_lock);
 	clk_register_clkdev(clk, "uart_mux.0", NULL);
 
@@ -297,7 +302,8 @@
 	clk_register_clkdev(clk, NULL, "mmp-ssp.0");
 
 	clk = clk_register_mux(NULL, "ssp1_mux", ssp_parent,
-				ARRAY_SIZE(ssp_parent), CLK_SET_RATE_PARENT,
+				ARRAY_SIZE(ssp_parent),
+				CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
 				apbc_base + APBC_SSP1, 4, 3, 0, &clk_lock);
 	clk_register_clkdev(clk, "ssp_mux.1", NULL);
 
@@ -306,7 +312,8 @@
 	clk_register_clkdev(clk, NULL, "mmp-ssp.1");
 
 	clk = clk_register_mux(NULL, "ssp2_mux", ssp_parent,
-				ARRAY_SIZE(ssp_parent), CLK_SET_RATE_PARENT,
+				ARRAY_SIZE(ssp_parent),
+				CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
 				apbc_base + APBC_SSP2, 4, 3, 0, &clk_lock);
 	clk_register_clkdev(clk, "ssp_mux.2", NULL);
 
@@ -315,7 +322,8 @@
 	clk_register_clkdev(clk, NULL, "mmp-ssp.2");
 
 	clk = clk_register_mux(NULL, "ssp3_mux", ssp_parent,
-				ARRAY_SIZE(ssp_parent), CLK_SET_RATE_PARENT,
+				ARRAY_SIZE(ssp_parent),
+				CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
 				apbc_base + APBC_SSP3, 4, 3, 0, &clk_lock);
 	clk_register_clkdev(clk, "ssp_mux.3", NULL);
 
@@ -324,7 +332,8 @@
 	clk_register_clkdev(clk, NULL, "mmp-ssp.3");
 
 	clk = clk_register_mux(NULL, "sdh_mux", sdh_parent,
-				ARRAY_SIZE(sdh_parent), CLK_SET_RATE_PARENT,
+				ARRAY_SIZE(sdh_parent),
+				CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
 				apmu_base + APMU_SDH0, 8, 2, 0, &clk_lock);
 	clk_register_clkdev(clk, "sdh_mux", NULL);
 
@@ -354,7 +363,8 @@
 	clk_register_clkdev(clk, "usb_clk", NULL);
 
 	clk = clk_register_mux(NULL, "disp0_mux", disp_parent,
-				ARRAY_SIZE(disp_parent), CLK_SET_RATE_PARENT,
+				ARRAY_SIZE(disp_parent),
+				CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
 				apmu_base + APMU_DISP0, 6, 2, 0, &clk_lock);
 	clk_register_clkdev(clk, "disp_mux.0", NULL);
 
@@ -376,7 +386,8 @@
 	clk_register_clkdev(clk, "disp_sphy.0", NULL);
 
 	clk = clk_register_mux(NULL, "disp1_mux", disp_parent,
-				ARRAY_SIZE(disp_parent), CLK_SET_RATE_PARENT,
+				ARRAY_SIZE(disp_parent),
+				CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
 				apmu_base + APMU_DISP1, 6, 2, 0, &clk_lock);
 	clk_register_clkdev(clk, "disp_mux.1", NULL);
 
@@ -394,7 +405,8 @@
 	clk_register_clkdev(clk, "ccic_arbiter", NULL);
 
 	clk = clk_register_mux(NULL, "ccic0_mux", ccic_parent,
-				ARRAY_SIZE(ccic_parent), CLK_SET_RATE_PARENT,
+				ARRAY_SIZE(ccic_parent),
+				CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
 				apmu_base + APMU_CCIC0, 6, 2, 0, &clk_lock);
 	clk_register_clkdev(clk, "ccic_mux.0", NULL);
 
@@ -421,7 +433,8 @@
 	clk_register_clkdev(clk, "sphyclk", "mmp-ccic.0");
 
 	clk = clk_register_mux(NULL, "ccic1_mux", ccic_parent,
-				ARRAY_SIZE(ccic_parent), CLK_SET_RATE_PARENT,
+				ARRAY_SIZE(ccic_parent),
+				CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
 				apmu_base + APMU_CCIC1, 6, 2, 0, &clk_lock);
 	clk_register_clkdev(clk, "ccic_mux.1", NULL);
 
diff --git a/drivers/clk/mmp/clk-pxa168.c b/drivers/clk/mmp/clk-pxa168.c
index 28b3b51..014396b 100644
--- a/drivers/clk/mmp/clk-pxa168.c
+++ b/drivers/clk/mmp/clk-pxa168.c
@@ -199,7 +199,8 @@
 	clk_register_clkdev(clk, NULL, "pxa168-pwm.3");
 
 	clk = clk_register_mux(NULL, "uart0_mux", uart_parent,
-				ARRAY_SIZE(uart_parent), CLK_SET_RATE_PARENT,
+				ARRAY_SIZE(uart_parent),
+				CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
 				apbc_base + APBC_UART0, 4, 3, 0, &clk_lock);
 	clk_set_parent(clk, uart_pll);
 	clk_register_clkdev(clk, "uart_mux.0", NULL);
@@ -209,7 +210,8 @@
 	clk_register_clkdev(clk, NULL, "pxa2xx-uart.0");
 
 	clk = clk_register_mux(NULL, "uart1_mux", uart_parent,
-				ARRAY_SIZE(uart_parent), CLK_SET_RATE_PARENT,
+				ARRAY_SIZE(uart_parent),
+				CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
 				apbc_base + APBC_UART1, 4, 3, 0, &clk_lock);
 	clk_set_parent(clk, uart_pll);
 	clk_register_clkdev(clk, "uart_mux.1", NULL);
@@ -219,7 +221,8 @@
 	clk_register_clkdev(clk, NULL, "pxa2xx-uart.1");
 
 	clk = clk_register_mux(NULL, "uart2_mux", uart_parent,
-				ARRAY_SIZE(uart_parent), CLK_SET_RATE_PARENT,
+				ARRAY_SIZE(uart_parent),
+				CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
 				apbc_base + APBC_UART2, 4, 3, 0, &clk_lock);
 	clk_set_parent(clk, uart_pll);
 	clk_register_clkdev(clk, "uart_mux.2", NULL);
@@ -229,7 +232,8 @@
 	clk_register_clkdev(clk, NULL, "pxa2xx-uart.2");
 
 	clk = clk_register_mux(NULL, "ssp0_mux", ssp_parent,
-				ARRAY_SIZE(ssp_parent), CLK_SET_RATE_PARENT,
+				ARRAY_SIZE(ssp_parent),
+				CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
 				apbc_base + APBC_SSP0, 4, 3, 0, &clk_lock);
 	clk_register_clkdev(clk, "uart_mux.0", NULL);
 
@@ -238,7 +242,8 @@
 	clk_register_clkdev(clk, NULL, "mmp-ssp.0");
 
 	clk = clk_register_mux(NULL, "ssp1_mux", ssp_parent,
-				ARRAY_SIZE(ssp_parent), CLK_SET_RATE_PARENT,
+				ARRAY_SIZE(ssp_parent),
+				CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
 				apbc_base + APBC_SSP1, 4, 3, 0, &clk_lock);
 	clk_register_clkdev(clk, "ssp_mux.1", NULL);
 
@@ -247,7 +252,8 @@
 	clk_register_clkdev(clk, NULL, "mmp-ssp.1");
 
 	clk = clk_register_mux(NULL, "ssp2_mux", ssp_parent,
-				ARRAY_SIZE(ssp_parent), CLK_SET_RATE_PARENT,
+				ARRAY_SIZE(ssp_parent),
+				CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
 				apbc_base + APBC_SSP2, 4, 3, 0, &clk_lock);
 	clk_register_clkdev(clk, "ssp_mux.2", NULL);
 
@@ -256,7 +262,8 @@
 	clk_register_clkdev(clk, NULL, "mmp-ssp.2");
 
 	clk = clk_register_mux(NULL, "ssp3_mux", ssp_parent,
-				ARRAY_SIZE(ssp_parent), CLK_SET_RATE_PARENT,
+				ARRAY_SIZE(ssp_parent),
+				CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
 				apbc_base + APBC_SSP3, 4, 3, 0, &clk_lock);
 	clk_register_clkdev(clk, "ssp_mux.3", NULL);
 
@@ -265,7 +272,8 @@
 	clk_register_clkdev(clk, NULL, "mmp-ssp.3");
 
 	clk = clk_register_mux(NULL, "ssp4_mux", ssp_parent,
-				ARRAY_SIZE(ssp_parent), CLK_SET_RATE_PARENT,
+				ARRAY_SIZE(ssp_parent),
+				CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
 				apbc_base + APBC_SSP4, 4, 3, 0, &clk_lock);
 	clk_register_clkdev(clk, "ssp_mux.4", NULL);
 
@@ -278,7 +286,8 @@
 	clk_register_clkdev(clk, NULL, "pxa3xx-nand.0");
 
 	clk = clk_register_mux(NULL, "sdh0_mux", sdh_parent,
-				ARRAY_SIZE(sdh_parent), CLK_SET_RATE_PARENT,
+				ARRAY_SIZE(sdh_parent),
+				CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
 				apmu_base + APMU_SDH0, 6, 1, 0, &clk_lock);
 	clk_register_clkdev(clk, "sdh0_mux", NULL);
 
@@ -287,7 +296,8 @@
 	clk_register_clkdev(clk, NULL, "sdhci-pxa.0");
 
 	clk = clk_register_mux(NULL, "sdh1_mux", sdh_parent,
-				ARRAY_SIZE(sdh_parent), CLK_SET_RATE_PARENT,
+				ARRAY_SIZE(sdh_parent),
+				CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
 				apmu_base + APMU_SDH1, 6, 1, 0, &clk_lock);
 	clk_register_clkdev(clk, "sdh1_mux", NULL);
 
@@ -304,7 +314,8 @@
 	clk_register_clkdev(clk, "sph_clk", NULL);
 
 	clk = clk_register_mux(NULL, "disp0_mux", disp_parent,
-				ARRAY_SIZE(disp_parent), CLK_SET_RATE_PARENT,
+				ARRAY_SIZE(disp_parent),
+				CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
 				apmu_base + APMU_DISP0, 6, 1, 0, &clk_lock);
 	clk_register_clkdev(clk, "disp_mux.0", NULL);
 
@@ -317,7 +328,8 @@
 	clk_register_clkdev(clk, "hclk", "mmp-disp.0");
 
 	clk = clk_register_mux(NULL, "ccic0_mux", ccic_parent,
-				ARRAY_SIZE(ccic_parent), CLK_SET_RATE_PARENT,
+				ARRAY_SIZE(ccic_parent),
+				CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
 				apmu_base + APMU_CCIC0, 6, 1, 0, &clk_lock);
 	clk_register_clkdev(clk, "ccic_mux.0", NULL);
 
@@ -327,8 +339,8 @@
 
 	clk = clk_register_mux(NULL, "ccic0_phy_mux", ccic_phy_parent,
 				ARRAY_SIZE(ccic_phy_parent),
-				CLK_SET_RATE_PARENT, apmu_base + APMU_CCIC0,
-				7, 1, 0, &clk_lock);
+				CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
+				apmu_base + APMU_CCIC0, 7, 1, 0, &clk_lock);
 	clk_register_clkdev(clk, "ccic_phy_mux.0", NULL);
 
 	clk = mmp_clk_register_apmu("ccic0_phy", "ccic0_phy_mux",
diff --git a/drivers/clk/mmp/clk-pxa910.c b/drivers/clk/mmp/clk-pxa910.c
index 6ec0569..9efc6a4 100644
--- a/drivers/clk/mmp/clk-pxa910.c
+++ b/drivers/clk/mmp/clk-pxa910.c
@@ -204,7 +204,8 @@
 	clk_register_clkdev(clk, NULL, "pxa910-pwm.3");
 
 	clk = clk_register_mux(NULL, "uart0_mux", uart_parent,
-				ARRAY_SIZE(uart_parent), CLK_SET_RATE_PARENT,
+				ARRAY_SIZE(uart_parent),
+				CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
 				apbc_base + APBC_UART0, 4, 3, 0, &clk_lock);
 	clk_set_parent(clk, uart_pll);
 	clk_register_clkdev(clk, "uart_mux.0", NULL);
@@ -214,7 +215,8 @@
 	clk_register_clkdev(clk, NULL, "pxa2xx-uart.0");
 
 	clk = clk_register_mux(NULL, "uart1_mux", uart_parent,
-				ARRAY_SIZE(uart_parent), CLK_SET_RATE_PARENT,
+				ARRAY_SIZE(uart_parent),
+				CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
 				apbc_base + APBC_UART1, 4, 3, 0, &clk_lock);
 	clk_set_parent(clk, uart_pll);
 	clk_register_clkdev(clk, "uart_mux.1", NULL);
@@ -224,7 +226,8 @@
 	clk_register_clkdev(clk, NULL, "pxa2xx-uart.1");
 
 	clk = clk_register_mux(NULL, "uart2_mux", uart_parent,
-				ARRAY_SIZE(uart_parent), CLK_SET_RATE_PARENT,
+				ARRAY_SIZE(uart_parent),
+				CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
 				apbcp_base + APBCP_UART2, 4, 3, 0, &clk_lock);
 	clk_set_parent(clk, uart_pll);
 	clk_register_clkdev(clk, "uart_mux.2", NULL);
@@ -234,7 +237,8 @@
 	clk_register_clkdev(clk, NULL, "pxa2xx-uart.2");
 
 	clk = clk_register_mux(NULL, "ssp0_mux", ssp_parent,
-				ARRAY_SIZE(ssp_parent), CLK_SET_RATE_PARENT,
+				ARRAY_SIZE(ssp_parent),
+				CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
 				apbc_base + APBC_SSP0, 4, 3, 0, &clk_lock);
 	clk_register_clkdev(clk, "uart_mux.0", NULL);
 
@@ -243,7 +247,8 @@
 	clk_register_clkdev(clk, NULL, "mmp-ssp.0");
 
 	clk = clk_register_mux(NULL, "ssp1_mux", ssp_parent,
-				ARRAY_SIZE(ssp_parent), CLK_SET_RATE_PARENT,
+				ARRAY_SIZE(ssp_parent),
+				CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
 				apbc_base + APBC_SSP1, 4, 3, 0, &clk_lock);
 	clk_register_clkdev(clk, "ssp_mux.1", NULL);
 
@@ -256,7 +261,8 @@
 	clk_register_clkdev(clk, NULL, "pxa3xx-nand.0");
 
 	clk = clk_register_mux(NULL, "sdh0_mux", sdh_parent,
-				ARRAY_SIZE(sdh_parent), CLK_SET_RATE_PARENT,
+				ARRAY_SIZE(sdh_parent),
+				CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
 				apmu_base + APMU_SDH0, 6, 1, 0, &clk_lock);
 	clk_register_clkdev(clk, "sdh0_mux", NULL);
 
@@ -265,7 +271,8 @@
 	clk_register_clkdev(clk, NULL, "sdhci-pxa.0");
 
 	clk = clk_register_mux(NULL, "sdh1_mux", sdh_parent,
-				ARRAY_SIZE(sdh_parent), CLK_SET_RATE_PARENT,
+				ARRAY_SIZE(sdh_parent),
+				CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
 				apmu_base + APMU_SDH1, 6, 1, 0, &clk_lock);
 	clk_register_clkdev(clk, "sdh1_mux", NULL);
 
@@ -282,7 +289,8 @@
 	clk_register_clkdev(clk, "sph_clk", NULL);
 
 	clk = clk_register_mux(NULL, "disp0_mux", disp_parent,
-				ARRAY_SIZE(disp_parent), CLK_SET_RATE_PARENT,
+				ARRAY_SIZE(disp_parent),
+				CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
 				apmu_base + APMU_DISP0, 6, 1, 0, &clk_lock);
 	clk_register_clkdev(clk, "disp_mux.0", NULL);
 
@@ -291,7 +299,8 @@
 	clk_register_clkdev(clk, NULL, "mmp-disp.0");
 
 	clk = clk_register_mux(NULL, "ccic0_mux", ccic_parent,
-				ARRAY_SIZE(ccic_parent), CLK_SET_RATE_PARENT,
+				ARRAY_SIZE(ccic_parent),
+				CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
 				apmu_base + APMU_CCIC0, 6, 1, 0, &clk_lock);
 	clk_register_clkdev(clk, "ccic_mux.0", NULL);
 
@@ -301,8 +310,8 @@
 
 	clk = clk_register_mux(NULL, "ccic0_phy_mux", ccic_phy_parent,
 				ARRAY_SIZE(ccic_phy_parent),
-				CLK_SET_RATE_PARENT, apmu_base + APMU_CCIC0,
-				7, 1, 0, &clk_lock);
+				CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
+				apmu_base + APMU_CCIC0, 7, 1, 0, &clk_lock);
 	clk_register_clkdev(clk, "ccic_phy_mux.0", NULL);
 
 	clk = mmp_clk_register_apmu("ccic0_phy", "ccic0_phy_mux",
diff --git a/drivers/clk/mvebu/armada-370.c b/drivers/clk/mvebu/armada-370.c
index 079960e..fc777bd 100644
--- a/drivers/clk/mvebu/armada-370.c
+++ b/drivers/clk/mvebu/armada-370.c
@@ -32,13 +32,13 @@
 
 enum { A370_CPU_TO_NBCLK, A370_CPU_TO_HCLK, A370_CPU_TO_DRAMCLK };
 
-static const struct coreclk_ratio __initconst a370_coreclk_ratios[] = {
+static const struct coreclk_ratio a370_coreclk_ratios[] __initconst = {
 	{ .id = A370_CPU_TO_NBCLK, .name = "nbclk" },
 	{ .id = A370_CPU_TO_HCLK, .name = "hclk" },
 	{ .id = A370_CPU_TO_DRAMCLK, .name = "dramclk" },
 };
 
-static const u32 __initconst a370_tclk_freqs[] = {
+static const u32 a370_tclk_freqs[] __initconst = {
 	16600000,
 	20000000,
 };
@@ -52,7 +52,7 @@
 	return a370_tclk_freqs[tclk_freq_select];
 }
 
-static const u32 __initconst a370_cpu_freqs[] = {
+static const u32 a370_cpu_freqs[] __initconst = {
 	400000000,
 	533000000,
 	667000000,
@@ -78,7 +78,7 @@
 	return cpu_freq;
 }
 
-static const int __initconst a370_nbclk_ratios[32][2] = {
+static const int a370_nbclk_ratios[32][2] __initconst = {
 	{0, 1}, {1, 2}, {2, 2}, {2, 2},
 	{1, 2}, {1, 2}, {1, 1}, {2, 3},
 	{0, 1}, {1, 2}, {2, 4}, {0, 1},
@@ -89,7 +89,7 @@
 	{0, 1}, {0, 1}, {0, 1}, {0, 1},
 };
 
-static const int __initconst a370_hclk_ratios[32][2] = {
+static const int a370_hclk_ratios[32][2] __initconst = {
 	{0, 1}, {1, 2}, {2, 6}, {2, 3},
 	{1, 3}, {1, 4}, {1, 2}, {2, 6},
 	{0, 1}, {1, 6}, {2, 10}, {0, 1},
@@ -100,7 +100,7 @@
 	{0, 1}, {0, 1}, {0, 1}, {0, 1},
 };
 
-static const int __initconst a370_dramclk_ratios[32][2] = {
+static const int a370_dramclk_ratios[32][2] __initconst = {
 	{0, 1}, {1, 2}, {2, 3}, {2, 3},
 	{1, 3}, {1, 2}, {1, 2}, {2, 6},
 	{0, 1}, {1, 3}, {2, 5}, {0, 1},
@@ -152,7 +152,7 @@
  * Clock Gating Control
  */
 
-static const struct clk_gating_soc_desc __initconst a370_gating_desc[] = {
+static const struct clk_gating_soc_desc a370_gating_desc[] __initconst = {
 	{ "audio", NULL, 0, 0 },
 	{ "pex0_en", NULL, 1, 0 },
 	{ "pex1_en", NULL,  2, 0 },
diff --git a/drivers/clk/mvebu/armada-xp.c b/drivers/clk/mvebu/armada-xp.c
index 13b62ce..9922c44 100644
--- a/drivers/clk/mvebu/armada-xp.c
+++ b/drivers/clk/mvebu/armada-xp.c
@@ -40,7 +40,7 @@
 
 enum { AXP_CPU_TO_NBCLK, AXP_CPU_TO_HCLK, AXP_CPU_TO_DRAMCLK };
 
-static const struct coreclk_ratio __initconst axp_coreclk_ratios[] = {
+static const struct coreclk_ratio axp_coreclk_ratios[] __initconst = {
 	{ .id = AXP_CPU_TO_NBCLK, .name = "nbclk" },
 	{ .id = AXP_CPU_TO_HCLK, .name = "hclk" },
 	{ .id = AXP_CPU_TO_DRAMCLK, .name = "dramclk" },
@@ -52,7 +52,7 @@
 	return 250000000;
 }
 
-static const u32 __initconst axp_cpu_freqs[] = {
+static const u32 axp_cpu_freqs[] __initconst = {
 	1000000000,
 	1066000000,
 	1200000000,
@@ -89,7 +89,7 @@
 	return cpu_freq;
 }
 
-static const int __initconst axp_nbclk_ratios[32][2] = {
+static const int axp_nbclk_ratios[32][2] __initconst = {
 	{0, 1}, {1, 2}, {2, 2}, {2, 2},
 	{1, 2}, {1, 2}, {1, 1}, {2, 3},
 	{0, 1}, {1, 2}, {2, 4}, {0, 1},
@@ -100,7 +100,7 @@
 	{0, 1}, {0, 1}, {0, 1}, {0, 1},
 };
 
-static const int __initconst axp_hclk_ratios[32][2] = {
+static const int axp_hclk_ratios[32][2] __initconst = {
 	{0, 1}, {1, 2}, {2, 6}, {2, 3},
 	{1, 3}, {1, 4}, {1, 2}, {2, 6},
 	{0, 1}, {1, 6}, {2, 10}, {0, 1},
@@ -111,7 +111,7 @@
 	{0, 1}, {0, 1}, {0, 1}, {0, 1},
 };
 
-static const int __initconst axp_dramclk_ratios[32][2] = {
+static const int axp_dramclk_ratios[32][2] __initconst = {
 	{0, 1}, {1, 2}, {2, 3}, {2, 3},
 	{1, 3}, {1, 2}, {1, 2}, {2, 6},
 	{0, 1}, {1, 3}, {2, 5}, {0, 1},
@@ -169,7 +169,7 @@
  * Clock Gating Control
  */
 
-static const struct clk_gating_soc_desc __initconst axp_gating_desc[] = {
+static const struct clk_gating_soc_desc axp_gating_desc[] __initconst = {
 	{ "audio", NULL, 0, 0 },
 	{ "ge3", NULL, 1, 0 },
 	{ "ge2", NULL,  2, 0 },
diff --git a/drivers/clk/mvebu/clk-cpu.c b/drivers/clk/mvebu/clk-cpu.c
index b0fbc07..1466865 100644
--- a/drivers/clk/mvebu/clk-cpu.c
+++ b/drivers/clk/mvebu/clk-cpu.c
@@ -119,7 +119,7 @@
 
 	cpuclk = kzalloc(ncpus * sizeof(*cpuclk), GFP_KERNEL);
 	if (WARN_ON(!cpuclk))
-		return;
+		goto cpuclk_out;
 
 	clks = kzalloc(ncpus * sizeof(*clks), GFP_KERNEL);
 	if (WARN_ON(!clks))
@@ -170,6 +170,8 @@
 		kfree(cpuclk[ncpus].clk_name);
 clks_out:
 	kfree(cpuclk);
+cpuclk_out:
+	iounmap(clock_complex_base);
 }
 
 CLK_OF_DECLARE(armada_xp_cpu_clock, "marvell,armada-xp-cpu-clock",
diff --git a/drivers/clk/mvebu/common.c b/drivers/clk/mvebu/common.c
index adaa4a1..25ceccf 100644
--- a/drivers/clk/mvebu/common.c
+++ b/drivers/clk/mvebu/common.c
@@ -45,8 +45,10 @@
 	clk_data.clk_num = 2 + desc->num_ratios;
 	clk_data.clks = kzalloc(clk_data.clk_num * sizeof(struct clk *),
 				GFP_KERNEL);
-	if (WARN_ON(!clk_data.clks))
+	if (WARN_ON(!clk_data.clks)) {
+		iounmap(base);
 		return;
+	}
 
 	/* Register TCLK */
 	of_property_read_string_index(np, "clock-output-names", 0,
@@ -134,7 +136,7 @@
 
 	ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
 	if (WARN_ON(!ctrl))
-		return;
+		goto ctrl_out;
 
 	spin_lock_init(&ctrl->lock);
 
@@ -145,10 +147,8 @@
 	ctrl->num_gates = n;
 	ctrl->gates = kzalloc(ctrl->num_gates * sizeof(struct clk *),
 			      GFP_KERNEL);
-	if (WARN_ON(!ctrl->gates)) {
-		kfree(ctrl);
-		return;
-	}
+	if (WARN_ON(!ctrl->gates))
+		goto gates_out;
 
 	for (n = 0; n < ctrl->num_gates; n++) {
 		const char *parent =
@@ -160,4 +160,10 @@
 	}
 
 	of_clk_add_provider(np, clk_gating_get_src, ctrl);
+
+	return;
+gates_out:
+	kfree(ctrl);
+ctrl_out:
+	iounmap(base);
 }
diff --git a/drivers/clk/mvebu/dove.c b/drivers/clk/mvebu/dove.c
index 79d7aed..38aee1e 100644
--- a/drivers/clk/mvebu/dove.c
+++ b/drivers/clk/mvebu/dove.c
@@ -74,12 +74,12 @@
 
 enum { DOVE_CPU_TO_L2, DOVE_CPU_TO_DDR };
 
-static const struct coreclk_ratio __initconst dove_coreclk_ratios[] = {
+static const struct coreclk_ratio dove_coreclk_ratios[] __initconst = {
 	{ .id = DOVE_CPU_TO_L2, .name = "l2clk", },
 	{ .id = DOVE_CPU_TO_DDR, .name = "ddrclk", }
 };
 
-static const u32 __initconst dove_tclk_freqs[] = {
+static const u32 dove_tclk_freqs[] __initconst = {
 	166666667,
 	125000000,
 	0, 0
@@ -92,7 +92,7 @@
 	return dove_tclk_freqs[opt];
 }
 
-static const u32 __initconst dove_cpu_freqs[] = {
+static const u32 dove_cpu_freqs[] __initconst = {
 	0, 0, 0, 0, 0,
 	1000000000,
 	933333333, 933333333,
@@ -111,12 +111,12 @@
 	return dove_cpu_freqs[opt];
 }
 
-static const int __initconst dove_cpu_l2_ratios[8][2] = {
+static const int dove_cpu_l2_ratios[8][2] __initconst = {
 	{ 1, 1 }, { 0, 1 }, { 1, 2 }, { 0, 1 },
 	{ 1, 3 }, { 0, 1 }, { 1, 4 }, { 0, 1 }
 };
 
-static const int __initconst dove_cpu_ddr_ratios[16][2] = {
+static const int dove_cpu_ddr_ratios[16][2] __initconst = {
 	{ 1, 1 }, { 0, 1 }, { 1, 2 }, { 2, 5 },
 	{ 1, 3 }, { 0, 1 }, { 1, 4 }, { 0, 1 },
 	{ 1, 5 }, { 0, 1 }, { 1, 6 }, { 0, 1 },
@@ -164,7 +164,7 @@
  * Clock Gating Control
  */
 
-static const struct clk_gating_soc_desc __initconst dove_gating_desc[] = {
+static const struct clk_gating_soc_desc dove_gating_desc[] __initconst = {
 	{ "usb0", NULL, 0, 0 },
 	{ "usb1", NULL, 1, 0 },
 	{ "ge",	"gephy", 2, 0 },
diff --git a/drivers/clk/mvebu/kirkwood.c b/drivers/clk/mvebu/kirkwood.c
index 71d2461..2636a55 100644
--- a/drivers/clk/mvebu/kirkwood.c
+++ b/drivers/clk/mvebu/kirkwood.c
@@ -78,7 +78,7 @@
 
 enum { KIRKWOOD_CPU_TO_L2, KIRKWOOD_CPU_TO_DDR };
 
-static const struct coreclk_ratio __initconst kirkwood_coreclk_ratios[] = {
+static const struct coreclk_ratio kirkwood_coreclk_ratios[] __initconst = {
 	{ .id = KIRKWOOD_CPU_TO_L2, .name = "l2clk", },
 	{ .id = KIRKWOOD_CPU_TO_DDR, .name = "ddrclk", }
 };
@@ -90,7 +90,7 @@
 	return (opt) ? 166666667 : 200000000;
 }
 
-static const u32 __initconst kirkwood_cpu_freqs[] = {
+static const u32 kirkwood_cpu_freqs[] __initconst = {
 	0, 0, 0, 0,
 	600000000,
 	0,
@@ -111,12 +111,12 @@
 	return kirkwood_cpu_freqs[opt];
 }
 
-static const int __initconst kirkwood_cpu_l2_ratios[8][2] = {
+static const int kirkwood_cpu_l2_ratios[8][2] __initconst = {
 	{ 0, 1 }, { 1, 2 }, { 0, 1 }, { 1, 3 },
 	{ 0, 1 }, { 1, 4 }, { 0, 1 }, { 0, 1 }
 };
 
-static const int __initconst kirkwood_cpu_ddr_ratios[16][2] = {
+static const int kirkwood_cpu_ddr_ratios[16][2] __initconst = {
 	{ 0, 1 }, { 0, 1 }, { 1, 2 }, { 0, 1 },
 	{ 1, 3 }, { 0, 1 }, { 1, 4 }, { 2, 9 },
 	{ 1, 5 }, { 1, 6 }, { 0, 1 }, { 0, 1 },
@@ -145,7 +145,7 @@
 	}
 }
 
-static const u32 __initconst mv88f6180_cpu_freqs[] = {
+static const u32 mv88f6180_cpu_freqs[] __initconst = {
 	0, 0, 0, 0, 0,
 	600000000,
 	800000000,
@@ -158,7 +158,7 @@
 	return mv88f6180_cpu_freqs[opt];
 }
 
-static const int __initconst mv88f6180_cpu_ddr_ratios[8][2] = {
+static const int mv88f6180_cpu_ddr_ratios[8][2] __initconst = {
 	{ 0, 1 }, { 0, 1 }, { 0, 1 }, { 0, 1 },
 	{ 0, 1 }, { 1, 3 }, { 1, 4 }, { 1, 5 }
 };
@@ -219,7 +219,7 @@
  * Clock Gating Control
  */
 
-static const struct clk_gating_soc_desc __initconst kirkwood_gating_desc[] = {
+static const struct clk_gating_soc_desc kirkwood_gating_desc[] __initconst = {
 	{ "ge0", NULL, 0, 0 },
 	{ "pex0", NULL, 2, 0 },
 	{ "usb0", NULL, 3, 0 },
diff --git a/drivers/clk/mxs/clk-imx23.c b/drivers/clk/mxs/clk-imx23.c
index f6a7487..c396fe3 100644
--- a/drivers/clk/mxs/clk-imx23.c
+++ b/drivers/clk/mxs/clk-imx23.c
@@ -10,6 +10,7 @@
  */
 
 #include <linux/clk.h>
+#include <linux/clk/mxs.h>
 #include <linux/clkdev.h>
 #include <linux/err.h>
 #include <linux/init.h>
diff --git a/drivers/clk/mxs/clk.h b/drivers/clk/mxs/clk.h
index 81421e2..ef10ad9 100644
--- a/drivers/clk/mxs/clk.h
+++ b/drivers/clk/mxs/clk.h
@@ -52,8 +52,8 @@
 		u8 shift, u8 width, const char **parent_names, int num_parents)
 {
 	return clk_register_mux(NULL, name, parent_names, num_parents,
-				CLK_SET_RATE_PARENT, reg, shift, width,
-				0, &mxs_lock);
+				CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
+				reg, shift, width, 0, &mxs_lock);
 }
 
 static inline struct clk *mxs_clk_fixed_factor(const char *name,
diff --git a/drivers/clk/samsung/Makefile b/drivers/clk/samsung/Makefile
index 5d4d432..3413380 100644
--- a/drivers/clk/samsung/Makefile
+++ b/drivers/clk/samsung/Makefile
@@ -8,3 +8,6 @@
 obj-$(CONFIG_SOC_EXYNOS5420)	+= clk-exynos5420.o
 obj-$(CONFIG_SOC_EXYNOS5440)	+= clk-exynos5440.o
 obj-$(CONFIG_ARCH_EXYNOS)	+= clk-exynos-audss.o
+ifdef CONFIG_COMMON_CLK
+obj-$(CONFIG_ARCH_S3C64XX)	+= clk-s3c64xx.o
+endif
diff --git a/drivers/clk/samsung/clk-exynos-audss.c b/drivers/clk/samsung/clk-exynos-audss.c
index 9b1bbd5..39b40aa 100644
--- a/drivers/clk/samsung/clk-exynos-audss.c
+++ b/drivers/clk/samsung/clk-exynos-audss.c
@@ -62,7 +62,7 @@
 #endif /* CONFIG_PM_SLEEP */
 
 /* register exynos_audss clocks */
-void __init exynos_audss_clk_init(struct device_node *np)
+static void __init exynos_audss_clk_init(struct device_node *np)
 {
 	reg_base = of_iomap(np, 0);
 	if (!reg_base) {
@@ -82,11 +82,13 @@
 	of_clk_add_provider(np, of_clk_src_onecell_get, &clk_data);
 
 	clk_table[EXYNOS_MOUT_AUDSS] = clk_register_mux(NULL, "mout_audss",
-				mout_audss_p, ARRAY_SIZE(mout_audss_p), 0,
+				mout_audss_p, ARRAY_SIZE(mout_audss_p),
+				CLK_SET_RATE_NO_REPARENT,
 				reg_base + ASS_CLK_SRC, 0, 1, 0, &lock);
 
 	clk_table[EXYNOS_MOUT_I2S] = clk_register_mux(NULL, "mout_i2s",
-				mout_i2s_p, ARRAY_SIZE(mout_i2s_p), 0,
+				mout_i2s_p, ARRAY_SIZE(mout_i2s_p),
+				CLK_SET_RATE_NO_REPARENT,
 				reg_base + ASS_CLK_SRC, 2, 2, 0, &lock);
 
 	clk_table[EXYNOS_DOUT_SRP] = clk_register_divider(NULL, "dout_srp",
diff --git a/drivers/clk/samsung/clk-exynos4.c b/drivers/clk/samsung/clk-exynos4.c
index 4e57397..ad5ff50 100644
--- a/drivers/clk/samsung/clk-exynos4.c
+++ b/drivers/clk/samsung/clk-exynos4.c
@@ -17,7 +17,6 @@
 #include <linux/of_address.h>
 
 #include "clk.h"
-#include "clk-pll.h"
 
 /* Exynos4 clock controller register offsets */
 #define SRC_LEFTBUS		0x4200
@@ -97,12 +96,15 @@
 #define GATE_IP_PERIL		0xc950
 #define E4210_GATE_IP_PERIR	0xc960
 #define GATE_BLOCK		0xc970
+#define E4X12_MPLL_LOCK		0x10008
 #define E4X12_MPLL_CON0		0x10108
 #define SRC_DMC			0x10200
 #define SRC_MASK_DMC		0x10300
 #define DIV_DMC0		0x10500
 #define DIV_DMC1		0x10504
 #define GATE_IP_DMC		0x10900
+#define APLL_LOCK		0x14000
+#define E4210_MPLL_LOCK		0x14008
 #define APLL_CON0		0x14100
 #define E4210_MPLL_CON0		0x14108
 #define SRC_CPU			0x14200
@@ -121,6 +123,12 @@
 	EXYNOS4X12,
 };
 
+/* list of PLLs to be registered */
+enum exynos4_plls {
+	apll, mpll, epll, vpll,
+	nr_plls			/* number of PLLs */
+};
+
 /*
  * Let each supported clock get a unique id. This id is used to lookup the clock
  * for device tree based platforms. The clocks are categorized into three
@@ -169,7 +177,7 @@
 	gicisp, smmu_isp, smmu_drc, smmu_fd, smmu_lite0, smmu_lite1, mcuctl_isp,
 	mpwm_isp, i2c0_isp, i2c1_isp, mtcadc_isp, pwm_isp, wdt_isp, uart_isp,
 	asyncaxim, smmu_ispcx, spi0_isp, spi1_isp, pwm_isp_sclk, spi0_isp_sclk,
-	spi1_isp_sclk, uart_isp_sclk,
+	spi1_isp_sclk, uart_isp_sclk, tmu_apbif,
 
 	/* mux clocks */
 	mout_fimc0 = 384, mout_fimc1, mout_fimc2, mout_fimc3, mout_cam0,
@@ -187,7 +195,7 @@
  * list of controller registers to be saved and restored during a
  * suspend/resume cycle.
  */
-static __initdata unsigned long exynos4210_clk_save[] = {
+static unsigned long exynos4210_clk_save[] __initdata = {
 	E4210_SRC_IMAGE,
 	E4210_SRC_LCD1,
 	E4210_SRC_MASK_LCD1,
@@ -198,7 +206,7 @@
 	E4210_MPLL_CON0,
 };
 
-static __initdata unsigned long exynos4x12_clk_save[] = {
+static unsigned long exynos4x12_clk_save[] __initdata = {
 	E4X12_GATE_IP_IMAGE,
 	E4X12_GATE_IP_PERIR,
 	E4X12_SRC_CAM1,
@@ -207,7 +215,7 @@
 	E4X12_MPLL_CON0,
 };
 
-static __initdata unsigned long exynos4_clk_regs[] = {
+static unsigned long exynos4_clk_regs[] __initdata = {
 	SRC_LEFTBUS,
 	DIV_LEFTBUS,
 	GATE_IP_LEFTBUS,
@@ -338,24 +346,24 @@
 PNAME(mout_user_aclk266_gps_p4x12) = {"fin_pll", "div_aclk266_gps", };
 
 /* fixed rate clocks generated outside the soc */
-struct samsung_fixed_rate_clock exynos4_fixed_rate_ext_clks[] __initdata = {
+static struct samsung_fixed_rate_clock exynos4_fixed_rate_ext_clks[] __initdata = {
 	FRATE(xxti, "xxti", NULL, CLK_IS_ROOT, 0),
 	FRATE(xusbxti, "xusbxti", NULL, CLK_IS_ROOT, 0),
 };
 
 /* fixed rate clocks generated inside the soc */
-struct samsung_fixed_rate_clock exynos4_fixed_rate_clks[] __initdata = {
+static struct samsung_fixed_rate_clock exynos4_fixed_rate_clks[] __initdata = {
 	FRATE(none, "sclk_hdmi24m", NULL, CLK_IS_ROOT, 24000000),
 	FRATE(none, "sclk_hdmiphy", NULL, CLK_IS_ROOT, 27000000),
 	FRATE(none, "sclk_usbphy0", NULL, CLK_IS_ROOT, 48000000),
 };
 
-struct samsung_fixed_rate_clock exynos4210_fixed_rate_clks[] __initdata = {
+static struct samsung_fixed_rate_clock exynos4210_fixed_rate_clks[] __initdata = {
 	FRATE(none, "sclk_usbphy1", NULL, CLK_IS_ROOT, 48000000),
 };
 
 /* list of mux clocks supported in all exynos4 soc's */
-struct samsung_mux_clock exynos4_mux_clks[] __initdata = {
+static struct samsung_mux_clock exynos4_mux_clks[] __initdata = {
 	MUX_FA(mout_apll, "mout_apll", mout_apll_p, SRC_CPU, 0, 1,
 			CLK_SET_RATE_PARENT, 0, "mout_apll"),
 	MUX(none, "mout_hdmi", mout_hdmi_p, SRC_TV, 0, 1),
@@ -367,17 +375,20 @@
 			CLK_SET_RATE_PARENT, 0),
 	MUX(none, "mout_spdif", mout_spdif_p, SRC_PERIL1, 8, 2),
 	MUX(none, "mout_onenand1", mout_onenand1_p, SRC_TOP0, 0, 1),
-	MUX_A(sclk_epll, "sclk_epll", mout_epll_p, SRC_TOP0, 4, 1, "sclk_epll"),
+	MUX(sclk_epll, "sclk_epll", mout_epll_p, SRC_TOP0, 4, 1),
 	MUX(none, "mout_onenand", mout_onenand_p, SRC_TOP0, 28, 1),
 };
 
 /* list of mux clocks supported in exynos4210 soc */
-struct samsung_mux_clock exynos4210_mux_clks[] __initdata = {
+static struct samsung_mux_clock exynos4210_mux_early[] __initdata = {
+	MUX(none, "mout_vpllsrc", mout_vpllsrc_p, SRC_TOP1, 0, 1),
+};
+
+static struct samsung_mux_clock exynos4210_mux_clks[] __initdata = {
 	MUX(none, "mout_aclk200", sclk_ampll_p4210, SRC_TOP0, 12, 1),
 	MUX(none, "mout_aclk100", sclk_ampll_p4210, SRC_TOP0, 16, 1),
 	MUX(none, "mout_aclk160", sclk_ampll_p4210, SRC_TOP0, 20, 1),
 	MUX(none, "mout_aclk133", sclk_ampll_p4210, SRC_TOP0, 24, 1),
-	MUX(none, "mout_vpllsrc", mout_vpllsrc_p, SRC_TOP1, 0, 1),
 	MUX(none, "mout_mixer", mout_mixer_p4210, SRC_TV, 4, 1),
 	MUX(none, "mout_dac", mout_dac_p4210, SRC_TV, 8, 1),
 	MUX(none, "mout_g2d0", sclk_ampll_p4210, E4210_SRC_IMAGE, 0, 1),
@@ -385,11 +396,9 @@
 	MUX(none, "mout_g2d", mout_g2d_p, E4210_SRC_IMAGE, 8, 1),
 	MUX(none, "mout_fimd1", group1_p4210, E4210_SRC_LCD1, 0, 4),
 	MUX(none, "mout_mipi1", group1_p4210, E4210_SRC_LCD1, 12, 4),
-	MUX_A(sclk_mpll, "sclk_mpll", mout_mpll_p, SRC_CPU, 8, 1, "mout_mpll"),
-	MUX_A(mout_core, "mout_core", mout_core_p4210,
-			SRC_CPU, 16, 1, "moutcore"),
-	MUX_A(sclk_vpll, "sclk_vpll", sclk_vpll_p4210,
-			SRC_TOP0, 8, 1, "sclk_vpll"),
+	MUX(sclk_mpll, "sclk_mpll", mout_mpll_p, SRC_CPU, 8, 1),
+	MUX(mout_core, "mout_core", mout_core_p4210, SRC_CPU, 16, 1),
+	MUX(sclk_vpll, "sclk_vpll", sclk_vpll_p4210, SRC_TOP0, 8, 1),
 	MUX(mout_fimc0, "mout_fimc0", group1_p4210, SRC_CAM, 0, 4),
 	MUX(mout_fimc1, "mout_fimc1", group1_p4210, SRC_CAM, 4, 4),
 	MUX(mout_fimc2, "mout_fimc2", group1_p4210, SRC_CAM, 8, 4),
@@ -423,9 +432,9 @@
 };
 
 /* list of mux clocks supported in exynos4x12 soc */
-struct samsung_mux_clock exynos4x12_mux_clks[] __initdata = {
-	MUX_A(mout_mpll_user_c, "mout_mpll_user_c", mout_mpll_user_p4x12,
-			SRC_CPU, 24, 1, "mout_mpll"),
+static struct samsung_mux_clock exynos4x12_mux_clks[] __initdata = {
+	MUX(mout_mpll_user_c, "mout_mpll_user_c", mout_mpll_user_p4x12,
+			SRC_CPU, 24, 1),
 	MUX(none, "mout_aclk266_gps", aclk_p4412, SRC_TOP1, 4, 1),
 	MUX(none, "mout_aclk400_mcuisp", aclk_p4412, SRC_TOP1, 8, 1),
 	MUX(mout_mpll_user_t, "mout_mpll_user_t", mout_mpll_user_p4x12,
@@ -445,12 +454,9 @@
 	MUX(none, "mout_jpeg0", sclk_ampll_p4x12, E4X12_SRC_CAM1, 0, 1),
 	MUX(none, "mout_jpeg1", sclk_evpll_p, E4X12_SRC_CAM1, 4, 1),
 	MUX(none, "mout_jpeg", mout_jpeg_p, E4X12_SRC_CAM1, 8, 1),
-	MUX_A(sclk_mpll, "sclk_mpll", mout_mpll_p,
-			SRC_DMC, 12, 1, "sclk_mpll"),
-	MUX_A(sclk_vpll, "sclk_vpll", mout_vpll_p,
-			SRC_TOP0, 8, 1, "sclk_vpll"),
-	MUX_A(mout_core, "mout_core", mout_core_p4x12,
-			SRC_CPU, 16, 1, "moutcore"),
+	MUX(sclk_mpll, "sclk_mpll", mout_mpll_p, SRC_DMC, 12, 1),
+	MUX(sclk_vpll, "sclk_vpll", mout_vpll_p, SRC_TOP0, 8, 1),
+	MUX(mout_core, "mout_core", mout_core_p4x12, SRC_CPU, 16, 1),
 	MUX(mout_fimc0, "mout_fimc0", group1_p4x12, SRC_CAM, 0, 4),
 	MUX(mout_fimc1, "mout_fimc1", group1_p4x12, SRC_CAM, 4, 4),
 	MUX(mout_fimc2, "mout_fimc2", group1_p4x12, SRC_CAM, 8, 4),
@@ -491,7 +497,7 @@
 };
 
 /* list of divider clocks supported in all exynos4 soc's */
-struct samsung_div_clock exynos4_div_clks[] __initdata = {
+static struct samsung_div_clock exynos4_div_clks[] __initdata = {
 	DIV(none, "div_core", "mout_core", DIV_CPU0, 0, 3),
 	DIV(none, "div_core2", "div_core", DIV_CPU0, 28, 3),
 	DIV(none, "div_fimc0", "mout_fimc0", DIV_CAM, 0, 4),
@@ -538,9 +544,8 @@
 	DIV(none, "div_spi_pre2", "div_spi2", DIV_PERIL2, 8, 8),
 	DIV(none, "div_audio1", "mout_audio1", DIV_PERIL4, 0, 4),
 	DIV(none, "div_audio2", "mout_audio2", DIV_PERIL4, 16, 4),
-	DIV_A(arm_clk, "arm_clk", "div_core2", DIV_CPU0, 28, 3, "armclk"),
-	DIV_A(sclk_apll, "sclk_apll", "mout_apll",
-			DIV_CPU0, 24, 3, "sclk_apll"),
+	DIV(arm_clk, "arm_clk", "div_core2", DIV_CPU0, 28, 3),
+	DIV(sclk_apll, "sclk_apll", "mout_apll", DIV_CPU0, 24, 3),
 	DIV_F(none, "div_mipi_pre0", "div_mipi0", DIV_LCD0, 20, 4,
 			CLK_SET_RATE_PARENT, 0),
 	DIV_F(none, "div_mmc_pre0", "div_mmc0", DIV_FSYS1, 8, 8,
@@ -554,7 +559,7 @@
 };
 
 /* list of divider clocks supported in exynos4210 soc */
-struct samsung_div_clock exynos4210_div_clks[] __initdata = {
+static struct samsung_div_clock exynos4210_div_clks[] __initdata = {
 	DIV(aclk200, "aclk200", "mout_aclk200", DIV_TOP, 0, 3),
 	DIV(sclk_fimg2d, "sclk_fimg2d", "mout_g2d", DIV_IMAGE, 0, 4),
 	DIV(none, "div_fimd1", "mout_fimd1", E4210_DIV_LCD1, 0, 4),
@@ -565,7 +570,7 @@
 };
 
 /* list of divider clocks supported in exynos4x12 soc */
-struct samsung_div_clock exynos4x12_div_clks[] __initdata = {
+static struct samsung_div_clock exynos4x12_div_clks[] __initdata = {
 	DIV(none, "div_mdnie0", "mout_mdnie0", DIV_LCD0, 4, 4),
 	DIV(none, "div_mdnie_pwm0", "mout_mdnie_pwm0", DIV_LCD0, 8, 4),
 	DIV(none, "div_mdnie_pwm_pre0", "div_mdnie_pwm0", DIV_LCD0, 12, 4),
@@ -594,7 +599,7 @@
 };
 
 /* list of gate clocks supported in all exynos4 soc's */
-struct samsung_gate_clock exynos4_gate_clks[] __initdata = {
+static struct samsung_gate_clock exynos4_gate_clks[] __initdata = {
 	/*
 	 * After all Exynos4 based platforms are migrated to use device tree,
 	 * the device name and clock alias names specified below for some
@@ -629,164 +634,151 @@
 			CLK_SET_RATE_PARENT, 0),
 	GATE(sclk_audio1, "sclk_audio1", "div_audio1", SRC_MASK_PERIL1, 0,
 			CLK_SET_RATE_PARENT, 0),
-	GATE_D(vp, "s5p-mixer", "vp", "aclk160", GATE_IP_TV, 0, 0, 0),
-	GATE_D(mixer, "s5p-mixer", "mixer", "aclk160", GATE_IP_TV, 1, 0, 0),
-	GATE_D(hdmi, "exynos4-hdmi", "hdmi", "aclk160", GATE_IP_TV, 3, 0, 0),
-	GATE_A(pwm, "pwm", "aclk100", GATE_IP_PERIL, 24, 0, 0, "timers"),
-	GATE_A(sdmmc4, "sdmmc4", "aclk133", GATE_IP_FSYS, 9, 0, 0, "biu"),
-	GATE_A(usb_host, "usb_host", "aclk133",
-			GATE_IP_FSYS, 12, 0, 0, "usbhost"),
-	GATE_DA(sclk_fimc0, "exynos4-fimc.0", "sclk_fimc0", "div_fimc0",
-			SRC_MASK_CAM, 0, CLK_SET_RATE_PARENT, 0, "sclk_fimc"),
-	GATE_DA(sclk_fimc1, "exynos4-fimc.1", "sclk_fimc1", "div_fimc1",
-			SRC_MASK_CAM, 4, CLK_SET_RATE_PARENT, 0, "sclk_fimc"),
-	GATE_DA(sclk_fimc2, "exynos4-fimc.2", "sclk_fimc2", "div_fimc2",
-			SRC_MASK_CAM, 8, CLK_SET_RATE_PARENT, 0, "sclk_fimc"),
-	GATE_DA(sclk_fimc3, "exynos4-fimc.3", "sclk_fimc3", "div_fimc3",
-			SRC_MASK_CAM, 12, CLK_SET_RATE_PARENT, 0, "sclk_fimc"),
-	GATE_DA(sclk_csis0, "s5p-mipi-csis.0", "sclk_csis0", "div_csis0",
-			SRC_MASK_CAM, 24, CLK_SET_RATE_PARENT, 0, "sclk_csis"),
-	GATE_DA(sclk_csis1, "s5p-mipi-csis.1", "sclk_csis1", "div_csis1",
-			SRC_MASK_CAM, 28, CLK_SET_RATE_PARENT, 0, "sclk_csis"),
-	GATE_DA(sclk_fimd0, "exynos4-fb.0", "sclk_fimd0", "div_fimd0",
-			SRC_MASK_LCD0, 0, CLK_SET_RATE_PARENT, 0, "sclk_fimd"),
-	GATE_DA(sclk_mmc0, "exynos4-sdhci.0", "sclk_mmc0", "div_mmc_pre0",
-			SRC_MASK_FSYS, 0, CLK_SET_RATE_PARENT, 0,
-			"mmc_busclk.2"),
-	GATE_DA(sclk_mmc1, "exynos4-sdhci.1", "sclk_mmc1", "div_mmc_pre1",
-			SRC_MASK_FSYS, 4, CLK_SET_RATE_PARENT, 0,
-			"mmc_busclk.2"),
-	GATE_DA(sclk_mmc2, "exynos4-sdhci.2", "sclk_mmc2", "div_mmc_pre2",
-			SRC_MASK_FSYS, 8, CLK_SET_RATE_PARENT, 0,
-			"mmc_busclk.2"),
-	GATE_DA(sclk_mmc3, "exynos4-sdhci.3", "sclk_mmc3", "div_mmc_pre3",
-			SRC_MASK_FSYS, 12, CLK_SET_RATE_PARENT, 0,
-			"mmc_busclk.2"),
-	GATE_DA(sclk_mmc4, NULL, "sclk_mmc4", "div_mmc_pre4",
-			SRC_MASK_FSYS, 16, CLK_SET_RATE_PARENT, 0, "ciu"),
-	GATE_DA(sclk_uart0, "exynos4210-uart.0", "uclk0", "div_uart0",
-			SRC_MASK_PERIL0, 0, CLK_SET_RATE_PARENT,
-			0, "clk_uart_baud0"),
-	GATE_DA(sclk_uart1, "exynos4210-uart.1", "uclk1", "div_uart1",
-			SRC_MASK_PERIL0, 4, CLK_SET_RATE_PARENT,
-			0, "clk_uart_baud0"),
-	GATE_DA(sclk_uart2, "exynos4210-uart.2", "uclk2", "div_uart2",
-			SRC_MASK_PERIL0, 8, CLK_SET_RATE_PARENT,
-			0, "clk_uart_baud0"),
-	GATE_DA(sclk_uart3, "exynos4210-uart.3", "uclk3", "div_uart3",
-			SRC_MASK_PERIL0, 12, CLK_SET_RATE_PARENT,
-			0, "clk_uart_baud0"),
-	GATE_DA(sclk_uart4, "exynos4210-uart.4", "uclk4", "div_uart4",
-			SRC_MASK_PERIL0, 16, CLK_SET_RATE_PARENT,
-			0, "clk_uart_baud0"),
+	GATE(vp, "vp", "aclk160", GATE_IP_TV, 0, 0, 0),
+	GATE(mixer, "mixer", "aclk160", GATE_IP_TV, 1, 0, 0),
+	GATE(hdmi, "hdmi", "aclk160", GATE_IP_TV, 3, 0, 0),
+	GATE(pwm, "pwm", "aclk100", GATE_IP_PERIL, 24, 0, 0),
+	GATE(sdmmc4, "sdmmc4", "aclk133", GATE_IP_FSYS, 9, 0, 0),
+	GATE(usb_host, "usb_host", "aclk133", GATE_IP_FSYS, 12, 0, 0),
+	GATE(sclk_fimc0, "sclk_fimc0", "div_fimc0", SRC_MASK_CAM, 0,
+			CLK_SET_RATE_PARENT, 0),
+	GATE(sclk_fimc1, "sclk_fimc1", "div_fimc1", SRC_MASK_CAM, 4,
+			CLK_SET_RATE_PARENT, 0),
+	GATE(sclk_fimc2, "sclk_fimc2", "div_fimc2", SRC_MASK_CAM, 8,
+			CLK_SET_RATE_PARENT, 0),
+	GATE(sclk_fimc3, "sclk_fimc3", "div_fimc3", SRC_MASK_CAM, 12,
+			CLK_SET_RATE_PARENT, 0),
+	GATE(sclk_csis0, "sclk_csis0", "div_csis0", SRC_MASK_CAM, 24,
+			CLK_SET_RATE_PARENT, 0),
+	GATE(sclk_csis1, "sclk_csis1", "div_csis1", SRC_MASK_CAM, 28,
+			CLK_SET_RATE_PARENT, 0),
+	GATE(sclk_fimd0, "sclk_fimd0", "div_fimd0", SRC_MASK_LCD0, 0,
+			CLK_SET_RATE_PARENT, 0),
+	GATE(sclk_mmc0, "sclk_mmc0", "div_mmc_pre0", SRC_MASK_FSYS, 0,
+			CLK_SET_RATE_PARENT, 0),
+	GATE(sclk_mmc1, "sclk_mmc1", "div_mmc_pre1", SRC_MASK_FSYS, 4,
+			CLK_SET_RATE_PARENT, 0),
+	GATE(sclk_mmc2, "sclk_mmc2", "div_mmc_pre2", SRC_MASK_FSYS, 8,
+			CLK_SET_RATE_PARENT, 0),
+	GATE(sclk_mmc3, "sclk_mmc3", "div_mmc_pre3", SRC_MASK_FSYS, 12,
+			CLK_SET_RATE_PARENT, 0),
+	GATE(sclk_mmc4, "sclk_mmc4", "div_mmc_pre4", SRC_MASK_FSYS, 16,
+			CLK_SET_RATE_PARENT, 0),
+	GATE(sclk_uart0, "uclk0", "div_uart0", SRC_MASK_PERIL0, 0,
+			CLK_SET_RATE_PARENT, 0),
+	GATE(sclk_uart1, "uclk1", "div_uart1", SRC_MASK_PERIL0, 4,
+			CLK_SET_RATE_PARENT, 0),
+	GATE(sclk_uart2, "uclk2", "div_uart2", SRC_MASK_PERIL0, 8,
+			CLK_SET_RATE_PARENT, 0),
+	GATE(sclk_uart3, "uclk3", "div_uart3", SRC_MASK_PERIL0, 12,
+			CLK_SET_RATE_PARENT, 0),
+	GATE(sclk_uart4, "uclk4", "div_uart4", SRC_MASK_PERIL0, 16,
+			CLK_SET_RATE_PARENT, 0),
 	GATE(sclk_audio2, "sclk_audio2", "div_audio2", SRC_MASK_PERIL1, 4,
 			CLK_SET_RATE_PARENT, 0),
-	GATE_DA(sclk_spi0, "exynos4210-spi.0", "sclk_spi0", "div_spi_pre0",
-			SRC_MASK_PERIL1, 16, CLK_SET_RATE_PARENT,
-			0, "spi_busclk0"),
-	GATE_DA(sclk_spi1, "exynos4210-spi.1", "sclk_spi1", "div_spi_pre1",
-			SRC_MASK_PERIL1, 20, CLK_SET_RATE_PARENT,
-			0, "spi_busclk0"),
-	GATE_DA(sclk_spi2, "exynos4210-spi.2", "sclk_spi2", "div_spi_pre2",
-			SRC_MASK_PERIL1, 24, CLK_SET_RATE_PARENT,
-			0, "spi_busclk0"),
-	GATE_DA(fimc0, "exynos4-fimc.0", "fimc0", "aclk160",
-			GATE_IP_CAM, 0, 0, 0, "fimc"),
-	GATE_DA(fimc1, "exynos4-fimc.1", "fimc1", "aclk160",
-			GATE_IP_CAM, 1, 0, 0, "fimc"),
-	GATE_DA(fimc2, "exynos4-fimc.2", "fimc2", "aclk160",
-			GATE_IP_CAM, 2, 0, 0, "fimc"),
-	GATE_DA(fimc3, "exynos4-fimc.3", "fimc3", "aclk160",
-			GATE_IP_CAM, 3, 0, 0, "fimc"),
-	GATE_DA(csis0, "s5p-mipi-csis.0", "csis0", "aclk160",
-			GATE_IP_CAM, 4, 0, 0, "fimc"),
-	GATE_DA(csis1, "s5p-mipi-csis.1", "csis1", "aclk160",
-			GATE_IP_CAM, 5, 0, 0, "fimc"),
-	GATE_DA(smmu_fimc0, "exynos-sysmmu.5", "smmu_fimc0", "aclk160",
-			GATE_IP_CAM, 7, 0, 0, "sysmmu"),
-	GATE_DA(smmu_fimc1, "exynos-sysmmu.6", "smmu_fimc1", "aclk160",
-			GATE_IP_CAM, 8, 0, 0, "sysmmu"),
-	GATE_DA(smmu_fimc2, "exynos-sysmmu.7", "smmu_fimc2", "aclk160",
-			GATE_IP_CAM, 9, 0, 0, "sysmmu"),
-	GATE_DA(smmu_fimc3, "exynos-sysmmu.8", "smmu_fimc3", "aclk160",
-			GATE_IP_CAM, 10, 0, 0, "sysmmu"),
-	GATE_DA(smmu_jpeg, "exynos-sysmmu.3", "smmu_jpeg", "aclk160",
-			GATE_IP_CAM, 11, 0, 0, "sysmmu"),
+	GATE(sclk_spi0, "sclk_spi0", "div_spi_pre0", SRC_MASK_PERIL1, 16,
+			CLK_SET_RATE_PARENT, 0),
+	GATE(sclk_spi1, "sclk_spi1", "div_spi_pre1", SRC_MASK_PERIL1, 20,
+			CLK_SET_RATE_PARENT, 0),
+	GATE(sclk_spi2, "sclk_spi2", "div_spi_pre2", SRC_MASK_PERIL1, 24,
+			CLK_SET_RATE_PARENT, 0),
+	GATE(fimc0, "fimc0", "aclk160", GATE_IP_CAM, 0,
+			0, 0),
+	GATE(fimc1, "fimc1", "aclk160", GATE_IP_CAM, 1,
+			0, 0),
+	GATE(fimc2, "fimc2", "aclk160", GATE_IP_CAM, 2,
+			0, 0),
+	GATE(fimc3, "fimc3", "aclk160", GATE_IP_CAM, 3,
+			0, 0),
+	GATE(csis0, "csis0", "aclk160", GATE_IP_CAM, 4,
+			0, 0),
+	GATE(csis1, "csis1", "aclk160", GATE_IP_CAM, 5,
+			0, 0),
+	GATE(smmu_fimc0, "smmu_fimc0", "aclk160", GATE_IP_CAM, 7,
+			0, 0),
+	GATE(smmu_fimc1, "smmu_fimc1", "aclk160", GATE_IP_CAM, 8,
+			0, 0),
+	GATE(smmu_fimc2, "smmu_fimc2", "aclk160", GATE_IP_CAM, 9,
+			0, 0),
+	GATE(smmu_fimc3, "smmu_fimc3", "aclk160", GATE_IP_CAM, 10,
+			0, 0),
+	GATE(smmu_jpeg, "smmu_jpeg", "aclk160", GATE_IP_CAM, 11,
+			0, 0),
 	GATE(pixelasyncm0, "pxl_async0", "aclk160", GATE_IP_CAM, 17, 0, 0),
 	GATE(pixelasyncm1, "pxl_async1", "aclk160", GATE_IP_CAM, 18, 0, 0),
-	GATE_DA(smmu_tv, "exynos-sysmmu.2", "smmu_tv", "aclk160",
-			GATE_IP_TV, 4, 0, 0, "sysmmu"),
-	GATE_DA(mfc, "s5p-mfc", "mfc", "aclk100", GATE_IP_MFC, 0, 0, 0, "mfc"),
-	GATE_DA(smmu_mfcl, "exynos-sysmmu.0", "smmu_mfcl", "aclk100",
-			GATE_IP_MFC, 1, 0, 0, "sysmmu"),
-	GATE_DA(smmu_mfcr, "exynos-sysmmu.1", "smmu_mfcr", "aclk100",
-			GATE_IP_MFC, 2, 0, 0, "sysmmu"),
-	GATE_DA(fimd0, "exynos4-fb.0", "fimd0", "aclk160",
-			GATE_IP_LCD0, 0, 0, 0, "fimd"),
-	GATE_DA(smmu_fimd0, "exynos-sysmmu.10", "smmu_fimd0", "aclk160",
-			GATE_IP_LCD0, 4, 0, 0, "sysmmu"),
-	GATE_DA(pdma0, "dma-pl330.0", "pdma0", "aclk133",
-			GATE_IP_FSYS, 0, 0, 0, "dma"),
-	GATE_DA(pdma1, "dma-pl330.1", "pdma1", "aclk133",
-			GATE_IP_FSYS, 1, 0, 0, "dma"),
-	GATE_DA(sdmmc0, "exynos4-sdhci.0", "sdmmc0", "aclk133",
-			GATE_IP_FSYS, 5, 0, 0, "hsmmc"),
-	GATE_DA(sdmmc1, "exynos4-sdhci.1", "sdmmc1", "aclk133",
-			GATE_IP_FSYS, 6, 0, 0, "hsmmc"),
-	GATE_DA(sdmmc2, "exynos4-sdhci.2", "sdmmc2", "aclk133",
-			GATE_IP_FSYS, 7, 0, 0, "hsmmc"),
-	GATE_DA(sdmmc3, "exynos4-sdhci.3", "sdmmc3", "aclk133",
-			GATE_IP_FSYS, 8, 0, 0, "hsmmc"),
-	GATE_DA(uart0, "exynos4210-uart.0", "uart0", "aclk100",
-			GATE_IP_PERIL, 0, 0, 0, "uart"),
-	GATE_DA(uart1, "exynos4210-uart.1", "uart1", "aclk100",
-			GATE_IP_PERIL, 1, 0, 0, "uart"),
-	GATE_DA(uart2, "exynos4210-uart.2", "uart2", "aclk100",
-			GATE_IP_PERIL, 2, 0, 0, "uart"),
-	GATE_DA(uart3, "exynos4210-uart.3", "uart3", "aclk100",
-			GATE_IP_PERIL, 3, 0, 0, "uart"),
-	GATE_DA(uart4, "exynos4210-uart.4", "uart4", "aclk100",
-			GATE_IP_PERIL, 4, 0, 0, "uart"),
-	GATE_DA(i2c0, "s3c2440-i2c.0", "i2c0", "aclk100",
-			GATE_IP_PERIL, 6, 0, 0, "i2c"),
-	GATE_DA(i2c1, "s3c2440-i2c.1", "i2c1", "aclk100",
-			GATE_IP_PERIL, 7, 0, 0, "i2c"),
-	GATE_DA(i2c2, "s3c2440-i2c.2", "i2c2", "aclk100",
-			GATE_IP_PERIL, 8, 0, 0, "i2c"),
-	GATE_DA(i2c3, "s3c2440-i2c.3", "i2c3", "aclk100",
-			GATE_IP_PERIL, 9, 0, 0, "i2c"),
-	GATE_DA(i2c4, "s3c2440-i2c.4", "i2c4", "aclk100",
-			GATE_IP_PERIL, 10, 0, 0, "i2c"),
-	GATE_DA(i2c5, "s3c2440-i2c.5", "i2c5", "aclk100",
-			GATE_IP_PERIL, 11, 0, 0, "i2c"),
-	GATE_DA(i2c6, "s3c2440-i2c.6", "i2c6", "aclk100",
-			GATE_IP_PERIL, 12, 0, 0, "i2c"),
-	GATE_DA(i2c7, "s3c2440-i2c.7", "i2c7", "aclk100",
-			GATE_IP_PERIL, 13, 0, 0, "i2c"),
-	GATE_DA(i2c_hdmi, "s3c2440-hdmiphy-i2c", "i2c-hdmi", "aclk100",
-			GATE_IP_PERIL, 14, 0, 0, "i2c"),
-	GATE_DA(spi0, "exynos4210-spi.0", "spi0", "aclk100",
-			GATE_IP_PERIL, 16, 0, 0, "spi"),
-	GATE_DA(spi1, "exynos4210-spi.1", "spi1", "aclk100",
-			GATE_IP_PERIL, 17, 0, 0, "spi"),
-	GATE_DA(spi2, "exynos4210-spi.2", "spi2", "aclk100",
-			GATE_IP_PERIL, 18, 0, 0, "spi"),
-	GATE_DA(i2s1, "samsung-i2s.1", "i2s1", "aclk100",
-			GATE_IP_PERIL, 20, 0, 0, "iis"),
-	GATE_DA(i2s2, "samsung-i2s.2", "i2s2", "aclk100",
-			GATE_IP_PERIL, 21, 0, 0, "iis"),
-	GATE_DA(pcm1, "samsung-pcm.1", "pcm1", "aclk100",
-			GATE_IP_PERIL, 22, 0, 0, "pcm"),
-	GATE_DA(pcm2, "samsung-pcm.2", "pcm2", "aclk100",
-			GATE_IP_PERIL, 23, 0, 0, "pcm"),
-	GATE_DA(spdif, "samsung-spdif", "spdif", "aclk100",
-			GATE_IP_PERIL, 26, 0, 0, "spdif"),
-	GATE_DA(ac97, "samsung-ac97", "ac97", "aclk100",
-			GATE_IP_PERIL, 27, 0, 0, "ac97"),
+	GATE(smmu_tv, "smmu_tv", "aclk160", GATE_IP_TV, 4,
+			0, 0),
+	GATE(mfc, "mfc", "aclk100", GATE_IP_MFC, 0, 0, 0),
+	GATE(smmu_mfcl, "smmu_mfcl", "aclk100", GATE_IP_MFC, 1,
+			0, 0),
+	GATE(smmu_mfcr, "smmu_mfcr", "aclk100", GATE_IP_MFC, 2,
+			0, 0),
+	GATE(fimd0, "fimd0", "aclk160", GATE_IP_LCD0, 0,
+			0, 0),
+	GATE(smmu_fimd0, "smmu_fimd0", "aclk160", GATE_IP_LCD0, 4,
+			0, 0),
+	GATE(pdma0, "pdma0", "aclk133", GATE_IP_FSYS, 0,
+			0, 0),
+	GATE(pdma1, "pdma1", "aclk133", GATE_IP_FSYS, 1,
+			0, 0),
+	GATE(sdmmc0, "sdmmc0", "aclk133", GATE_IP_FSYS, 5,
+			0, 0),
+	GATE(sdmmc1, "sdmmc1", "aclk133", GATE_IP_FSYS, 6,
+			0, 0),
+	GATE(sdmmc2, "sdmmc2", "aclk133", GATE_IP_FSYS, 7,
+			0, 0),
+	GATE(sdmmc3, "sdmmc3", "aclk133", GATE_IP_FSYS, 8,
+			0, 0),
+	GATE(uart0, "uart0", "aclk100", GATE_IP_PERIL, 0,
+			0, 0),
+	GATE(uart1, "uart1", "aclk100", GATE_IP_PERIL, 1,
+			0, 0),
+	GATE(uart2, "uart2", "aclk100", GATE_IP_PERIL, 2,
+			0, 0),
+	GATE(uart3, "uart3", "aclk100", GATE_IP_PERIL, 3,
+			0, 0),
+	GATE(uart4, "uart4", "aclk100", GATE_IP_PERIL, 4,
+			0, 0),
+	GATE(i2c0, "i2c0", "aclk100", GATE_IP_PERIL, 6,
+			0, 0),
+	GATE(i2c1, "i2c1", "aclk100", GATE_IP_PERIL, 7,
+			0, 0),
+	GATE(i2c2, "i2c2", "aclk100", GATE_IP_PERIL, 8,
+			0, 0),
+	GATE(i2c3, "i2c3", "aclk100", GATE_IP_PERIL, 9,
+			0, 0),
+	GATE(i2c4, "i2c4", "aclk100", GATE_IP_PERIL, 10,
+			0, 0),
+	GATE(i2c5, "i2c5", "aclk100", GATE_IP_PERIL, 11,
+			0, 0),
+	GATE(i2c6, "i2c6", "aclk100", GATE_IP_PERIL, 12,
+			0, 0),
+	GATE(i2c7, "i2c7", "aclk100", GATE_IP_PERIL, 13,
+			0, 0),
+	GATE(i2c_hdmi, "i2c-hdmi", "aclk100", GATE_IP_PERIL, 14,
+			0, 0),
+	GATE(spi0, "spi0", "aclk100", GATE_IP_PERIL, 16,
+			0, 0),
+	GATE(spi1, "spi1", "aclk100", GATE_IP_PERIL, 17,
+			0, 0),
+	GATE(spi2, "spi2", "aclk100", GATE_IP_PERIL, 18,
+			0, 0),
+	GATE(i2s1, "i2s1", "aclk100", GATE_IP_PERIL, 20,
+			0, 0),
+	GATE(i2s2, "i2s2", "aclk100", GATE_IP_PERIL, 21,
+			0, 0),
+	GATE(pcm1, "pcm1", "aclk100", GATE_IP_PERIL, 22,
+			0, 0),
+	GATE(pcm2, "pcm2", "aclk100", GATE_IP_PERIL, 23,
+			0, 0),
+	GATE(spdif, "spdif", "aclk100", GATE_IP_PERIL, 26,
+			0, 0),
+	GATE(ac97, "ac97", "aclk100", GATE_IP_PERIL, 27,
+			0, 0),
 };
 
 /* list of gate clocks supported in exynos4210 soc */
-struct samsung_gate_clock exynos4210_gate_clks[] __initdata = {
+static struct samsung_gate_clock exynos4210_gate_clks[] __initdata = {
 	GATE(tvenc, "tvenc", "aclk160", GATE_IP_TV, 2, 0, 0),
 	GATE(g2d, "g2d", "aclk200", E4210_GATE_IP_IMAGE, 0, 0, 0),
 	GATE(rotator, "rotator", "aclk200", E4210_GATE_IP_IMAGE, 1, 0, 0),
@@ -811,17 +803,23 @@
 			SRC_MASK_FSYS, 24, CLK_SET_RATE_PARENT, 0),
 	GATE(sclk_mixer, "sclk_mixer", "mout_mixer", SRC_MASK_TV, 4, 0, 0),
 	GATE(sclk_dac, "sclk_dac", "mout_dac", SRC_MASK_TV, 8, 0, 0),
-	GATE_A(tsadc, "tsadc", "aclk100", GATE_IP_PERIL, 15, 0, 0, "adc"),
-	GATE_A(mct, "mct", "aclk100", E4210_GATE_IP_PERIR, 13, 0, 0, "mct"),
-	GATE_A(wdt, "watchdog", "aclk100", E4210_GATE_IP_PERIR, 14, 0, 0, "watchdog"),
-	GATE_A(rtc, "rtc", "aclk100", E4210_GATE_IP_PERIR, 15, 0, 0, "rtc"),
-	GATE_A(keyif, "keyif", "aclk100", E4210_GATE_IP_PERIR, 16, 0, 0, "keypad"),
-	GATE_DA(sclk_fimd1, "exynos4-fb.1", "sclk_fimd1", "div_fimd1",
-			E4210_SRC_MASK_LCD1, 0, CLK_SET_RATE_PARENT, 0, "sclk_fimd"),
+	GATE(tsadc, "tsadc", "aclk100", GATE_IP_PERIL, 15,
+			0, 0),
+	GATE(mct, "mct", "aclk100", E4210_GATE_IP_PERIR, 13,
+			0, 0),
+	GATE(wdt, "watchdog", "aclk100", E4210_GATE_IP_PERIR, 14,
+			0, 0),
+	GATE(rtc, "rtc", "aclk100", E4210_GATE_IP_PERIR, 15,
+			0, 0),
+	GATE(keyif, "keyif", "aclk100", E4210_GATE_IP_PERIR, 16,
+			0, 0),
+	GATE(sclk_fimd1, "sclk_fimd1", "div_fimd1", E4210_SRC_MASK_LCD1, 0,
+			CLK_SET_RATE_PARENT, 0),
+	GATE(tmu_apbif, "tmu_apbif", "aclk100", E4210_GATE_IP_PERIR, 17, 0, 0),
 };
 
 /* list of gate clocks supported in exynos4x12 soc */
-struct samsung_gate_clock exynos4x12_gate_clks[] __initdata = {
+static struct samsung_gate_clock exynos4x12_gate_clks[] __initdata = {
 	GATE(audss, "audss", "sclk_epll", E4X12_GATE_IP_MAUDIO, 0, 0, 0),
 	GATE(mdnie0, "mdnie0", "aclk160", GATE_IP_LCD0, 2, 0, 0),
 	GATE(rotator, "rotator", "aclk200", E4X12_GATE_IP_IMAGE, 1, 0, 0),
@@ -840,10 +838,11 @@
 			SRC_MASK_FSYS, 24, CLK_SET_RATE_PARENT, 0),
 	GATE(smmu_rotator, "smmu_rotator", "aclk200",
 			E4X12_GATE_IP_IMAGE, 4, 0, 0),
-	GATE_A(mct, "mct", "aclk100", E4X12_GATE_IP_PERIR, 13, 0, 0, "mct"),
-	GATE_A(rtc, "rtc", "aclk100", E4X12_GATE_IP_PERIR, 15, 0, 0, "rtc"),
-	GATE_A(keyif, "keyif", "aclk100",
-			E4X12_GATE_IP_PERIR, 16, 0, 0, "keypad"),
+	GATE(mct, "mct", "aclk100", E4X12_GATE_IP_PERIR, 13,
+			0, 0),
+	GATE(rtc, "rtc", "aclk100", E4X12_GATE_IP_PERIR, 15,
+			0, 0),
+	GATE(keyif, "keyif", "aclk100", E4X12_GATE_IP_PERIR, 16, 0, 0),
 	GATE(sclk_pwm_isp, "sclk_pwm_isp", "div_pwm_isp",
 			E4X12_SRC_MASK_ISP, 0, CLK_SET_RATE_PARENT, 0),
 	GATE(sclk_spi0_isp, "sclk_spi0_isp", "div_spi0_isp_pre",
@@ -860,12 +859,11 @@
 			E4X12_GATE_IP_ISP, 2, 0, 0),
 	GATE(uart_isp_sclk, "uart_isp_sclk", "sclk_uart_isp",
 			E4X12_GATE_IP_ISP, 3, 0, 0),
-	GATE_A(wdt, "watchdog", "aclk100",
-			E4X12_GATE_IP_PERIR, 14, 0, 0, "watchdog"),
-	GATE_DA(pcm0, "samsung-pcm.0", "pcm0", "aclk100",
-			E4X12_GATE_IP_MAUDIO, 2, 0, 0, "pcm"),
-	GATE_DA(i2s0, "samsung-i2s.0", "i2s0", "aclk100",
-			E4X12_GATE_IP_MAUDIO, 3, 0, 0, "iis"),
+	GATE(wdt, "watchdog", "aclk100", E4X12_GATE_IP_PERIR, 14, 0, 0),
+	GATE(pcm0, "pcm0", "aclk100", E4X12_GATE_IP_MAUDIO, 2,
+			0, 0),
+	GATE(i2s0, "i2s0", "aclk100", E4X12_GATE_IP_MAUDIO, 3,
+			0, 0),
 	GATE(fimc_isp, "isp", "aclk200", E4X12_GATE_ISP0, 0,
 			CLK_IGNORE_UNUSED | CLK_GET_RATE_NOCACHE, 0),
 	GATE(fimc_drc, "drc", "aclk200", E4X12_GATE_ISP0, 1,
@@ -919,6 +917,21 @@
 	GATE(spi1_isp, "spi1_isp", "aclk200", E4X12_GATE_ISP1, 13,
 			CLK_IGNORE_UNUSED | CLK_GET_RATE_NOCACHE, 0),
 	GATE(g2d, "g2d", "aclk200", GATE_IP_DMC, 23, 0, 0),
+	GATE(tmu_apbif, "tmu_apbif", "aclk100", E4X12_GATE_IP_PERIR, 17, 0, 0),
+};
+
+static struct samsung_clock_alias exynos4_aliases[] __initdata = {
+	ALIAS(mout_core, NULL, "moutcore"),
+	ALIAS(arm_clk, NULL, "armclk"),
+	ALIAS(sclk_apll, NULL, "mout_apll"),
+};
+
+static struct samsung_clock_alias exynos4210_aliases[] __initdata = {
+	ALIAS(sclk_mpll, NULL, "mout_mpll"),
+};
+
+static struct samsung_clock_alias exynos4x12_aliases[] __initdata = {
+	ALIAS(mout_mpll_user_c, NULL, "mout_mpll"),
 };
 
 /*
@@ -973,36 +986,116 @@
 
 }
 
-/*
- * This function allows non-dt platforms to specify the clock speed of the
- * xxti and xusbxti clocks. These clocks are then registered with the specified
- * clock speed.
- */
-void __init exynos4_clk_register_fixed_ext(unsigned long xxti_f,
-						unsigned long xusbxti_f)
-{
-	exynos4_fixed_rate_ext_clks[0].fixed_rate = xxti_f;
-	exynos4_fixed_rate_ext_clks[1].fixed_rate = xusbxti_f;
-	samsung_clk_register_fixed_rate(exynos4_fixed_rate_ext_clks,
-			ARRAY_SIZE(exynos4_fixed_rate_ext_clks));
-}
-
-static __initdata struct of_device_id ext_clk_match[] = {
+static struct of_device_id ext_clk_match[] __initdata = {
 	{ .compatible = "samsung,clock-xxti", .data = (void *)0, },
 	{ .compatible = "samsung,clock-xusbxti", .data = (void *)1, },
 	{},
 };
 
-/* register exynos4 clocks */
-void __init exynos4_clk_init(struct device_node *np, enum exynos4_soc exynos4_soc, void __iomem *reg_base, unsigned long xom)
-{
-	struct clk *apll, *mpll, *epll, *vpll;
+/* PLLs PMS values */
+static struct samsung_pll_rate_table exynos4210_apll_rates[] __initdata = {
+	PLL_45XX_RATE(1200000000, 150,  3, 1, 28),
+	PLL_45XX_RATE(1000000000, 250,  6, 1, 28),
+	PLL_45XX_RATE( 800000000, 200,  6, 1, 28),
+	PLL_45XX_RATE( 666857142, 389, 14, 1, 13),
+	PLL_45XX_RATE( 600000000, 100,  4, 1, 13),
+	PLL_45XX_RATE( 533000000, 533, 24, 1,  5),
+	PLL_45XX_RATE( 500000000, 250,  6, 2, 28),
+	PLL_45XX_RATE( 400000000, 200,  6, 2, 28),
+	PLL_45XX_RATE( 200000000, 200,  6, 3, 28),
+	{ /* sentinel */ }
+};
 
-	if (np) {
-		reg_base = of_iomap(np, 0);
-		if (!reg_base)
-			panic("%s: failed to map registers\n", __func__);
-	}
+static struct samsung_pll_rate_table exynos4210_epll_rates[] __initdata = {
+	PLL_4600_RATE(192000000, 48, 3, 1,     0, 0),
+	PLL_4600_RATE(180633605, 45, 3, 1, 10381, 0),
+	PLL_4600_RATE(180000000, 45, 3, 1,     0, 0),
+	PLL_4600_RATE( 73727996, 73, 3, 3, 47710, 1),
+	PLL_4600_RATE( 67737602, 90, 4, 3, 20762, 1),
+	PLL_4600_RATE( 49151992, 49, 3, 3,  9961, 0),
+	PLL_4600_RATE( 45158401, 45, 3, 3, 10381, 0),
+	{ /* sentinel */ }
+};
+
+static struct samsung_pll_rate_table exynos4210_vpll_rates[] __initdata = {
+	PLL_4650_RATE(360000000, 44, 3, 0, 1024, 0, 14, 0),
+	PLL_4650_RATE(324000000, 53, 2, 1, 1024, 1,  1, 1),
+	PLL_4650_RATE(259617187, 63, 3, 1, 1950, 0, 20, 1),
+	PLL_4650_RATE(110000000, 53, 3, 2, 2048, 0, 17, 0),
+	PLL_4650_RATE( 55360351, 53, 3, 3, 2417, 0, 17, 0),
+	{ /* sentinel */ }
+};
+
+static struct samsung_pll_rate_table exynos4x12_apll_rates[] __initdata = {
+	PLL_35XX_RATE(1500000000, 250, 4, 0),
+	PLL_35XX_RATE(1400000000, 175, 3, 0),
+	PLL_35XX_RATE(1300000000, 325, 6, 0),
+	PLL_35XX_RATE(1200000000, 200, 4, 0),
+	PLL_35XX_RATE(1100000000, 275, 6, 0),
+	PLL_35XX_RATE(1000000000, 125, 3, 0),
+	PLL_35XX_RATE( 900000000, 150, 4, 0),
+	PLL_35XX_RATE( 800000000, 100, 3, 0),
+	PLL_35XX_RATE( 700000000, 175, 3, 1),
+	PLL_35XX_RATE( 600000000, 200, 4, 1),
+	PLL_35XX_RATE( 500000000, 125, 3, 1),
+	PLL_35XX_RATE( 400000000, 100, 3, 1),
+	PLL_35XX_RATE( 300000000, 200, 4, 2),
+	PLL_35XX_RATE( 200000000, 100, 3, 2),
+	{ /* sentinel */ }
+};
+
+static struct samsung_pll_rate_table exynos4x12_epll_rates[] __initdata = {
+	PLL_36XX_RATE(192000000, 48, 3, 1,     0),
+	PLL_36XX_RATE(180633605, 45, 3, 1, 10381),
+	PLL_36XX_RATE(180000000, 45, 3, 1,     0),
+	PLL_36XX_RATE( 73727996, 73, 3, 3, 47710),
+	PLL_36XX_RATE( 67737602, 90, 4, 3, 20762),
+	PLL_36XX_RATE( 49151992, 49, 3, 3,  9961),
+	PLL_36XX_RATE( 45158401, 45, 3, 3, 10381),
+	{ /* sentinel */ }
+};
+
+static struct samsung_pll_rate_table exynos4x12_vpll_rates[] __initdata = {
+	PLL_36XX_RATE(533000000, 133, 3, 1, 16384),
+	PLL_36XX_RATE(440000000, 110, 3, 1,     0),
+	PLL_36XX_RATE(350000000, 175, 3, 2,     0),
+	PLL_36XX_RATE(266000000, 133, 3, 2,     0),
+	PLL_36XX_RATE(160000000, 160, 3, 3,     0),
+	PLL_36XX_RATE(106031250,  53, 3, 2,  1024),
+	PLL_36XX_RATE( 53015625,  53, 3, 3,  1024),
+	{ /* sentinel */ }
+};
+
+static struct samsung_pll_clock exynos4210_plls[nr_plls] __initdata = {
+	[apll] = PLL_A(pll_4508, fout_apll, "fout_apll", "fin_pll", APLL_LOCK,
+		APLL_CON0, "fout_apll", NULL),
+	[mpll] = PLL_A(pll_4508, fout_mpll, "fout_mpll", "fin_pll",
+		E4210_MPLL_LOCK, E4210_MPLL_CON0, "fout_mpll", NULL),
+	[epll] = PLL_A(pll_4600, fout_epll, "fout_epll", "fin_pll", EPLL_LOCK,
+		EPLL_CON0, "fout_epll", NULL),
+	[vpll] = PLL_A(pll_4650c, fout_vpll, "fout_vpll", "mout_vpllsrc",
+		VPLL_LOCK, VPLL_CON0, "fout_vpll", NULL),
+};
+
+static struct samsung_pll_clock exynos4x12_plls[nr_plls] __initdata = {
+	[apll] = PLL(pll_35xx, fout_apll, "fout_apll", "fin_pll",
+			APLL_LOCK, APLL_CON0, NULL),
+	[mpll] = PLL(pll_35xx, fout_mpll, "fout_mpll", "fin_pll",
+			E4X12_MPLL_LOCK, E4X12_MPLL_CON0, NULL),
+	[epll] = PLL(pll_36xx, fout_epll, "fout_epll", "fin_pll",
+			EPLL_LOCK, EPLL_CON0, NULL),
+	[vpll] = PLL(pll_36xx, fout_vpll, "fout_vpll", "fin_pll",
+			VPLL_LOCK, VPLL_CON0, NULL),
+};
+
+/* register exynos4 clocks */
+static void __init exynos4_clk_init(struct device_node *np,
+				    enum exynos4_soc exynos4_soc,
+				    void __iomem *reg_base, unsigned long xom)
+{
+	reg_base = of_iomap(np, 0);
+	if (!reg_base)
+		panic("%s: failed to map registers\n", __func__);
 
 	if (exynos4_soc == EXYNOS4210)
 		samsung_clk_init(np, reg_base, nr_clks,
@@ -1013,37 +1106,42 @@
 			exynos4_clk_regs, ARRAY_SIZE(exynos4_clk_regs),
 			exynos4x12_clk_save, ARRAY_SIZE(exynos4x12_clk_save));
 
-	if (np)
-		samsung_clk_of_register_fixed_ext(exynos4_fixed_rate_ext_clks,
+	samsung_clk_of_register_fixed_ext(exynos4_fixed_rate_ext_clks,
 			ARRAY_SIZE(exynos4_fixed_rate_ext_clks),
 			ext_clk_match);
 
 	exynos4_clk_register_finpll(xom);
 
 	if (exynos4_soc == EXYNOS4210) {
-		apll = samsung_clk_register_pll45xx("fout_apll", "fin_pll",
-					reg_base + APLL_CON0, pll_4508);
-		mpll = samsung_clk_register_pll45xx("fout_mpll", "fin_pll",
-					reg_base + E4210_MPLL_CON0, pll_4508);
-		epll = samsung_clk_register_pll46xx("fout_epll", "fin_pll",
-					reg_base + EPLL_CON0, pll_4600);
-		vpll = samsung_clk_register_pll46xx("fout_vpll", "mout_vpllsrc",
-					reg_base + VPLL_CON0, pll_4650c);
-	} else {
-		apll = samsung_clk_register_pll35xx("fout_apll", "fin_pll",
-					reg_base + APLL_CON0);
-		mpll = samsung_clk_register_pll35xx("fout_mpll", "fin_pll",
-					reg_base + E4X12_MPLL_CON0);
-		epll = samsung_clk_register_pll36xx("fout_epll", "fin_pll",
-					reg_base + EPLL_CON0);
-		vpll = samsung_clk_register_pll36xx("fout_vpll", "fin_pll",
-					reg_base + VPLL_CON0);
-	}
+		samsung_clk_register_mux(exynos4210_mux_early,
+					ARRAY_SIZE(exynos4210_mux_early));
 
-	samsung_clk_add_lookup(apll, fout_apll);
-	samsung_clk_add_lookup(mpll, fout_mpll);
-	samsung_clk_add_lookup(epll, fout_epll);
-	samsung_clk_add_lookup(vpll, fout_vpll);
+		if (_get_rate("fin_pll") == 24000000) {
+			exynos4210_plls[apll].rate_table =
+							exynos4210_apll_rates;
+			exynos4210_plls[epll].rate_table =
+							exynos4210_epll_rates;
+		}
+
+		if (_get_rate("mout_vpllsrc") == 24000000)
+			exynos4210_plls[vpll].rate_table =
+							exynos4210_vpll_rates;
+
+		samsung_clk_register_pll(exynos4210_plls,
+					ARRAY_SIZE(exynos4210_plls), reg_base);
+	} else {
+		if (_get_rate("fin_pll") == 24000000) {
+			exynos4x12_plls[apll].rate_table =
+							exynos4x12_apll_rates;
+			exynos4x12_plls[epll].rate_table =
+							exynos4x12_epll_rates;
+			exynos4x12_plls[vpll].rate_table =
+							exynos4x12_vpll_rates;
+		}
+
+		samsung_clk_register_pll(exynos4x12_plls,
+					ARRAY_SIZE(exynos4x12_plls), reg_base);
+	}
 
 	samsung_clk_register_fixed_rate(exynos4_fixed_rate_clks,
 			ARRAY_SIZE(exynos4_fixed_rate_clks));
@@ -1063,6 +1161,8 @@
 			ARRAY_SIZE(exynos4210_div_clks));
 		samsung_clk_register_gate(exynos4210_gate_clks,
 			ARRAY_SIZE(exynos4210_gate_clks));
+		samsung_clk_register_alias(exynos4210_aliases,
+			ARRAY_SIZE(exynos4210_aliases));
 	} else {
 		samsung_clk_register_mux(exynos4x12_mux_clks,
 			ARRAY_SIZE(exynos4x12_mux_clks));
@@ -1070,14 +1170,19 @@
 			ARRAY_SIZE(exynos4x12_div_clks));
 		samsung_clk_register_gate(exynos4x12_gate_clks,
 			ARRAY_SIZE(exynos4x12_gate_clks));
+		samsung_clk_register_alias(exynos4x12_aliases,
+			ARRAY_SIZE(exynos4x12_aliases));
 	}
 
+	samsung_clk_register_alias(exynos4_aliases,
+			ARRAY_SIZE(exynos4_aliases));
+
 	pr_info("%s clocks: sclk_apll = %ld, sclk_mpll = %ld\n"
 		"\tsclk_epll = %ld, sclk_vpll = %ld, arm_clk = %ld\n",
 		exynos4_soc == EXYNOS4210 ? "Exynos4210" : "Exynos4x12",
-		_get_rate("sclk_apll"),	_get_rate("mout_mpll"),
+		_get_rate("sclk_apll"),	_get_rate("sclk_mpll"),
 		_get_rate("sclk_epll"), _get_rate("sclk_vpll"),
-		_get_rate("armclk"));
+		_get_rate("arm_clk"));
 }
 
 
diff --git a/drivers/clk/samsung/clk-exynos5250.c b/drivers/clk/samsung/clk-exynos5250.c
index 6f767c5..adf3234 100644
--- a/drivers/clk/samsung/clk-exynos5250.c
+++ b/drivers/clk/samsung/clk-exynos5250.c
@@ -17,11 +17,22 @@
 #include <linux/of_address.h>
 
 #include "clk.h"
-#include "clk-pll.h"
 
+#define APLL_LOCK		0x0
+#define APLL_CON0		0x100
 #define SRC_CPU			0x200
 #define DIV_CPU0		0x500
+#define MPLL_LOCK		0x4000
+#define MPLL_CON0		0x4100
 #define SRC_CORE1		0x4204
+#define CPLL_LOCK		0x10020
+#define EPLL_LOCK		0x10030
+#define VPLL_LOCK		0x10040
+#define GPLL_LOCK		0x10050
+#define CPLL_CON0		0x10120
+#define EPLL_CON0		0x10130
+#define VPLL_CON0		0x10140
+#define GPLL_CON0		0x10150
 #define SRC_TOP0		0x10210
 #define SRC_TOP2		0x10218
 #define SRC_GSCL		0x10220
@@ -59,9 +70,18 @@
 #define GATE_IP_FSYS		0x10944
 #define GATE_IP_PERIC		0x10950
 #define GATE_IP_PERIS		0x10960
+#define BPLL_LOCK		0x20010
+#define BPLL_CON0		0x20110
 #define SRC_CDREX		0x20200
 #define PLL_DIV2_SEL		0x20a24
 #define GATE_IP_DISP1		0x10928
+#define GATE_IP_ACP		0x10000
+
+/* list of PLLs to be registered */
+enum exynos5250_plls {
+	apll, mpll, cpll, epll, vpll, gpll, bpll,
+	nr_plls			/* number of PLLs */
+};
 
 /*
  * Let each supported clock get a unique id. This id is used to lookup the clock
@@ -79,7 +99,8 @@
 	none,
 
 	/* core clocks */
-	fin_pll,
+	fin_pll, fout_apll, fout_mpll, fout_bpll, fout_gpll, fout_cpll,
+	fout_epll, fout_vpll,
 
 	/* gate for special clocks (sclk) */
 	sclk_cam_bayer = 128, sclk_cam0, sclk_cam1, sclk_gscl_wa, sclk_gscl_wb,
@@ -87,7 +108,7 @@
 	sclk_mmc0, sclk_mmc1, sclk_mmc2, sclk_mmc3, sclk_sata, sclk_usb3,
 	sclk_jpeg, sclk_uart0, sclk_uart1, sclk_uart2, sclk_uart3, sclk_pwm,
 	sclk_audio1, sclk_audio2, sclk_spdif, sclk_spi0, sclk_spi1, sclk_spi2,
-	div_i2s1, div_i2s2,
+	div_i2s1, div_i2s2, sclk_hdmiphy,
 
 	/* gate clocks */
 	gscl0 = 256, gscl1, gscl2, gscl3, gscl_wa, gscl_wb, smmu_gscl0,
@@ -99,7 +120,10 @@
 	spi2, i2s1, i2s2, pcm1, pcm2, pwm, spdif, ac97, hsi2c0, hsi2c1, hsi2c2,
 	hsi2c3, chipid, sysreg, pmu, cmu_top, cmu_core, cmu_mem, tzpc0, tzpc1,
 	tzpc2, tzpc3, tzpc4, tzpc5, tzpc6, tzpc7, tzpc8, tzpc9, hdmi_cec, mct,
-	wdt, rtc, tmu, fimd1, mie1, dsim0, dp, mixer, hdmi,
+	wdt, rtc, tmu, fimd1, mie1, dsim0, dp, mixer, hdmi, g2d,
+
+	/* mux clocks */
+	mout_hdmi = 1024,
 
 	nr_clks,
 };
@@ -108,7 +132,7 @@
  * list of controller registers to be saved and restored during a
  * suspend/resume cycle.
  */
-static __initdata unsigned long exynos5250_clk_regs[] = {
+static unsigned long exynos5250_clk_regs[] __initdata = {
 	SRC_CPU,
 	DIV_CPU0,
 	SRC_CORE1,
@@ -152,6 +176,7 @@
 	SRC_CDREX,
 	PLL_DIV2_SEL,
 	GATE_IP_DISP1,
+	GATE_IP_ACP,
 };
 
 /* list of all parent clock list */
@@ -191,31 +216,34 @@
 				"spdif_extclk" };
 
 /* fixed rate clocks generated outside the soc */
-struct samsung_fixed_rate_clock exynos5250_fixed_rate_ext_clks[] __initdata = {
+static struct samsung_fixed_rate_clock exynos5250_fixed_rate_ext_clks[] __initdata = {
 	FRATE(fin_pll, "fin_pll", NULL, CLK_IS_ROOT, 0),
 };
 
 /* fixed rate clocks generated inside the soc */
-struct samsung_fixed_rate_clock exynos5250_fixed_rate_clks[] __initdata = {
-	FRATE(none, "sclk_hdmiphy", NULL, CLK_IS_ROOT, 24000000),
+static struct samsung_fixed_rate_clock exynos5250_fixed_rate_clks[] __initdata = {
+	FRATE(sclk_hdmiphy, "sclk_hdmiphy", NULL, CLK_IS_ROOT, 24000000),
 	FRATE(none, "sclk_hdmi27m", NULL, CLK_IS_ROOT, 27000000),
 	FRATE(none, "sclk_dptxphy", NULL, CLK_IS_ROOT, 24000000),
 	FRATE(none, "sclk_uhostphy", NULL, CLK_IS_ROOT, 48000000),
 };
 
-struct samsung_fixed_factor_clock exynos5250_fixed_factor_clks[] __initdata = {
+static struct samsung_fixed_factor_clock exynos5250_fixed_factor_clks[] __initdata = {
 	FFACTOR(none, "fout_mplldiv2", "fout_mpll", 1, 2, 0),
 	FFACTOR(none, "fout_bplldiv2", "fout_bpll", 1, 2, 0),
 };
 
-struct samsung_mux_clock exynos5250_mux_clks[] __initdata = {
+static struct samsung_mux_clock exynos5250_pll_pmux_clks[] __initdata = {
+	MUX(none, "mout_vpllsrc", mout_vpllsrc_p, SRC_TOP2, 0, 1),
+};
+
+static struct samsung_mux_clock exynos5250_mux_clks[] __initdata = {
 	MUX_A(none, "mout_apll", mout_apll_p, SRC_CPU, 0, 1, "mout_apll"),
 	MUX_A(none, "mout_cpu", mout_cpu_p, SRC_CPU, 16, 1, "mout_cpu"),
 	MUX(none, "mout_mpll_fout", mout_mpll_fout_p, PLL_DIV2_SEL, 4, 1),
 	MUX_A(none, "sclk_mpll", mout_mpll_p, SRC_CORE1, 8, 1, "mout_mpll"),
 	MUX(none, "mout_bpll_fout", mout_bpll_fout_p, PLL_DIV2_SEL, 0, 1),
 	MUX(none, "sclk_bpll", mout_bpll_p, SRC_CDREX, 0, 1),
-	MUX(none, "mout_vpllsrc", mout_vpllsrc_p, SRC_TOP2, 0, 1),
 	MUX(none, "sclk_vpll", mout_vpll_p, SRC_TOP2, 16, 1),
 	MUX(none, "sclk_epll", mout_epll_p, SRC_TOP2, 12, 1),
 	MUX(none, "sclk_cpll", mout_cpll_p, SRC_TOP2, 8, 1),
@@ -232,7 +260,7 @@
 	MUX(none, "mout_fimd1", mout_group1_p, SRC_DISP1_0, 0, 4),
 	MUX(none, "mout_mipi1", mout_group1_p, SRC_DISP1_0, 12, 4),
 	MUX(none, "mout_dp", mout_group1_p, SRC_DISP1_0, 16, 4),
-	MUX(none, "mout_hdmi", mout_hdmi_p, SRC_DISP1_0, 20, 1),
+	MUX(mout_hdmi, "mout_hdmi", mout_hdmi_p, SRC_DISP1_0, 20, 1),
 	MUX(none, "mout_audio0", mout_audio0_p, SRC_MAU, 0, 4),
 	MUX(none, "mout_mmc0", mout_group1_p, SRC_FSYS, 0, 4),
 	MUX(none, "mout_mmc1", mout_group1_p, SRC_FSYS, 4, 4),
@@ -254,7 +282,7 @@
 	MUX(none, "mout_spi2", mout_group1_p, SRC_PERIC1, 24, 4),
 };
 
-struct samsung_div_clock exynos5250_div_clks[] __initdata = {
+static struct samsung_div_clock exynos5250_div_clks[] __initdata = {
 	DIV(none, "div_arm", "mout_cpu", DIV_CPU0, 0, 3),
 	DIV(none, "sclk_apll", "mout_apll", DIV_CPU0, 24, 3),
 	DIV(none, "aclk66_pre", "sclk_mpll_user", DIV_TOP1, 24, 3),
@@ -314,7 +342,7 @@
 			DIV_PERIC2, 8, 8, CLK_SET_RATE_PARENT, 0),
 };
 
-struct samsung_gate_clock exynos5250_gate_clks[] __initdata = {
+static struct samsung_gate_clock exynos5250_gate_clks[] __initdata = {
 	GATE(gscl0, "gscl0", "none", GATE_IP_GSCL, 0, 0, 0),
 	GATE(gscl1, "gscl1", "none", GATE_IP_GSCL, 1, 0, 0),
 	GATE(gscl2, "gscl2", "aclk266", GATE_IP_GSCL, 2, 0, 0),
@@ -461,20 +489,60 @@
 	GATE(mie1, "mie1", "aclk200", GATE_IP_DISP1, 1, 0, 0),
 	GATE(dsim0, "dsim0", "aclk200", GATE_IP_DISP1, 3, 0, 0),
 	GATE(dp, "dp", "aclk200", GATE_IP_DISP1, 4, 0, 0),
-	GATE(mixer, "mixer", "aclk200", GATE_IP_DISP1, 5, 0, 0),
-	GATE(hdmi, "hdmi", "aclk200", GATE_IP_DISP1, 6, 0, 0),
+	GATE(mixer, "mixer", "mout_aclk200_disp1", GATE_IP_DISP1, 5, 0, 0),
+	GATE(hdmi, "hdmi", "mout_aclk200_disp1", GATE_IP_DISP1, 6, 0, 0),
+	GATE(g2d, "g2d", "aclk200", GATE_IP_ACP, 3, 0, 0),
 };
 
-static __initdata struct of_device_id ext_clk_match[] = {
+static struct samsung_pll_rate_table vpll_24mhz_tbl[] __initdata = {
+	/* sorted in descending order */
+	/* PLL_36XX_RATE(rate, m, p, s, k) */
+	PLL_36XX_RATE(266000000, 266, 3, 3, 0),
+	/* Not in UM, but need for eDP on snow */
+	PLL_36XX_RATE(70500000, 94, 2, 4, 0),
+	{ },
+};
+
+static struct samsung_pll_rate_table epll_24mhz_tbl[] __initdata = {
+	/* sorted in descending order */
+	/* PLL_36XX_RATE(rate, m, p, s, k) */
+	PLL_36XX_RATE(192000000, 64, 2, 2, 0),
+	PLL_36XX_RATE(180633600, 90, 3, 2, 20762),
+	PLL_36XX_RATE(180000000, 90, 3, 2, 0),
+	PLL_36XX_RATE(73728000, 98, 2, 4, 19923),
+	PLL_36XX_RATE(67737600, 90, 2, 4, 20762),
+	PLL_36XX_RATE(49152000, 98, 3, 4, 19923),
+	PLL_36XX_RATE(45158400, 90, 3, 4, 20762),
+	PLL_36XX_RATE(32768000, 131, 3, 5, 4719),
+	{ },
+};
+
+static struct samsung_pll_clock exynos5250_plls[nr_plls] __initdata = {
+	[apll] = PLL_A(pll_35xx, fout_apll, "fout_apll", "fin_pll", APLL_LOCK,
+		APLL_CON0, "fout_apll", NULL),
+	[mpll] = PLL_A(pll_35xx, fout_mpll, "fout_mpll", "fin_pll", MPLL_LOCK,
+		MPLL_CON0, "fout_mpll", NULL),
+	[bpll] = PLL(pll_35xx, fout_bpll, "fout_bpll", "fin_pll", BPLL_LOCK,
+		BPLL_CON0, NULL),
+	[gpll] = PLL(pll_35xx, fout_gpll, "fout_gpll", "fin_pll", GPLL_LOCK,
+		GPLL_CON0, NULL),
+	[cpll] = PLL(pll_35xx, fout_cpll, "fout_cpll", "fin_pll", CPLL_LOCK,
+		CPLL_CON0, NULL),
+	[epll] = PLL(pll_36xx, fout_epll, "fout_epll", "fin_pll", EPLL_LOCK,
+		EPLL_CON0, NULL),
+	[vpll] = PLL(pll_36xx, fout_vpll, "fout_vpll", "mout_vpllsrc",
+		VPLL_LOCK, VPLL_CON0, NULL),
+};
+
+static struct of_device_id ext_clk_match[] __initdata = {
 	{ .compatible = "samsung,clock-xxti", .data = (void *)0, },
 	{ },
 };
 
 /* register exynox5250 clocks */
-void __init exynos5250_clk_init(struct device_node *np)
+static void __init exynos5250_clk_init(struct device_node *np)
 {
 	void __iomem *reg_base;
-	struct clk *apll, *mpll, *epll, *vpll, *bpll, *gpll, *cpll;
 
 	if (np) {
 		reg_base = of_iomap(np, 0);
@@ -490,22 +558,17 @@
 	samsung_clk_of_register_fixed_ext(exynos5250_fixed_rate_ext_clks,
 			ARRAY_SIZE(exynos5250_fixed_rate_ext_clks),
 			ext_clk_match);
+	samsung_clk_register_mux(exynos5250_pll_pmux_clks,
+				ARRAY_SIZE(exynos5250_pll_pmux_clks));
 
-	apll = samsung_clk_register_pll35xx("fout_apll", "fin_pll",
-			reg_base + 0x100);
-	mpll = samsung_clk_register_pll35xx("fout_mpll", "fin_pll",
-			reg_base + 0x4100);
-	bpll = samsung_clk_register_pll35xx("fout_bpll", "fin_pll",
-			reg_base + 0x20110);
-	gpll = samsung_clk_register_pll35xx("fout_gpll", "fin_pll",
-			reg_base + 0x10150);
-	cpll = samsung_clk_register_pll35xx("fout_cpll", "fin_pll",
-			reg_base + 0x10120);
-	epll = samsung_clk_register_pll36xx("fout_epll", "fin_pll",
-			reg_base + 0x10130);
-	vpll = samsung_clk_register_pll36xx("fout_vpll", "mout_vpllsrc",
-			reg_base + 0x10140);
+	if (_get_rate("fin_pll") == 24 * MHZ)
+		exynos5250_plls[epll].rate_table = epll_24mhz_tbl;
 
+	if (_get_rate("mout_vpllsrc") == 24 * MHZ)
+		exynos5250_plls[vpll].rate_table =  vpll_24mhz_tbl;
+
+	samsung_clk_register_pll(exynos5250_plls, ARRAY_SIZE(exynos5250_plls),
+					reg_base);
 	samsung_clk_register_fixed_rate(exynos5250_fixed_rate_clks,
 			ARRAY_SIZE(exynos5250_fixed_rate_clks));
 	samsung_clk_register_fixed_factor(exynos5250_fixed_factor_clks,
diff --git a/drivers/clk/samsung/clk-exynos5420.c b/drivers/clk/samsung/clk-exynos5420.c
index 68a96cb..48c4a93 100644
--- a/drivers/clk/samsung/clk-exynos5420.c
+++ b/drivers/clk/samsung/clk-exynos5420.c
@@ -17,13 +17,30 @@
 #include <linux/of_address.h>
 
 #include "clk.h"
-#include "clk-pll.h"
 
+#define APLL_LOCK		0x0
+#define APLL_CON0		0x100
 #define SRC_CPU			0x200
 #define DIV_CPU0		0x500
 #define DIV_CPU1		0x504
 #define GATE_BUS_CPU		0x700
 #define GATE_SCLK_CPU		0x800
+#define CPLL_LOCK		0x10020
+#define DPLL_LOCK		0x10030
+#define EPLL_LOCK		0x10040
+#define RPLL_LOCK		0x10050
+#define IPLL_LOCK		0x10060
+#define SPLL_LOCK		0x10070
+#define VPLL_LOCK		0x10070
+#define MPLL_LOCK		0x10090
+#define CPLL_CON0		0x10120
+#define DPLL_CON0		0x10128
+#define EPLL_CON0		0x10130
+#define RPLL_CON0		0x10140
+#define IPLL_CON0		0x10150
+#define SPLL_CON0		0x10160
+#define VPLL_CON0		0x10170
+#define MPLL_CON0		0x10180
 #define SRC_TOP0		0x10200
 #define SRC_TOP1		0x10204
 #define SRC_TOP2		0x10208
@@ -75,15 +92,27 @@
 #define GATE_TOP_SCLK_MAU	0x1083c
 #define GATE_TOP_SCLK_FSYS	0x10840
 #define GATE_TOP_SCLK_PERIC	0x10850
+#define BPLL_LOCK		0x20010
+#define BPLL_CON0		0x20110
 #define SRC_CDREX		0x20200
+#define KPLL_LOCK		0x28000
+#define KPLL_CON0		0x28100
 #define SRC_KFC			0x28200
 #define DIV_KFC0		0x28500
 
+/* list of PLLs */
+enum exynos5420_plls {
+	apll, cpll, dpll, epll, rpll, ipll, spll, vpll, mpll,
+	bpll, kpll,
+	nr_plls			/* number of PLLs */
+};
+
 enum exynos5420_clks {
 	none,
 
 	/* core clocks */
-	fin_pll,
+	fin_pll,  fout_apll, fout_cpll, fout_dpll, fout_epll, fout_rpll,
+	fout_ipll, fout_spll, fout_vpll, fout_mpll, fout_bpll, fout_kpll,
 
 	/* gate for special clocks (sclk) */
 	sclk_uart0 = 128, sclk_uart1, sclk_uart2, sclk_uart3, sclk_mmc0,
@@ -91,7 +120,7 @@
 	sclk_i2s2, sclk_pcm1, sclk_pcm2, sclk_spdif, sclk_hdmi, sclk_pixel,
 	sclk_dp1, sclk_mipi1, sclk_fimd1, sclk_maudio0, sclk_maupcm0,
 	sclk_usbd300, sclk_usbd301, sclk_usbphy300, sclk_usbphy301, sclk_unipro,
-	sclk_pwm, sclk_gscl_wa, sclk_gscl_wb,
+	sclk_pwm, sclk_gscl_wa, sclk_gscl_wb, sclk_hdmiphy,
 
 	/* gate clocks */
 	aclk66_peric = 256, uart0, uart1, uart2, uart3, i2c0, i2c1, i2c2, i2c3,
@@ -109,7 +138,13 @@
 	aclk300_gscl = 460, smmu_gscl0, smmu_gscl1, gscl_wa, gscl_wb, gscl0,
 	gscl1, clk_3aa, aclk266_g2d = 470, sss, slim_sss, mdma0,
 	aclk333_g2d = 480, g2d, aclk333_432_gscl = 490, smmu_3aa, smmu_fimcl0,
-	smmu_fimcl1, smmu_fimcl3, fimc_lite3, aclk_g3d = 500, g3d,
+	smmu_fimcl1, smmu_fimcl3, fimc_lite3, aclk_g3d = 500, g3d, smmu_mixer,
+
+	/* mux clocks */
+	mout_hdmi = 640,
+
+	/* divider clocks */
+	dout_pixel = 768,
 
 	nr_clks,
 };
@@ -118,7 +153,7 @@
  * list of controller registers to be saved and restored during a
  * suspend/resume cycle.
  */
-static __initdata unsigned long exynos5420_clk_regs[] = {
+static unsigned long exynos5420_clk_regs[] __initdata = {
 	SRC_CPU,
 	DIV_CPU0,
 	DIV_CPU1,
@@ -257,29 +292,29 @@
 		  "sclk_spll", "sclk_ipll", "sclk_epll", "sclk_rpll" };
 PNAME(spdif_p)	= { "fin_pll", "dout_audio0", "dout_audio1", "dout_audio2",
 		  "spdif_extclk", "sclk_ipll", "sclk_epll", "sclk_rpll" };
-PNAME(hdmi_p)	= { "sclk_hdmiphy", "dout_hdmi_pixel" };
+PNAME(hdmi_p)	= { "dout_hdmi_pixel", "sclk_hdmiphy" };
 PNAME(maudio0_p)	= { "fin_pll", "maudio_clk", "sclk_dpll", "sclk_mpll",
 			  "sclk_spll", "sclk_ipll", "sclk_epll", "sclk_rpll" };
 
 /* fixed rate clocks generated outside the soc */
-struct samsung_fixed_rate_clock exynos5420_fixed_rate_ext_clks[] __initdata = {
+static struct samsung_fixed_rate_clock exynos5420_fixed_rate_ext_clks[] __initdata = {
 	FRATE(fin_pll, "fin_pll", NULL, CLK_IS_ROOT, 0),
 };
 
 /* fixed rate clocks generated inside the soc */
-struct samsung_fixed_rate_clock exynos5420_fixed_rate_clks[] __initdata = {
-	FRATE(none, "sclk_hdmiphy", NULL, CLK_IS_ROOT, 24000000),
+static struct samsung_fixed_rate_clock exynos5420_fixed_rate_clks[] __initdata = {
+	FRATE(sclk_hdmiphy, "sclk_hdmiphy", NULL, CLK_IS_ROOT, 24000000),
 	FRATE(none, "sclk_pwi", NULL, CLK_IS_ROOT, 24000000),
 	FRATE(none, "sclk_usbh20", NULL, CLK_IS_ROOT, 48000000),
 	FRATE(none, "mphy_refclk_ixtal24", NULL, CLK_IS_ROOT, 48000000),
 	FRATE(none, "sclk_usbh20_scan_clk", NULL, CLK_IS_ROOT, 480000000),
 };
 
-struct samsung_fixed_factor_clock exynos5420_fixed_factor_clks[] __initdata = {
+static struct samsung_fixed_factor_clock exynos5420_fixed_factor_clks[] __initdata = {
 	FFACTOR(none, "sclk_hsic_12m", "fin_pll", 1, 2, 0),
 };
 
-struct samsung_mux_clock exynos5420_mux_clks[] __initdata = {
+static struct samsung_mux_clock exynos5420_mux_clks[] __initdata = {
 	MUX(none, "mout_mspll_kfc", mspll_cpu_p, SRC_TOP7, 8, 2),
 	MUX(none, "mout_mspll_cpu", mspll_cpu_p, SRC_TOP7, 12, 2),
 	MUX(none, "mout_apll", apll_p, SRC_CPU, 0, 1),
@@ -371,7 +406,7 @@
 	MUX(none, "mout_mipi1", group2_p, SRC_DISP10, 16, 3),
 	MUX(none, "mout_dp1", group2_p, SRC_DISP10, 20, 3),
 	MUX(none, "mout_pixel", group2_p, SRC_DISP10, 24, 3),
-	MUX(none, "mout_hdmi", hdmi_p, SRC_DISP10, 28, 1),
+	MUX(mout_hdmi, "mout_hdmi", hdmi_p, SRC_DISP10, 28, 1),
 
 	/* MAU Block */
 	MUX(none, "mout_maudio0", maudio0_p, SRC_MAU, 28, 3),
@@ -399,7 +434,7 @@
 	MUX(none, "mout_spi2", group2_p, SRC_PERIC1, 28, 3),
 };
 
-struct samsung_div_clock exynos5420_div_clks[] __initdata = {
+static struct samsung_div_clock exynos5420_div_clks[] __initdata = {
 	DIV(none, "div_arm", "mout_cpu", DIV_CPU0, 0, 3),
 	DIV(none, "sclk_apll", "mout_apll", DIV_CPU0, 24, 3),
 	DIV(none, "armclk2", "div_arm", DIV_CPU0, 28, 3),
@@ -431,7 +466,7 @@
 	DIV(none, "dout_fimd1", "mout_fimd1", DIV_DISP10, 0, 4),
 	DIV(none, "dout_mipi1", "mout_mipi1", DIV_DISP10, 16, 8),
 	DIV(none, "dout_dp1", "mout_dp1", DIV_DISP10, 24, 4),
-	DIV(none, "dout_hdmi_pixel", "mout_pixel", DIV_DISP10, 28, 4),
+	DIV(dout_pixel, "dout_hdmi_pixel", "mout_pixel", DIV_DISP10, 28, 4),
 
 	/* Audio Block */
 	DIV(none, "dout_maudio0", "mout_maudio0", DIV_MAU, 20, 4),
@@ -479,7 +514,7 @@
 	DIV(none, "dout_pre_spi2", "dout_spi2", DIV_PERIC4, 24, 8),
 };
 
-struct samsung_gate_clock exynos5420_gate_clks[] __initdata = {
+static struct samsung_gate_clock exynos5420_gate_clks[] __initdata = {
 	/* TODO: Re-verify the CG bits for all the gate clocks */
 	GATE_A(mct, "pclk_st", "aclk66_psgen", GATE_BUS_PERIS1, 2, 0, 0, "mct"),
 
@@ -696,19 +731,43 @@
 	GATE(smmu_mscl0, "smmu_mscl0", "aclk400_mscl", GATE_IP_MSCL, 8, 0, 0),
 	GATE(smmu_mscl1, "smmu_mscl1", "aclk400_mscl", GATE_IP_MSCL, 9, 0, 0),
 	GATE(smmu_mscl2, "smmu_mscl2", "aclk400_mscl", GATE_IP_MSCL, 10, 0, 0),
+	GATE(smmu_mixer, "smmu_mixer", "aclk200_disp1", GATE_IP_DISP1, 9, 0, 0),
 };
 
-static __initdata struct of_device_id ext_clk_match[] = {
+static struct samsung_pll_clock exynos5420_plls[nr_plls] __initdata = {
+	[apll] = PLL(pll_2550, fout_apll, "fout_apll", "fin_pll", APLL_LOCK,
+		APLL_CON0, NULL),
+	[cpll] = PLL(pll_2550, fout_mpll, "fout_mpll", "fin_pll", MPLL_LOCK,
+		MPLL_CON0, NULL),
+	[dpll] = PLL(pll_2550, fout_dpll, "fout_dpll", "fin_pll", DPLL_LOCK,
+		DPLL_CON0, NULL),
+	[epll] = PLL(pll_2650, fout_epll, "fout_epll", "fin_pll", EPLL_LOCK,
+		EPLL_CON0, NULL),
+	[rpll] = PLL(pll_2650, fout_rpll, "fout_rpll", "fin_pll", RPLL_LOCK,
+		RPLL_CON0, NULL),
+	[ipll] = PLL(pll_2550, fout_ipll, "fout_ipll", "fin_pll", IPLL_LOCK,
+		IPLL_CON0, NULL),
+	[spll] = PLL(pll_2550, fout_spll, "fout_spll", "fin_pll", SPLL_LOCK,
+		SPLL_CON0, NULL),
+	[vpll] = PLL(pll_2550, fout_vpll, "fout_vpll", "fin_pll", VPLL_LOCK,
+		VPLL_CON0, NULL),
+	[mpll] = PLL(pll_2550, fout_mpll, "fout_mpll", "fin_pll", MPLL_LOCK,
+		MPLL_CON0, NULL),
+	[bpll] = PLL(pll_2550, fout_bpll, "fout_bpll", "fin_pll", BPLL_LOCK,
+		BPLL_CON0, NULL),
+	[kpll] = PLL(pll_2550, fout_kpll, "fout_kpll", "fin_pll", KPLL_LOCK,
+		KPLL_CON0, NULL),
+};
+
+static struct of_device_id ext_clk_match[] __initdata = {
 	{ .compatible = "samsung,exynos5420-oscclk", .data = (void *)0, },
 	{ },
 };
 
 /* register exynos5420 clocks */
-void __init exynos5420_clk_init(struct device_node *np)
+static void __init exynos5420_clk_init(struct device_node *np)
 {
 	void __iomem *reg_base;
-	struct clk *apll, *bpll, *cpll, *dpll, *epll, *ipll, *kpll, *mpll;
-	struct clk *rpll, *spll, *vpll;
 
 	if (np) {
 		reg_base = of_iomap(np, 0);
@@ -724,30 +783,8 @@
 	samsung_clk_of_register_fixed_ext(exynos5420_fixed_rate_ext_clks,
 			ARRAY_SIZE(exynos5420_fixed_rate_ext_clks),
 			ext_clk_match);
-
-	apll = samsung_clk_register_pll35xx("fout_apll", "fin_pll",
-			reg_base + 0x100);
-	bpll = samsung_clk_register_pll35xx("fout_bpll", "fin_pll",
-			reg_base + 0x20110);
-	cpll = samsung_clk_register_pll35xx("fout_cpll", "fin_pll",
-			reg_base + 0x10120);
-	dpll = samsung_clk_register_pll35xx("fout_dpll", "fin_pll",
-			reg_base + 0x10128);
-	epll = samsung_clk_register_pll36xx("fout_epll", "fin_pll",
-			reg_base + 0x10130);
-	ipll = samsung_clk_register_pll35xx("fout_ipll", "fin_pll",
-			reg_base + 0x10150);
-	kpll = samsung_clk_register_pll35xx("fout_kpll", "fin_pll",
-			reg_base + 0x28100);
-	mpll = samsung_clk_register_pll35xx("fout_mpll", "fin_pll",
-			reg_base + 0x10180);
-	rpll = samsung_clk_register_pll36xx("fout_rpll", "fin_pll",
-			reg_base + 0x10140);
-	spll = samsung_clk_register_pll35xx("fout_spll", "fin_pll",
-			reg_base + 0x10160);
-	vpll = samsung_clk_register_pll35xx("fout_vpll", "fin_pll",
-			reg_base + 0x10170);
-
+	samsung_clk_register_pll(exynos5420_plls, ARRAY_SIZE(exynos5420_plls),
+					reg_base);
 	samsung_clk_register_fixed_rate(exynos5420_fixed_rate_clks,
 			ARRAY_SIZE(exynos5420_fixed_rate_clks));
 	samsung_clk_register_fixed_factor(exynos5420_fixed_factor_clks,
diff --git a/drivers/clk/samsung/clk-exynos5440.c b/drivers/clk/samsung/clk-exynos5440.c
index 7d54341..f865894 100644
--- a/drivers/clk/samsung/clk-exynos5440.c
+++ b/drivers/clk/samsung/clk-exynos5440.c
@@ -41,12 +41,12 @@
 PNAME(mout_spi_p)	= { "div125", "div200" };
 
 /* fixed rate clocks generated outside the soc */
-struct samsung_fixed_rate_clock exynos5440_fixed_rate_ext_clks[] __initdata = {
+static struct samsung_fixed_rate_clock exynos5440_fixed_rate_ext_clks[] __initdata = {
 	FRATE(none, "xtal", NULL, CLK_IS_ROOT, 0),
 };
 
 /* fixed rate clocks */
-struct samsung_fixed_rate_clock exynos5440_fixed_rate_clks[] __initdata = {
+static struct samsung_fixed_rate_clock exynos5440_fixed_rate_clks[] __initdata = {
 	FRATE(none, "ppll", NULL, CLK_IS_ROOT, 1000000000),
 	FRATE(none, "usb_phy0", NULL, CLK_IS_ROOT, 60000000),
 	FRATE(none, "usb_phy1", NULL, CLK_IS_ROOT, 60000000),
@@ -55,26 +55,26 @@
 };
 
 /* fixed factor clocks */
-struct samsung_fixed_factor_clock exynos5440_fixed_factor_clks[] __initdata = {
+static struct samsung_fixed_factor_clock exynos5440_fixed_factor_clks[] __initdata = {
 	FFACTOR(none, "div250", "ppll", 1, 4, 0),
 	FFACTOR(none, "div200", "ppll", 1, 5, 0),
 	FFACTOR(none, "div125", "div250", 1, 2, 0),
 };
 
 /* mux clocks */
-struct samsung_mux_clock exynos5440_mux_clks[] __initdata = {
+static struct samsung_mux_clock exynos5440_mux_clks[] __initdata = {
 	MUX(none, "mout_spi", mout_spi_p, MISC_DOUT1, 5, 1),
 	MUX_A(arm_clk, "arm_clk", mout_armclk_p,
 			CPU_CLK_STATUS, 0, 1, "armclk"),
 };
 
 /* divider clocks */
-struct samsung_div_clock exynos5440_div_clks[] __initdata = {
+static struct samsung_div_clock exynos5440_div_clks[] __initdata = {
 	DIV(spi_baud, "div_spi", "mout_spi", MISC_DOUT1, 3, 2),
 };
 
 /* gate clocks */
-struct samsung_gate_clock exynos5440_gate_clks[] __initdata = {
+static struct samsung_gate_clock exynos5440_gate_clks[] __initdata = {
 	GATE(pb0_250, "pb0_250", "div250", CLKEN_OV_VAL, 3, 0, 0),
 	GATE(pr0_250, "pr0_250", "div250", CLKEN_OV_VAL, 4, 0, 0),
 	GATE(pr1_250, "pr1_250", "div250", CLKEN_OV_VAL, 5, 0, 0),
@@ -97,13 +97,13 @@
 	GATE(cs250_o, "cs250_o", "cs250", CLKEN_OV_VAL, 19, 0, 0),
 };
 
-static __initdata struct of_device_id ext_clk_match[] = {
+static struct of_device_id ext_clk_match[] __initdata = {
 	{ .compatible = "samsung,clock-xtal", .data = (void *)0, },
 	{},
 };
 
 /* register exynos5440 clocks */
-void __init exynos5440_clk_init(struct device_node *np)
+static void __init exynos5440_clk_init(struct device_node *np)
 {
 	void __iomem *reg_base;
 
@@ -132,7 +132,7 @@
 	samsung_clk_register_gate(exynos5440_gate_clks,
 			ARRAY_SIZE(exynos5440_gate_clks));
 
-	pr_info("Exynos5440: arm_clk = %ldHz\n", _get_rate("armclk"));
+	pr_info("Exynos5440: arm_clk = %ldHz\n", _get_rate("arm_clk"));
 	pr_info("exynos5440 clock initialization complete\n");
 }
 CLK_OF_DECLARE(exynos5440_clk, "samsung,exynos5440-clock", exynos5440_clk_init);
diff --git a/drivers/clk/samsung/clk-pll.c b/drivers/clk/samsung/clk-pll.c
index 362f12d..529e11d 100644
--- a/drivers/clk/samsung/clk-pll.c
+++ b/drivers/clk/samsung/clk-pll.c
@@ -10,31 +10,73 @@
 */
 
 #include <linux/errno.h>
+#include <linux/hrtimer.h>
 #include "clk.h"
 #include "clk-pll.h"
 
+#define PLL_TIMEOUT_MS		10
+
+struct samsung_clk_pll {
+	struct clk_hw		hw;
+	void __iomem		*lock_reg;
+	void __iomem		*con_reg;
+	enum samsung_pll_type	type;
+	unsigned int		rate_count;
+	const struct samsung_pll_rate_table *rate_table;
+};
+
+#define to_clk_pll(_hw) container_of(_hw, struct samsung_clk_pll, hw)
+
+static const struct samsung_pll_rate_table *samsung_get_pll_settings(
+				struct samsung_clk_pll *pll, unsigned long rate)
+{
+	const struct samsung_pll_rate_table  *rate_table = pll->rate_table;
+	int i;
+
+	for (i = 0; i < pll->rate_count; i++) {
+		if (rate == rate_table[i].rate)
+			return &rate_table[i];
+	}
+
+	return NULL;
+}
+
+static long samsung_pll_round_rate(struct clk_hw *hw,
+			unsigned long drate, unsigned long *prate)
+{
+	struct samsung_clk_pll *pll = to_clk_pll(hw);
+	const struct samsung_pll_rate_table *rate_table = pll->rate_table;
+	int i;
+
+	/* Assumming rate_table is in descending order */
+	for (i = 0; i < pll->rate_count; i++) {
+		if (drate >= rate_table[i].rate)
+			return rate_table[i].rate;
+	}
+
+	/* return minimum supported value */
+	return rate_table[i - 1].rate;
+}
+
 /*
  * PLL35xx Clock Type
  */
+/* Maximum lock time can be 270 * PDIV cycles */
+#define PLL35XX_LOCK_FACTOR	(270)
 
 #define PLL35XX_MDIV_MASK       (0x3FF)
 #define PLL35XX_PDIV_MASK       (0x3F)
 #define PLL35XX_SDIV_MASK       (0x7)
+#define PLL35XX_LOCK_STAT_MASK	(0x1)
 #define PLL35XX_MDIV_SHIFT      (16)
 #define PLL35XX_PDIV_SHIFT      (8)
 #define PLL35XX_SDIV_SHIFT      (0)
-
-struct samsung_clk_pll35xx {
-	struct clk_hw		hw;
-	const void __iomem	*con_reg;
-};
-
-#define to_clk_pll35xx(_hw) container_of(_hw, struct samsung_clk_pll35xx, hw)
+#define PLL35XX_LOCK_STAT_SHIFT	(29)
 
 static unsigned long samsung_pll35xx_recalc_rate(struct clk_hw *hw,
 				unsigned long parent_rate)
 {
-	struct samsung_clk_pll35xx *pll = to_clk_pll35xx(hw);
+	struct samsung_clk_pll *pll = to_clk_pll(hw);
 	u32 mdiv, pdiv, sdiv, pll_con;
 	u64 fvco = parent_rate;
 
@@ -49,48 +91,80 @@
 	return (unsigned long)fvco;
 }
 
+static inline bool samsung_pll35xx_mp_change(
+		const struct samsung_pll_rate_table *rate, u32 pll_con)
+{
+	u32 old_mdiv, old_pdiv;
+
+	old_mdiv = (pll_con >> PLL35XX_MDIV_SHIFT) & PLL35XX_MDIV_MASK;
+	old_pdiv = (pll_con >> PLL35XX_PDIV_SHIFT) & PLL35XX_PDIV_MASK;
+
+	return (rate->mdiv != old_mdiv || rate->pdiv != old_pdiv);
+}
+
+static int samsung_pll35xx_set_rate(struct clk_hw *hw, unsigned long drate,
+					unsigned long prate)
+{
+	struct samsung_clk_pll *pll = to_clk_pll(hw);
+	const struct samsung_pll_rate_table *rate;
+	u32 tmp;
+
+	/* Get required rate settings from table */
+	rate = samsung_get_pll_settings(pll, drate);
+	if (!rate) {
+		pr_err("%s: Invalid rate : %lu for pll clk %s\n", __func__,
+			drate, __clk_get_name(hw->clk));
+		return -EINVAL;
+	}
+
+	tmp = __raw_readl(pll->con_reg);
+
+	if (!(samsung_pll35xx_mp_change(rate, tmp))) {
+		/* If only s change, change just s value only*/
+		tmp &= ~(PLL35XX_SDIV_MASK << PLL35XX_SDIV_SHIFT);
+		tmp |= rate->sdiv << PLL35XX_SDIV_SHIFT;
+		__raw_writel(tmp, pll->con_reg);
+
+		return 0;
+	}
+
+	/* Set PLL lock time. */
+	__raw_writel(rate->pdiv * PLL35XX_LOCK_FACTOR,
+			pll->lock_reg);
+
+	/* Change PLL PMS values */
+	tmp &= ~((PLL35XX_MDIV_MASK << PLL35XX_MDIV_SHIFT) |
+			(PLL35XX_PDIV_MASK << PLL35XX_PDIV_SHIFT) |
+			(PLL35XX_SDIV_MASK << PLL35XX_SDIV_SHIFT));
+	tmp |= (rate->mdiv << PLL35XX_MDIV_SHIFT) |
+			(rate->pdiv << PLL35XX_PDIV_SHIFT) |
+			(rate->sdiv << PLL35XX_SDIV_SHIFT);
+	__raw_writel(tmp, pll->con_reg);
+
+	/* wait_lock_time */
+	do {
+		cpu_relax();
+		tmp = __raw_readl(pll->con_reg);
+	} while (!(tmp & (PLL35XX_LOCK_STAT_MASK
+				<< PLL35XX_LOCK_STAT_SHIFT)));
+	return 0;
+}
+
 static const struct clk_ops samsung_pll35xx_clk_ops = {
 	.recalc_rate = samsung_pll35xx_recalc_rate,
+	.round_rate = samsung_pll_round_rate,
+	.set_rate = samsung_pll35xx_set_rate,
 };
 
-struct clk * __init samsung_clk_register_pll35xx(const char *name,
-			const char *pname, const void __iomem *con_reg)
-{
-	struct samsung_clk_pll35xx *pll;
-	struct clk *clk;
-	struct clk_init_data init;
-
-	pll = kzalloc(sizeof(*pll), GFP_KERNEL);
-	if (!pll) {
-		pr_err("%s: could not allocate pll clk %s\n", __func__, name);
-		return NULL;
-	}
-
-	init.name = name;
-	init.ops = &samsung_pll35xx_clk_ops;
-	init.flags = CLK_GET_RATE_NOCACHE;
-	init.parent_names = &pname;
-	init.num_parents = 1;
-
-	pll->hw.init = &init;
-	pll->con_reg = con_reg;
-
-	clk = clk_register(NULL, &pll->hw);
-	if (IS_ERR(clk)) {
-		pr_err("%s: failed to register pll clock %s\n", __func__,
-				name);
-		kfree(pll);
-	}
-
-	if (clk_register_clkdev(clk, name, NULL))
-		pr_err("%s: failed to register lookup for %s", __func__, name);
-
-	return clk;
-}
+static const struct clk_ops samsung_pll35xx_clk_min_ops = {
+	.recalc_rate = samsung_pll35xx_recalc_rate,
+};
 
 /*
  * PLL36xx Clock Type
  */
+/* Maximum lock time can be 3000 * PDIV cycles */
+#define PLL36XX_LOCK_FACTOR    (3000)
 
 #define PLL36XX_KDIV_MASK	(0xFFFF)
 #define PLL36XX_MDIV_MASK	(0x1FF)
@@ -99,18 +173,13 @@
 #define PLL36XX_MDIV_SHIFT	(16)
 #define PLL36XX_PDIV_SHIFT	(8)
 #define PLL36XX_SDIV_SHIFT	(0)
-
-struct samsung_clk_pll36xx {
-	struct clk_hw		hw;
-	const void __iomem	*con_reg;
-};
-
-#define to_clk_pll36xx(_hw) container_of(_hw, struct samsung_clk_pll36xx, hw)
+#define PLL36XX_KDIV_SHIFT	(0)
+#define PLL36XX_LOCK_STAT_SHIFT	(29)
 
 static unsigned long samsung_pll36xx_recalc_rate(struct clk_hw *hw,
 				unsigned long parent_rate)
 {
-	struct samsung_clk_pll36xx *pll = to_clk_pll36xx(hw);
+	struct samsung_clk_pll *pll = to_clk_pll(hw);
 	u32 mdiv, pdiv, sdiv, pll_con0, pll_con1;
 	s16 kdiv;
 	u64 fvco = parent_rate;
@@ -129,68 +198,102 @@
 	return (unsigned long)fvco;
 }
 
+static inline bool samsung_pll36xx_mpk_change(
+	const struct samsung_pll_rate_table *rate, u32 pll_con0, u32 pll_con1)
+{
+	u32 old_mdiv, old_pdiv, old_kdiv;
+
+	old_mdiv = (pll_con0 >> PLL36XX_MDIV_SHIFT) & PLL36XX_MDIV_MASK;
+	old_pdiv = (pll_con0 >> PLL36XX_PDIV_SHIFT) & PLL36XX_PDIV_MASK;
+	old_kdiv = (pll_con1 >> PLL36XX_KDIV_SHIFT) & PLL36XX_KDIV_MASK;
+
+	return (rate->mdiv != old_mdiv || rate->pdiv != old_pdiv ||
+		rate->kdiv != old_kdiv);
+}
+
+static int samsung_pll36xx_set_rate(struct clk_hw *hw, unsigned long drate,
+					unsigned long parent_rate)
+{
+	struct samsung_clk_pll *pll = to_clk_pll(hw);
+	u32 tmp, pll_con0, pll_con1;
+	const struct samsung_pll_rate_table *rate;
+
+	rate = samsung_get_pll_settings(pll, drate);
+	if (!rate) {
+		pr_err("%s: Invalid rate : %lu for pll clk %s\n", __func__,
+			drate, __clk_get_name(hw->clk));
+		return -EINVAL;
+	}
+
+	pll_con0 = __raw_readl(pll->con_reg);
+	pll_con1 = __raw_readl(pll->con_reg + 4);
+
+	if (!(samsung_pll36xx_mpk_change(rate, pll_con0, pll_con1))) {
+		/* If only s change, change just s value only*/
+		pll_con0 &= ~(PLL36XX_SDIV_MASK << PLL36XX_SDIV_SHIFT);
+		pll_con0 |= (rate->sdiv << PLL36XX_SDIV_SHIFT);
+		__raw_writel(pll_con0, pll->con_reg);
+
+		return 0;
+	}
+
+	/* Set PLL lock time. */
+	__raw_writel(rate->pdiv * PLL36XX_LOCK_FACTOR, pll->lock_reg);
+
+	 /* Change PLL PMS values */
+	pll_con0 &= ~((PLL36XX_MDIV_MASK << PLL36XX_MDIV_SHIFT) |
+			(PLL36XX_PDIV_MASK << PLL36XX_PDIV_SHIFT) |
+			(PLL36XX_SDIV_MASK << PLL36XX_SDIV_SHIFT));
+	pll_con0 |= (rate->mdiv << PLL36XX_MDIV_SHIFT) |
+			(rate->pdiv << PLL36XX_PDIV_SHIFT) |
+			(rate->sdiv << PLL36XX_SDIV_SHIFT);
+	__raw_writel(pll_con0, pll->con_reg);
+
+	pll_con1 &= ~(PLL36XX_KDIV_MASK << PLL36XX_KDIV_SHIFT);
+	pll_con1 |= rate->kdiv << PLL36XX_KDIV_SHIFT;
+	__raw_writel(pll_con1, pll->con_reg + 4);
+
+	/* wait_lock_time */
+	do {
+		cpu_relax();
+		tmp = __raw_readl(pll->con_reg);
+	} while (!(tmp & (1 << PLL36XX_LOCK_STAT_SHIFT)));
+
+	return 0;
+}
+
 static const struct clk_ops samsung_pll36xx_clk_ops = {
 	.recalc_rate = samsung_pll36xx_recalc_rate,
+	.set_rate = samsung_pll36xx_set_rate,
+	.round_rate = samsung_pll_round_rate,
 };
 
-struct clk * __init samsung_clk_register_pll36xx(const char *name,
-			const char *pname, const void __iomem *con_reg)
-{
-	struct samsung_clk_pll36xx *pll;
-	struct clk *clk;
-	struct clk_init_data init;
-
-	pll = kzalloc(sizeof(*pll), GFP_KERNEL);
-	if (!pll) {
-		pr_err("%s: could not allocate pll clk %s\n", __func__, name);
-		return NULL;
-	}
-
-	init.name = name;
-	init.ops = &samsung_pll36xx_clk_ops;
-	init.flags = CLK_GET_RATE_NOCACHE;
-	init.parent_names = &pname;
-	init.num_parents = 1;
-
-	pll->hw.init = &init;
-	pll->con_reg = con_reg;
-
-	clk = clk_register(NULL, &pll->hw);
-	if (IS_ERR(clk)) {
-		pr_err("%s: failed to register pll clock %s\n", __func__,
-				name);
-		kfree(pll);
-	}
-
-	if (clk_register_clkdev(clk, name, NULL))
-		pr_err("%s: failed to register lookup for %s", __func__, name);
-
-	return clk;
-}
+static const struct clk_ops samsung_pll36xx_clk_min_ops = {
+	.recalc_rate = samsung_pll36xx_recalc_rate,
+};
 
 /*
  * PLL45xx Clock Type
  */
+#define PLL4502_LOCK_FACTOR	400
+#define PLL4508_LOCK_FACTOR	240
 
 #define PLL45XX_MDIV_MASK	(0x3FF)
 #define PLL45XX_PDIV_MASK	(0x3F)
 #define PLL45XX_SDIV_MASK	(0x7)
+#define PLL45XX_AFC_MASK	(0x1F)
 #define PLL45XX_MDIV_SHIFT	(16)
 #define PLL45XX_PDIV_SHIFT	(8)
 #define PLL45XX_SDIV_SHIFT	(0)
+#define PLL45XX_AFC_SHIFT	(0)
 
-struct samsung_clk_pll45xx {
-	struct clk_hw		hw;
-	enum pll45xx_type	type;
-	const void __iomem	*con_reg;
-};
-
-#define to_clk_pll45xx(_hw) container_of(_hw, struct samsung_clk_pll45xx, hw)
+#define PLL45XX_ENABLE		BIT(31)
+#define PLL45XX_LOCKED		BIT(29)
 
 static unsigned long samsung_pll45xx_recalc_rate(struct clk_hw *hw,
 				unsigned long parent_rate)
 {
-	struct samsung_clk_pll45xx *pll = to_clk_pll45xx(hw);
+	struct samsung_clk_pll *pll = to_clk_pll(hw);
 	u32 mdiv, pdiv, sdiv, pll_con;
 	u64 fvco = parent_rate;
 
@@ -208,54 +311,113 @@
 	return (unsigned long)fvco;
 }
 
+static bool samsung_pll45xx_mp_change(u32 pll_con0, u32 pll_con1,
+				const struct samsung_pll_rate_table *rate)
+{
+	u32 old_mdiv, old_pdiv, old_afc;
+
+	old_mdiv = (pll_con0 >> PLL45XX_MDIV_SHIFT) & PLL45XX_MDIV_MASK;
+	old_pdiv = (pll_con0 >> PLL45XX_PDIV_SHIFT) & PLL45XX_PDIV_MASK;
+	old_afc = (pll_con1 >> PLL45XX_AFC_SHIFT) & PLL45XX_AFC_MASK;
+
+	return (old_mdiv != rate->mdiv || old_pdiv != rate->pdiv
+		|| old_afc != rate->afc);
+}
+
+static int samsung_pll45xx_set_rate(struct clk_hw *hw, unsigned long drate,
+					unsigned long prate)
+{
+	struct samsung_clk_pll *pll = to_clk_pll(hw);
+	const struct samsung_pll_rate_table *rate;
+	u32 con0, con1;
+	ktime_t start;
+
+	/* Get required rate settings from table */
+	rate = samsung_get_pll_settings(pll, drate);
+	if (!rate) {
+		pr_err("%s: Invalid rate : %lu for pll clk %s\n", __func__,
+			drate, __clk_get_name(hw->clk));
+		return -EINVAL;
+	}
+
+	con0 = __raw_readl(pll->con_reg);
+	con1 = __raw_readl(pll->con_reg + 0x4);
+
+	if (!(samsung_pll45xx_mp_change(con0, con1, rate))) {
+		/* If only s change, change just s value only*/
+		con0 &= ~(PLL45XX_SDIV_MASK << PLL45XX_SDIV_SHIFT);
+		con0 |= rate->sdiv << PLL45XX_SDIV_SHIFT;
+		__raw_writel(con0, pll->con_reg);
+
+		return 0;
+	}
+
+	/* Set PLL PMS values. */
+	con0 &= ~((PLL45XX_MDIV_MASK << PLL45XX_MDIV_SHIFT) |
+			(PLL45XX_PDIV_MASK << PLL45XX_PDIV_SHIFT) |
+			(PLL45XX_SDIV_MASK << PLL45XX_SDIV_SHIFT));
+	con0 |= (rate->mdiv << PLL45XX_MDIV_SHIFT) |
+			(rate->pdiv << PLL45XX_PDIV_SHIFT) |
+			(rate->sdiv << PLL45XX_SDIV_SHIFT);
+
+	/* Set PLL AFC value. */
+	con1 = __raw_readl(pll->con_reg + 0x4);
+	con1 &= ~(PLL45XX_AFC_MASK << PLL45XX_AFC_SHIFT);
+	con1 |= (rate->afc << PLL45XX_AFC_SHIFT);
+
+	/* Set PLL lock time. */
+	switch (pll->type) {
+	case pll_4502:
+		__raw_writel(rate->pdiv * PLL4502_LOCK_FACTOR, pll->lock_reg);
+		break;
+	case pll_4508:
+		__raw_writel(rate->pdiv * PLL4508_LOCK_FACTOR, pll->lock_reg);
+		break;
+	default:
+		break;
+	};
+
+	/* Set new configuration. */
+	__raw_writel(con1, pll->con_reg + 0x4);
+	__raw_writel(con0, pll->con_reg);
+
+	/* Wait for locking. */
+	start = ktime_get();
+	while (!(__raw_readl(pll->con_reg) & PLL45XX_LOCKED)) {
+		ktime_t delta = ktime_sub(ktime_get(), start);
+
+		if (ktime_to_ms(delta) > PLL_TIMEOUT_MS) {
+			pr_err("%s: could not lock PLL %s\n",
+					__func__, __clk_get_name(hw->clk));
+			return -EFAULT;
+		}
+
+		cpu_relax();
+	}
+
+	return 0;
+}
+
 static const struct clk_ops samsung_pll45xx_clk_ops = {
 	.recalc_rate = samsung_pll45xx_recalc_rate,
+	.round_rate = samsung_pll_round_rate,
+	.set_rate = samsung_pll45xx_set_rate,
 };
 
-struct clk * __init samsung_clk_register_pll45xx(const char *name,
-			const char *pname, const void __iomem *con_reg,
-			enum pll45xx_type type)
-{
-	struct samsung_clk_pll45xx *pll;
-	struct clk *clk;
-	struct clk_init_data init;
-
-	pll = kzalloc(sizeof(*pll), GFP_KERNEL);
-	if (!pll) {
-		pr_err("%s: could not allocate pll clk %s\n", __func__, name);
-		return NULL;
-	}
-
-	init.name = name;
-	init.ops = &samsung_pll45xx_clk_ops;
-	init.flags = CLK_GET_RATE_NOCACHE;
-	init.parent_names = &pname;
-	init.num_parents = 1;
-
-	pll->hw.init = &init;
-	pll->con_reg = con_reg;
-	pll->type = type;
-
-	clk = clk_register(NULL, &pll->hw);
-	if (IS_ERR(clk)) {
-		pr_err("%s: failed to register pll clock %s\n", __func__,
-				name);
-		kfree(pll);
-	}
-
-	if (clk_register_clkdev(clk, name, NULL))
-		pr_err("%s: failed to register lookup for %s", __func__, name);
-
-	return clk;
-}
+static const struct clk_ops samsung_pll45xx_clk_min_ops = {
+	.recalc_rate = samsung_pll45xx_recalc_rate,
+};
 
 /*
  * PLL46xx Clock Type
  */
+#define PLL46XX_LOCK_FACTOR	3000
 
+#define PLL46XX_VSEL_MASK	(1)
 #define PLL46XX_MDIV_MASK	(0x1FF)
 #define PLL46XX_PDIV_MASK	(0x3F)
 #define PLL46XX_SDIV_MASK	(0x7)
+#define PLL46XX_VSEL_SHIFT	(27)
 #define PLL46XX_MDIV_SHIFT	(16)
 #define PLL46XX_PDIV_SHIFT	(8)
 #define PLL46XX_SDIV_SHIFT	(0)
@@ -263,19 +425,20 @@
 #define PLL46XX_KDIV_MASK	(0xFFFF)
 #define PLL4650C_KDIV_MASK	(0xFFF)
 #define PLL46XX_KDIV_SHIFT	(0)
+#define PLL46XX_MFR_MASK	(0x3F)
+#define PLL46XX_MRR_MASK	(0x1F)
+#define PLL46XX_KDIV_SHIFT	(0)
+#define PLL46XX_MFR_SHIFT	(16)
+#define PLL46XX_MRR_SHIFT	(24)
 
-struct samsung_clk_pll46xx {
-	struct clk_hw		hw;
-	enum pll46xx_type	type;
-	const void __iomem	*con_reg;
-};
-
-#define to_clk_pll46xx(_hw) container_of(_hw, struct samsung_clk_pll46xx, hw)
+#define PLL46XX_ENABLE		BIT(31)
+#define PLL46XX_LOCKED		BIT(29)
+#define PLL46XX_VSEL		BIT(27)
 
 static unsigned long samsung_pll46xx_recalc_rate(struct clk_hw *hw,
 				unsigned long parent_rate)
 {
-	struct samsung_clk_pll46xx *pll = to_clk_pll46xx(hw);
+	struct samsung_clk_pll *pll = to_clk_pll(hw);
 	u32 mdiv, pdiv, sdiv, kdiv, pll_con0, pll_con1, shift;
 	u64 fvco = parent_rate;
 
@@ -295,47 +458,175 @@
 	return (unsigned long)fvco;
 }
 
+static bool samsung_pll46xx_mpk_change(u32 pll_con0, u32 pll_con1,
+				const struct samsung_pll_rate_table *rate)
+{
+	u32 old_mdiv, old_pdiv, old_kdiv;
+
+	old_mdiv = (pll_con0 >> PLL46XX_MDIV_SHIFT) & PLL46XX_MDIV_MASK;
+	old_pdiv = (pll_con0 >> PLL46XX_PDIV_SHIFT) & PLL46XX_PDIV_MASK;
+	old_kdiv = (pll_con1 >> PLL46XX_KDIV_SHIFT) & PLL46XX_KDIV_MASK;
+
+	return (old_mdiv != rate->mdiv || old_pdiv != rate->pdiv
+		|| old_kdiv != rate->kdiv);
+}
+
+static int samsung_pll46xx_set_rate(struct clk_hw *hw, unsigned long drate,
+					unsigned long prate)
+{
+	struct samsung_clk_pll *pll = to_clk_pll(hw);
+	const struct samsung_pll_rate_table *rate;
+	u32 con0, con1, lock;
+	ktime_t start;
+
+	/* Get required rate settings from table */
+	rate = samsung_get_pll_settings(pll, drate);
+	if (!rate) {
+		pr_err("%s: Invalid rate : %lu for pll clk %s\n", __func__,
+			drate, __clk_get_name(hw->clk));
+		return -EINVAL;
+	}
+
+	con0 = __raw_readl(pll->con_reg);
+	con1 = __raw_readl(pll->con_reg + 0x4);
+
+	if (!(samsung_pll46xx_mpk_change(con0, con1, rate))) {
+		/* If only s change, change just s value only*/
+		con0 &= ~(PLL46XX_SDIV_MASK << PLL46XX_SDIV_SHIFT);
+		con0 |= rate->sdiv << PLL46XX_SDIV_SHIFT;
+		__raw_writel(con0, pll->con_reg);
+
+		return 0;
+	}
+
+	/* Set PLL lock time. */
+	lock = rate->pdiv * PLL46XX_LOCK_FACTOR;
+	if (lock > 0xffff)
+		/* Maximum lock time bitfield is 16-bit. */
+		lock = 0xffff;
+
+	/* Set PLL PMS and VSEL values. */
+	con0 &= ~((PLL46XX_MDIV_MASK << PLL46XX_MDIV_SHIFT) |
+			(PLL46XX_PDIV_MASK << PLL46XX_PDIV_SHIFT) |
+			(PLL46XX_SDIV_MASK << PLL46XX_SDIV_SHIFT) |
+			(PLL46XX_VSEL_MASK << PLL46XX_VSEL_SHIFT));
+	con0 |= (rate->mdiv << PLL46XX_MDIV_SHIFT) |
+			(rate->pdiv << PLL46XX_PDIV_SHIFT) |
+			(rate->sdiv << PLL46XX_SDIV_SHIFT) |
+			(rate->vsel << PLL46XX_VSEL_SHIFT);
+
+	/* Set PLL K, MFR and MRR values. */
+	con1 = __raw_readl(pll->con_reg + 0x4);
+	con1 &= ~((PLL46XX_KDIV_MASK << PLL46XX_KDIV_SHIFT) |
+			(PLL46XX_MFR_MASK << PLL46XX_MFR_SHIFT) |
+			(PLL46XX_MRR_MASK << PLL46XX_MRR_SHIFT));
+	con1 |= (rate->kdiv << PLL46XX_KDIV_SHIFT) |
+			(rate->mfr << PLL46XX_MFR_SHIFT) |
+			(rate->mrr << PLL46XX_MRR_SHIFT);
+
+	/* Write configuration to PLL */
+	__raw_writel(lock, pll->lock_reg);
+	__raw_writel(con0, pll->con_reg);
+	__raw_writel(con1, pll->con_reg + 0x4);
+
+	/* Wait for locking. */
+	start = ktime_get();
+	while (!(__raw_readl(pll->con_reg) & PLL46XX_LOCKED)) {
+		ktime_t delta = ktime_sub(ktime_get(), start);
+
+		if (ktime_to_ms(delta) > PLL_TIMEOUT_MS) {
+			pr_err("%s: could not lock PLL %s\n",
+					__func__, __clk_get_name(hw->clk));
+			return -EFAULT;
+		}
+
+		cpu_relax();
+	}
+
+	return 0;
+}
+
 static const struct clk_ops samsung_pll46xx_clk_ops = {
 	.recalc_rate = samsung_pll46xx_recalc_rate,
+	.round_rate = samsung_pll_round_rate,
+	.set_rate = samsung_pll46xx_set_rate,
+};
+
+static const struct clk_ops samsung_pll46xx_clk_min_ops = {
+	.recalc_rate = samsung_pll46xx_recalc_rate,
 };
 
-struct clk * __init samsung_clk_register_pll46xx(const char *name,
-			const char *pname, const void __iomem *con_reg,
-			enum pll46xx_type type)
+/*
+ * PLL6552 Clock Type
+ */
+
+#define PLL6552_MDIV_MASK	0x3ff
+#define PLL6552_PDIV_MASK	0x3f
+#define PLL6552_SDIV_MASK	0x7
+#define PLL6552_MDIV_SHIFT	16
+#define PLL6552_PDIV_SHIFT	8
+#define PLL6552_SDIV_SHIFT	0
+
+static unsigned long samsung_pll6552_recalc_rate(struct clk_hw *hw,
+						unsigned long parent_rate)
 {
-	struct samsung_clk_pll46xx *pll;
-	struct clk *clk;
-	struct clk_init_data init;
+	struct samsung_clk_pll *pll = to_clk_pll(hw);
+	u32 mdiv, pdiv, sdiv, pll_con;
+	u64 fvco = parent_rate;
 
-	pll = kzalloc(sizeof(*pll), GFP_KERNEL);
-	if (!pll) {
-		pr_err("%s: could not allocate pll clk %s\n", __func__, name);
-		return NULL;
-	}
+	pll_con = __raw_readl(pll->con_reg);
+	mdiv = (pll_con >> PLL6552_MDIV_SHIFT) & PLL6552_MDIV_MASK;
+	pdiv = (pll_con >> PLL6552_PDIV_SHIFT) & PLL6552_PDIV_MASK;
+	sdiv = (pll_con >> PLL6552_SDIV_SHIFT) & PLL6552_SDIV_MASK;
 
-	init.name = name;
-	init.ops = &samsung_pll46xx_clk_ops;
-	init.flags = CLK_GET_RATE_NOCACHE;
-	init.parent_names = &pname;
-	init.num_parents = 1;
+	fvco *= mdiv;
+	do_div(fvco, (pdiv << sdiv));
 
-	pll->hw.init = &init;
-	pll->con_reg = con_reg;
-	pll->type = type;
-
-	clk = clk_register(NULL, &pll->hw);
-	if (IS_ERR(clk)) {
-		pr_err("%s: failed to register pll clock %s\n", __func__,
-				name);
-		kfree(pll);
-	}
-
-	if (clk_register_clkdev(clk, name, NULL))
-		pr_err("%s: failed to register lookup for %s", __func__, name);
-
-	return clk;
+	return (unsigned long)fvco;
 }
 
+static const struct clk_ops samsung_pll6552_clk_ops = {
+	.recalc_rate = samsung_pll6552_recalc_rate,
+};
+
+/*
+ * PLL6553 Clock Type
+ */
+
+#define PLL6553_MDIV_MASK	0xff
+#define PLL6553_PDIV_MASK	0x3f
+#define PLL6553_SDIV_MASK	0x7
+#define PLL6553_KDIV_MASK	0xffff
+#define PLL6553_MDIV_SHIFT	16
+#define PLL6553_PDIV_SHIFT	8
+#define PLL6553_SDIV_SHIFT	0
+#define PLL6553_KDIV_SHIFT	0
+
+static unsigned long samsung_pll6553_recalc_rate(struct clk_hw *hw,
+						unsigned long parent_rate)
+{
+	struct samsung_clk_pll *pll = to_clk_pll(hw);
+	u32 mdiv, pdiv, sdiv, kdiv, pll_con0, pll_con1;
+	u64 fvco = parent_rate;
+
+	pll_con0 = __raw_readl(pll->con_reg);
+	pll_con1 = __raw_readl(pll->con_reg + 0x4);
+	mdiv = (pll_con0 >> PLL6553_MDIV_SHIFT) & PLL6553_MDIV_MASK;
+	pdiv = (pll_con0 >> PLL6553_PDIV_SHIFT) & PLL6553_PDIV_MASK;
+	sdiv = (pll_con0 >> PLL6553_SDIV_SHIFT) & PLL6553_SDIV_MASK;
+	kdiv = (pll_con1 >> PLL6553_KDIV_SHIFT) & PLL6553_KDIV_MASK;
+
+	fvco *= (mdiv << 16) + kdiv;
+	do_div(fvco, (pdiv << sdiv));
+	fvco >>= 16;
+
+	return (unsigned long)fvco;
+}
+
+static const struct clk_ops samsung_pll6553_clk_ops = {
+	.recalc_rate = samsung_pll6553_recalc_rate,
+};
+
 /*
  * PLL2550x Clock Type
  */
@@ -418,3 +709,117 @@
 
 	return clk;
 }
+
+static void __init _samsung_clk_register_pll(struct samsung_pll_clock *pll_clk,
+						void __iomem *base)
+{
+	struct samsung_clk_pll *pll;
+	struct clk *clk;
+	struct clk_init_data init;
+	int ret, len;
+
+	pll = kzalloc(sizeof(*pll), GFP_KERNEL);
+	if (!pll) {
+		pr_err("%s: could not allocate pll clk %s\n",
+			__func__, pll_clk->name);
+		return;
+	}
+
+	init.name = pll_clk->name;
+	init.flags = pll_clk->flags;
+	init.parent_names = &pll_clk->parent_name;
+	init.num_parents = 1;
+
+	if (pll_clk->rate_table) {
+		/* find count of rates in rate_table */
+		for (len = 0; pll_clk->rate_table[len].rate != 0; )
+			len++;
+
+		pll->rate_count = len;
+		pll->rate_table = kmemdup(pll_clk->rate_table,
+					pll->rate_count *
+					sizeof(struct samsung_pll_rate_table),
+					GFP_KERNEL);
+		WARN(!pll->rate_table,
+			"%s: could not allocate rate table for %s\n",
+			__func__, pll_clk->name);
+	}
+
+	switch (pll_clk->type) {
+	/* clk_ops for 35xx and 2550 are similar */
+	case pll_35xx:
+	case pll_2550:
+		if (!pll->rate_table)
+			init.ops = &samsung_pll35xx_clk_min_ops;
+		else
+			init.ops = &samsung_pll35xx_clk_ops;
+		break;
+	case pll_4500:
+		init.ops = &samsung_pll45xx_clk_min_ops;
+		break;
+	case pll_4502:
+	case pll_4508:
+		if (!pll->rate_table)
+			init.ops = &samsung_pll45xx_clk_min_ops;
+		else
+			init.ops = &samsung_pll45xx_clk_ops;
+		break;
+	/* clk_ops for 36xx and 2650 are similar */
+	case pll_36xx:
+	case pll_2650:
+		if (!pll->rate_table)
+			init.ops = &samsung_pll36xx_clk_min_ops;
+		else
+			init.ops = &samsung_pll36xx_clk_ops;
+		break;
+	case pll_6552:
+		init.ops = &samsung_pll6552_clk_ops;
+		break;
+	case pll_6553:
+		init.ops = &samsung_pll6553_clk_ops;
+		break;
+	case pll_4600:
+	case pll_4650:
+	case pll_4650c:
+		if (!pll->rate_table)
+			init.ops = &samsung_pll46xx_clk_min_ops;
+		else
+			init.ops = &samsung_pll46xx_clk_ops;
+		break;
+	default:
+		pr_warn("%s: Unknown pll type for pll clk %s\n",
+			__func__, pll_clk->name);
+	}
+
+	pll->hw.init = &init;
+	pll->type = pll_clk->type;
+	pll->lock_reg = base + pll_clk->lock_offset;
+	pll->con_reg = base + pll_clk->con_offset;
+
+	clk = clk_register(NULL, &pll->hw);
+	if (IS_ERR(clk)) {
+		pr_err("%s: failed to register pll clock %s : %ld\n",
+			__func__, pll_clk->name, PTR_ERR(clk));
+		kfree(pll);
+		return;
+	}
+
+	samsung_clk_add_lookup(clk, pll_clk->id);
+
+	if (!pll_clk->alias)
+		return;
+
+	ret = clk_register_clkdev(clk, pll_clk->alias, pll_clk->dev_name);
+	if (ret)
+		pr_err("%s: failed to register lookup for %s : %d",
+			__func__, pll_clk->name, ret);
+}
+
+void __init samsung_clk_register_pll(struct samsung_pll_clock *pll_list,
+				unsigned int nr_pll, void __iomem *base)
+{
+	int cnt;
+
+	for (cnt = 0; cnt < nr_pll; cnt++)
+		_samsung_clk_register_pll(&pll_list[cnt], base);
+}
diff --git a/drivers/clk/samsung/clk-pll.h b/drivers/clk/samsung/clk-pll.h
index f33786e..6c39030 100644
--- a/drivers/clk/samsung/clk-pll.h
+++ b/drivers/clk/samsung/clk-pll.h
@@ -12,28 +12,83 @@
 #ifndef __SAMSUNG_CLK_PLL_H
 #define __SAMSUNG_CLK_PLL_H
 
-enum pll45xx_type {
+enum samsung_pll_type {
+	pll_35xx,
+	pll_36xx,
+	pll_2550,
+	pll_2650,
 	pll_4500,
 	pll_4502,
-	pll_4508
-};
-
-enum pll46xx_type {
+	pll_4508,
 	pll_4600,
 	pll_4650,
 	pll_4650c,
+	pll_6552,
+	pll_6553,
 };
 
-extern struct clk * __init samsung_clk_register_pll35xx(const char *name,
-			const char *pname, const void __iomem *con_reg);
-extern struct clk * __init samsung_clk_register_pll36xx(const char *name,
-			const char *pname, const void __iomem *con_reg);
-extern struct clk * __init samsung_clk_register_pll45xx(const char *name,
-			const char *pname, const void __iomem *con_reg,
-			enum pll45xx_type type);
-extern struct clk * __init samsung_clk_register_pll46xx(const char *name,
-			const char *pname, const void __iomem *con_reg,
-			enum pll46xx_type type);
+#define PLL_35XX_RATE(_rate, _m, _p, _s)			\
+	{							\
+		.rate	=	(_rate),				\
+		.mdiv	=	(_m),				\
+		.pdiv	=	(_p),				\
+		.sdiv	=	(_s),				\
+	}
+
+#define PLL_36XX_RATE(_rate, _m, _p, _s, _k)			\
+	{							\
+		.rate	=	(_rate),				\
+		.mdiv	=	(_m),				\
+		.pdiv	=	(_p),				\
+		.sdiv	=	(_s),				\
+		.kdiv	=	(_k),				\
+	}
+
+#define PLL_45XX_RATE(_rate, _m, _p, _s, _afc)			\
+	{							\
+		.rate	=	(_rate),			\
+		.mdiv	=	(_m),				\
+		.pdiv	=	(_p),				\
+		.sdiv	=	(_s),				\
+		.afc	=	(_afc),				\
+	}
+
+#define PLL_4600_RATE(_rate, _m, _p, _s, _k, _vsel)		\
+	{							\
+		.rate	=	(_rate),			\
+		.mdiv	=	(_m),				\
+		.pdiv	=	(_p),				\
+		.sdiv	=	(_s),				\
+		.kdiv	=	(_k),				\
+		.vsel	=	(_vsel),			\
+	}
+
+#define PLL_4650_RATE(_rate, _m, _p, _s, _k, _mfr, _mrr, _vsel)	\
+	{							\
+		.rate	=	(_rate),			\
+		.mdiv	=	(_m),				\
+		.pdiv	=	(_p),				\
+		.sdiv	=	(_s),				\
+		.kdiv	=	(_k),				\
+		.mfr	=	(_mfr),				\
+		.mrr	=	(_mrr),				\
+		.vsel	=	(_vsel),			\
+	}
+
+/* NOTE: Rate table should be kept sorted in descending order. */
+
+struct samsung_pll_rate_table {
+	unsigned int rate;
+	unsigned int pdiv;
+	unsigned int mdiv;
+	unsigned int sdiv;
+	unsigned int kdiv;
+	unsigned int afc;
+	unsigned int mfr;
+	unsigned int mrr;
+	unsigned int vsel;
+};
+
 extern struct clk * __init samsung_clk_register_pll2550x(const char *name,
 			const char *pname, const void __iomem *reg_base,
 			const unsigned long offset);
diff --git a/drivers/clk/samsung/clk-s3c64xx.c b/drivers/clk/samsung/clk-s3c64xx.c
new file mode 100644
index 0000000..7d2c842
--- /dev/null
+++ b/drivers/clk/samsung/clk-s3c64xx.c
@@ -0,0 +1,473 @@
+/*
+ * Copyright (c) 2013 Tomasz Figa <tomasz.figa at gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Common Clock Framework support for all S3C64xx SoCs.
+*/
+
+#include <linux/clk.h>
+#include <linux/clkdev.h>
+#include <linux/clk-provider.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+
+#include <dt-bindings/clock/samsung,s3c64xx-clock.h>
+
+#include "clk.h"
+#include "clk-pll.h"
+
+/* S3C64xx clock controller register offsets. */
+#define APLL_LOCK		0x000
+#define MPLL_LOCK		0x004
+#define EPLL_LOCK		0x008
+#define APLL_CON		0x00c
+#define MPLL_CON		0x010
+#define EPLL_CON0		0x014
+#define EPLL_CON1		0x018
+#define CLK_SRC			0x01c
+#define CLK_DIV0		0x020
+#define CLK_DIV1		0x024
+#define CLK_DIV2		0x028
+#define HCLK_GATE		0x030
+#define PCLK_GATE		0x034
+#define SCLK_GATE		0x038
+#define MEM0_GATE		0x03c
+#define CLK_SRC2		0x10c
+#define OTHERS			0x900
+
+/* Helper macros to define clock arrays. */
+#define FIXED_RATE_CLOCKS(name)	\
+		static struct samsung_fixed_rate_clock name[]
+#define MUX_CLOCKS(name)	\
+		static struct samsung_mux_clock name[]
+#define DIV_CLOCKS(name)	\
+		static struct samsung_div_clock name[]
+#define GATE_CLOCKS(name)	\
+		static struct samsung_gate_clock name[]
+
+/* Helper macros for gate types present on S3C64xx. */
+#define GATE_BUS(_id, cname, pname, o, b) \
+		GATE(_id, cname, pname, o, b, 0, 0)
+#define GATE_SCLK(_id, cname, pname, o, b) \
+		GATE(_id, cname, pname, o, b, CLK_SET_RATE_PARENT, 0)
+#define GATE_ON(_id, cname, pname, o, b) \
+		GATE(_id, cname, pname, o, b, CLK_IGNORE_UNUSED, 0)
+
+/* list of PLLs to be registered */
+enum s3c64xx_plls {
+	apll, mpll, epll,
+};
+
+/*
+ * List of controller registers to be saved and restored during
+ * a suspend/resume cycle.
+ */
+static unsigned long s3c64xx_clk_regs[] __initdata = {
+	APLL_LOCK,
+	MPLL_LOCK,
+	EPLL_LOCK,
+	APLL_CON,
+	MPLL_CON,
+	EPLL_CON0,
+	EPLL_CON1,
+	CLK_SRC,
+	CLK_DIV0,
+	CLK_DIV1,
+	CLK_DIV2,
+	HCLK_GATE,
+	PCLK_GATE,
+	SCLK_GATE,
+};
+
+static unsigned long s3c6410_clk_regs[] __initdata = {
+	CLK_SRC2,
+	MEM0_GATE,
+};
+
+/* List of parent clocks common for all S3C64xx SoCs. */
+PNAME(spi_mmc_p)	= { "mout_epll", "dout_mpll", "fin_pll", "clk27m" };
+PNAME(uart_p)		= { "mout_epll", "dout_mpll" };
+PNAME(audio0_p)		= { "mout_epll", "dout_mpll", "fin_pll", "iiscdclk0",
+				"pcmcdclk0", "none", "none", "none" };
+PNAME(audio1_p)		= { "mout_epll", "dout_mpll", "fin_pll", "iiscdclk1",
+				"pcmcdclk0", "none", "none", "none" };
+PNAME(mfc_p)		= { "hclkx2", "mout_epll" };
+PNAME(apll_p)		= { "fin_pll", "fout_apll" };
+PNAME(mpll_p)		= { "fin_pll", "fout_mpll" };
+PNAME(epll_p)		= { "fin_pll", "fout_epll" };
+PNAME(hclkx2_p)		= { "mout_mpll", "mout_apll" };
+
+/* S3C6400-specific parent clocks. */
+PNAME(scaler_lcd_p6400)	= { "mout_epll", "dout_mpll", "none", "none" };
+PNAME(irda_p6400)	= { "mout_epll", "dout_mpll", "none", "clk48m" };
+PNAME(uhost_p6400)	= { "clk48m", "mout_epll", "dout_mpll", "none" };
+
+/* S3C6410-specific parent clocks. */
+PNAME(clk27_p6410)	= { "clk27m", "fin_pll" };
+PNAME(scaler_lcd_p6410)	= { "mout_epll", "dout_mpll", "fin_pll", "none" };
+PNAME(irda_p6410)	= { "mout_epll", "dout_mpll", "fin_pll", "clk48m" };
+PNAME(uhost_p6410)	= { "clk48m", "mout_epll", "dout_mpll", "fin_pll" };
+PNAME(audio2_p6410)	= { "mout_epll", "dout_mpll", "fin_pll", "iiscdclk2",
+				"pcmcdclk1", "none", "none", "none" };
+
+/* Fixed rate clocks generated outside the SoC. */
+FIXED_RATE_CLOCKS(s3c64xx_fixed_rate_ext_clks) __initdata = {
+	FRATE(0, "fin_pll", NULL, CLK_IS_ROOT, 0),
+	FRATE(0, "xusbxti", NULL, CLK_IS_ROOT, 0),
+};
+
+/* Fixed rate clocks generated inside the SoC. */
+FIXED_RATE_CLOCKS(s3c64xx_fixed_rate_clks) __initdata = {
+	FRATE(CLK27M, "clk27m", NULL, CLK_IS_ROOT, 27000000),
+	FRATE(CLK48M, "clk48m", NULL, CLK_IS_ROOT, 48000000),
+};
+
+/* List of clock muxes present on all S3C64xx SoCs. */
+MUX_CLOCKS(s3c64xx_mux_clks) __initdata = {
+	MUX_F(0, "mout_syncmux", hclkx2_p, OTHERS, 6, 1, 0, CLK_MUX_READ_ONLY),
+	MUX(MOUT_APLL, "mout_apll", apll_p, CLK_SRC, 0, 1),
+	MUX(MOUT_MPLL, "mout_mpll", mpll_p, CLK_SRC, 1, 1),
+	MUX(MOUT_EPLL, "mout_epll", epll_p, CLK_SRC, 2, 1),
+	MUX(MOUT_MFC, "mout_mfc", mfc_p, CLK_SRC, 4, 1),
+	MUX(MOUT_AUDIO0, "mout_audio0", audio0_p, CLK_SRC, 7, 3),
+	MUX(MOUT_AUDIO1, "mout_audio1", audio1_p, CLK_SRC, 10, 3),
+	MUX(MOUT_UART, "mout_uart", uart_p, CLK_SRC, 13, 1),
+	MUX(MOUT_SPI0, "mout_spi0", spi_mmc_p, CLK_SRC, 14, 2),
+	MUX(MOUT_SPI1, "mout_spi1", spi_mmc_p, CLK_SRC, 16, 2),
+	MUX(MOUT_MMC0, "mout_mmc0", spi_mmc_p, CLK_SRC, 18, 2),
+	MUX(MOUT_MMC1, "mout_mmc1", spi_mmc_p, CLK_SRC, 20, 2),
+	MUX(MOUT_MMC2, "mout_mmc2", spi_mmc_p, CLK_SRC, 22, 2),
+};
+
+/* List of clock muxes present on S3C6400. */
+MUX_CLOCKS(s3c6400_mux_clks) __initdata = {
+	MUX(MOUT_UHOST, "mout_uhost", uhost_p6400, CLK_SRC, 5, 2),
+	MUX(MOUT_IRDA, "mout_irda", irda_p6400, CLK_SRC, 24, 2),
+	MUX(MOUT_LCD, "mout_lcd", scaler_lcd_p6400, CLK_SRC, 26, 2),
+	MUX(MOUT_SCALER, "mout_scaler", scaler_lcd_p6400, CLK_SRC, 28, 2),
+};
+
+/* List of clock muxes present on S3C6410. */
+MUX_CLOCKS(s3c6410_mux_clks) __initdata = {
+	MUX(MOUT_UHOST, "mout_uhost", uhost_p6410, CLK_SRC, 5, 2),
+	MUX(MOUT_IRDA, "mout_irda", irda_p6410, CLK_SRC, 24, 2),
+	MUX(MOUT_LCD, "mout_lcd", scaler_lcd_p6410, CLK_SRC, 26, 2),
+	MUX(MOUT_SCALER, "mout_scaler", scaler_lcd_p6410, CLK_SRC, 28, 2),
+	MUX(MOUT_DAC27, "mout_dac27", clk27_p6410, CLK_SRC, 30, 1),
+	MUX(MOUT_TV27, "mout_tv27", clk27_p6410, CLK_SRC, 31, 1),
+	MUX(MOUT_AUDIO2, "mout_audio2", audio2_p6410, CLK_SRC2, 0, 3),
+};
+
+/* List of clock dividers present on all S3C64xx SoCs. */
+DIV_CLOCKS(s3c64xx_div_clks) __initdata = {
+	DIV(DOUT_MPLL, "dout_mpll", "mout_mpll", CLK_DIV0, 4, 1),
+	DIV(HCLKX2, "hclkx2", "mout_syncmux", CLK_DIV0, 9, 3),
+	DIV(HCLK, "hclk", "hclkx2", CLK_DIV0, 8, 1),
+	DIV(PCLK, "pclk", "hclkx2", CLK_DIV0, 12, 4),
+	DIV(DOUT_SECUR, "dout_secur", "hclkx2", CLK_DIV0, 18, 2),
+	DIV(DOUT_CAM, "dout_cam", "hclkx2", CLK_DIV0, 20, 4),
+	DIV(DOUT_JPEG, "dout_jpeg", "hclkx2", CLK_DIV0, 24, 4),
+	DIV(DOUT_MFC, "dout_mfc", "mout_mfc", CLK_DIV0, 28, 4),
+	DIV(DOUT_MMC0, "dout_mmc0", "mout_mmc0", CLK_DIV1, 0, 4),
+	DIV(DOUT_MMC1, "dout_mmc1", "mout_mmc1", CLK_DIV1, 4, 4),
+	DIV(DOUT_MMC2, "dout_mmc2", "mout_mmc2", CLK_DIV1, 8, 4),
+	DIV(DOUT_LCD, "dout_lcd", "mout_lcd", CLK_DIV1, 12, 4),
+	DIV(DOUT_SCALER, "dout_scaler", "mout_scaler", CLK_DIV1, 16, 4),
+	DIV(DOUT_UHOST, "dout_uhost", "mout_uhost", CLK_DIV1, 20, 4),
+	DIV(DOUT_SPI0, "dout_spi0", "mout_spi0", CLK_DIV2, 0, 4),
+	DIV(DOUT_SPI1, "dout_spi1", "mout_spi1", CLK_DIV2, 4, 4),
+	DIV(DOUT_AUDIO0, "dout_audio0", "mout_audio0", CLK_DIV2, 8, 4),
+	DIV(DOUT_AUDIO1, "dout_audio1", "mout_audio1", CLK_DIV2, 12, 4),
+	DIV(DOUT_UART, "dout_uart", "mout_uart", CLK_DIV2, 16, 4),
+	DIV(DOUT_IRDA, "dout_irda", "mout_irda", CLK_DIV2, 20, 4),
+};
+
+/* List of clock dividers present on S3C6400. */
+DIV_CLOCKS(s3c6400_div_clks) __initdata = {
+	DIV(ARMCLK, "armclk", "mout_apll", CLK_DIV0, 0, 3),
+};
+
+/* List of clock dividers present on S3C6410. */
+DIV_CLOCKS(s3c6410_div_clks) __initdata = {
+	DIV(ARMCLK, "armclk", "mout_apll", CLK_DIV0, 0, 4),
+	DIV(DOUT_FIMC, "dout_fimc", "hclk", CLK_DIV1, 24, 4),
+	DIV(DOUT_AUDIO2, "dout_audio2", "mout_audio2", CLK_DIV2, 24, 4),
+};
+
+/* List of clock gates present on all S3C64xx SoCs. */
+GATE_CLOCKS(s3c64xx_gate_clks) __initdata = {
+	GATE_BUS(HCLK_UHOST, "hclk_uhost", "hclk", HCLK_GATE, 29),
+	GATE_BUS(HCLK_SECUR, "hclk_secur", "hclk", HCLK_GATE, 28),
+	GATE_BUS(HCLK_SDMA1, "hclk_sdma1", "hclk", HCLK_GATE, 27),
+	GATE_BUS(HCLK_SDMA0, "hclk_sdma0", "hclk", HCLK_GATE, 26),
+	GATE_ON(HCLK_DDR1, "hclk_ddr1", "hclk", HCLK_GATE, 24),
+	GATE_BUS(HCLK_USB, "hclk_usb", "hclk", HCLK_GATE, 20),
+	GATE_BUS(HCLK_HSMMC2, "hclk_hsmmc2", "hclk", HCLK_GATE, 19),
+	GATE_BUS(HCLK_HSMMC1, "hclk_hsmmc1", "hclk", HCLK_GATE, 18),
+	GATE_BUS(HCLK_HSMMC0, "hclk_hsmmc0", "hclk", HCLK_GATE, 17),
+	GATE_BUS(HCLK_MDP, "hclk_mdp", "hclk", HCLK_GATE, 16),
+	GATE_BUS(HCLK_DHOST, "hclk_dhost", "hclk", HCLK_GATE, 15),
+	GATE_BUS(HCLK_IHOST, "hclk_ihost", "hclk", HCLK_GATE, 14),
+	GATE_BUS(HCLK_DMA1, "hclk_dma1", "hclk", HCLK_GATE, 13),
+	GATE_BUS(HCLK_DMA0, "hclk_dma0", "hclk", HCLK_GATE, 12),
+	GATE_BUS(HCLK_JPEG, "hclk_jpeg", "hclk", HCLK_GATE, 11),
+	GATE_BUS(HCLK_CAMIF, "hclk_camif", "hclk", HCLK_GATE, 10),
+	GATE_BUS(HCLK_SCALER, "hclk_scaler", "hclk", HCLK_GATE, 9),
+	GATE_BUS(HCLK_2D, "hclk_2d", "hclk", HCLK_GATE, 8),
+	GATE_BUS(HCLK_TV, "hclk_tv", "hclk", HCLK_GATE, 7),
+	GATE_BUS(HCLK_POST0, "hclk_post0", "hclk", HCLK_GATE, 5),
+	GATE_BUS(HCLK_ROT, "hclk_rot", "hclk", HCLK_GATE, 4),
+	GATE_BUS(HCLK_LCD, "hclk_lcd", "hclk", HCLK_GATE, 3),
+	GATE_BUS(HCLK_TZIC, "hclk_tzic", "hclk", HCLK_GATE, 2),
+	GATE_ON(HCLK_INTC, "hclk_intc", "hclk", HCLK_GATE, 1),
+	GATE_ON(PCLK_SKEY, "pclk_skey", "pclk", PCLK_GATE, 24),
+	GATE_ON(PCLK_CHIPID, "pclk_chipid", "pclk", PCLK_GATE, 23),
+	GATE_BUS(PCLK_SPI1, "pclk_spi1", "pclk", PCLK_GATE, 22),
+	GATE_BUS(PCLK_SPI0, "pclk_spi0", "pclk", PCLK_GATE, 21),
+	GATE_BUS(PCLK_HSIRX, "pclk_hsirx", "pclk", PCLK_GATE, 20),
+	GATE_BUS(PCLK_HSITX, "pclk_hsitx", "pclk", PCLK_GATE, 19),
+	GATE_ON(PCLK_GPIO, "pclk_gpio", "pclk", PCLK_GATE, 18),
+	GATE_BUS(PCLK_IIC0, "pclk_iic0", "pclk", PCLK_GATE, 17),
+	GATE_BUS(PCLK_IIS1, "pclk_iis1", "pclk", PCLK_GATE, 16),
+	GATE_BUS(PCLK_IIS0, "pclk_iis0", "pclk", PCLK_GATE, 15),
+	GATE_BUS(PCLK_AC97, "pclk_ac97", "pclk", PCLK_GATE, 14),
+	GATE_BUS(PCLK_TZPC, "pclk_tzpc", "pclk", PCLK_GATE, 13),
+	GATE_BUS(PCLK_TSADC, "pclk_tsadc", "pclk", PCLK_GATE, 12),
+	GATE_BUS(PCLK_KEYPAD, "pclk_keypad", "pclk", PCLK_GATE, 11),
+	GATE_BUS(PCLK_IRDA, "pclk_irda", "pclk", PCLK_GATE, 10),
+	GATE_BUS(PCLK_PCM1, "pclk_pcm1", "pclk", PCLK_GATE, 9),
+	GATE_BUS(PCLK_PCM0, "pclk_pcm0", "pclk", PCLK_GATE, 8),
+	GATE_BUS(PCLK_PWM, "pclk_pwm", "pclk", PCLK_GATE, 7),
+	GATE_BUS(PCLK_RTC, "pclk_rtc", "pclk", PCLK_GATE, 6),
+	GATE_BUS(PCLK_WDT, "pclk_wdt", "pclk", PCLK_GATE, 5),
+	GATE_BUS(PCLK_UART3, "pclk_uart3", "pclk", PCLK_GATE, 4),
+	GATE_BUS(PCLK_UART2, "pclk_uart2", "pclk", PCLK_GATE, 3),
+	GATE_BUS(PCLK_UART1, "pclk_uart1", "pclk", PCLK_GATE, 2),
+	GATE_BUS(PCLK_UART0, "pclk_uart0", "pclk", PCLK_GATE, 1),
+	GATE_BUS(PCLK_MFC, "pclk_mfc", "pclk", PCLK_GATE, 0),
+	GATE_SCLK(SCLK_UHOST, "sclk_uhost", "dout_uhost", SCLK_GATE, 30),
+	GATE_SCLK(SCLK_MMC2_48, "sclk_mmc2_48", "clk48m", SCLK_GATE, 29),
+	GATE_SCLK(SCLK_MMC1_48, "sclk_mmc1_48", "clk48m", SCLK_GATE, 28),
+	GATE_SCLK(SCLK_MMC0_48, "sclk_mmc0_48", "clk48m", SCLK_GATE, 27),
+	GATE_SCLK(SCLK_MMC2, "sclk_mmc2", "dout_mmc2", SCLK_GATE, 26),
+	GATE_SCLK(SCLK_MMC1, "sclk_mmc1", "dout_mmc1", SCLK_GATE, 25),
+	GATE_SCLK(SCLK_MMC0, "sclk_mmc0", "dout_mmc0", SCLK_GATE, 24),
+	GATE_SCLK(SCLK_SPI1_48, "sclk_spi1_48", "clk48m", SCLK_GATE, 23),
+	GATE_SCLK(SCLK_SPI0_48, "sclk_spi0_48", "clk48m", SCLK_GATE, 22),
+	GATE_SCLK(SCLK_SPI1, "sclk_spi1", "dout_spi1", SCLK_GATE, 21),
+	GATE_SCLK(SCLK_SPI0, "sclk_spi0", "dout_spi0", SCLK_GATE, 20),
+	GATE_SCLK(SCLK_DAC27, "sclk_dac27", "mout_dac27", SCLK_GATE, 19),
+	GATE_SCLK(SCLK_TV27, "sclk_tv27", "mout_tv27", SCLK_GATE, 18),
+	GATE_SCLK(SCLK_SCALER27, "sclk_scaler27", "clk27m", SCLK_GATE, 17),
+	GATE_SCLK(SCLK_SCALER, "sclk_scaler", "dout_scaler", SCLK_GATE, 16),
+	GATE_SCLK(SCLK_LCD27, "sclk_lcd27", "clk27m", SCLK_GATE, 15),
+	GATE_SCLK(SCLK_LCD, "sclk_lcd", "dout_lcd", SCLK_GATE, 14),
+	GATE_SCLK(SCLK_POST0_27, "sclk_post0_27", "clk27m", SCLK_GATE, 12),
+	GATE_SCLK(SCLK_POST0, "sclk_post0", "dout_lcd", SCLK_GATE, 10),
+	GATE_SCLK(SCLK_AUDIO1, "sclk_audio1", "dout_audio1", SCLK_GATE, 9),
+	GATE_SCLK(SCLK_AUDIO0, "sclk_audio0", "dout_audio0", SCLK_GATE, 8),
+	GATE_SCLK(SCLK_SECUR, "sclk_secur", "dout_secur", SCLK_GATE, 7),
+	GATE_SCLK(SCLK_IRDA, "sclk_irda", "dout_irda", SCLK_GATE, 6),
+	GATE_SCLK(SCLK_UART, "sclk_uart", "dout_uart", SCLK_GATE, 5),
+	GATE_SCLK(SCLK_MFC, "sclk_mfc", "dout_mfc", SCLK_GATE, 3),
+	GATE_SCLK(SCLK_CAM, "sclk_cam", "dout_cam", SCLK_GATE, 2),
+	GATE_SCLK(SCLK_JPEG, "sclk_jpeg", "dout_jpeg", SCLK_GATE, 1),
+};
+
+/* List of clock gates present on S3C6400. */
+GATE_CLOCKS(s3c6400_gate_clks) __initdata = {
+	GATE_ON(HCLK_DDR0, "hclk_ddr0", "hclk", HCLK_GATE, 23),
+	GATE_SCLK(SCLK_ONENAND, "sclk_onenand", "parent", SCLK_GATE, 4),
+};
+
+/* List of clock gates present on S3C6410. */
+GATE_CLOCKS(s3c6410_gate_clks) __initdata = {
+	GATE_BUS(HCLK_3DSE, "hclk_3dse", "hclk", HCLK_GATE, 31),
+	GATE_ON(HCLK_IROM, "hclk_irom", "hclk", HCLK_GATE, 25),
+	GATE_ON(HCLK_MEM1, "hclk_mem1", "hclk", HCLK_GATE, 22),
+	GATE_ON(HCLK_MEM0, "hclk_mem0", "hclk", HCLK_GATE, 21),
+	GATE_BUS(HCLK_MFC, "hclk_mfc", "hclk", HCLK_GATE, 0),
+	GATE_BUS(PCLK_IIC1, "pclk_iic1", "pclk", PCLK_GATE, 27),
+	GATE_BUS(PCLK_IIS2, "pclk_iis2", "pclk", PCLK_GATE, 26),
+	GATE_SCLK(SCLK_FIMC, "sclk_fimc", "dout_fimc", SCLK_GATE, 13),
+	GATE_SCLK(SCLK_AUDIO2, "sclk_audio2", "dout_audio2", SCLK_GATE, 11),
+	GATE_BUS(MEM0_CFCON, "mem0_cfcon", "hclk_mem0", MEM0_GATE, 5),
+	GATE_BUS(MEM0_ONENAND1, "mem0_onenand1", "hclk_mem0", MEM0_GATE, 4),
+	GATE_BUS(MEM0_ONENAND0, "mem0_onenand0", "hclk_mem0", MEM0_GATE, 3),
+	GATE_BUS(MEM0_NFCON, "mem0_nfcon", "hclk_mem0", MEM0_GATE, 2),
+	GATE_ON(MEM0_SROM, "mem0_srom", "hclk_mem0", MEM0_GATE, 1),
+};
+
+/* List of PLL clocks. */
+static struct samsung_pll_clock s3c64xx_pll_clks[] __initdata = {
+	[apll] = PLL(pll_6552, FOUT_APLL, "fout_apll", "fin_pll",
+						APLL_LOCK, APLL_CON, NULL),
+	[mpll] = PLL(pll_6552, FOUT_MPLL, "fout_mpll", "fin_pll",
+						MPLL_LOCK, MPLL_CON, NULL),
+	[epll] = PLL(pll_6553, FOUT_EPLL, "fout_epll", "fin_pll",
+						EPLL_LOCK, EPLL_CON0, NULL),
+};
+
+/* Aliases for common s3c64xx clocks. */
+static struct samsung_clock_alias s3c64xx_clock_aliases[] = {
+	ALIAS(FOUT_APLL, NULL, "fout_apll"),
+	ALIAS(FOUT_MPLL, NULL, "fout_mpll"),
+	ALIAS(FOUT_EPLL, NULL, "fout_epll"),
+	ALIAS(MOUT_EPLL, NULL, "mout_epll"),
+	ALIAS(DOUT_MPLL, NULL, "dout_mpll"),
+	ALIAS(HCLKX2, NULL, "hclk2"),
+	ALIAS(HCLK, NULL, "hclk"),
+	ALIAS(PCLK, NULL, "pclk"),
+	ALIAS(PCLK, NULL, "clk_uart_baud2"),
+	ALIAS(ARMCLK, NULL, "armclk"),
+	ALIAS(HCLK_UHOST, "s3c2410-ohci", "usb-host"),
+	ALIAS(HCLK_USB, "s3c-hsotg", "otg"),
+	ALIAS(HCLK_HSMMC2, "s3c-sdhci.2", "hsmmc"),
+	ALIAS(HCLK_HSMMC2, "s3c-sdhci.2", "mmc_busclk.0"),
+	ALIAS(HCLK_HSMMC1, "s3c-sdhci.1", "hsmmc"),
+	ALIAS(HCLK_HSMMC1, "s3c-sdhci.1", "mmc_busclk.0"),
+	ALIAS(HCLK_HSMMC0, "s3c-sdhci.0", "hsmmc"),
+	ALIAS(HCLK_HSMMC0, "s3c-sdhci.0", "mmc_busclk.0"),
+	ALIAS(HCLK_DMA1, NULL, "dma1"),
+	ALIAS(HCLK_DMA0, NULL, "dma0"),
+	ALIAS(HCLK_CAMIF, "s3c-camif", "camif"),
+	ALIAS(HCLK_LCD, "s3c-fb", "lcd"),
+	ALIAS(PCLK_SPI1, "s3c6410-spi.1", "spi"),
+	ALIAS(PCLK_SPI0, "s3c6410-spi.0", "spi"),
+	ALIAS(PCLK_IIC0, "s3c2440-i2c.0", "i2c"),
+	ALIAS(PCLK_IIS1, "samsung-i2s.1", "iis"),
+	ALIAS(PCLK_IIS0, "samsung-i2s.0", "iis"),
+	ALIAS(PCLK_AC97, "samsung-ac97", "ac97"),
+	ALIAS(PCLK_TSADC, "s3c64xx-adc", "adc"),
+	ALIAS(PCLK_KEYPAD, "samsung-keypad", "keypad"),
+	ALIAS(PCLK_PCM1, "samsung-pcm.1", "pcm"),
+	ALIAS(PCLK_PCM0, "samsung-pcm.0", "pcm"),
+	ALIAS(PCLK_PWM, NULL, "timers"),
+	ALIAS(PCLK_RTC, "s3c64xx-rtc", "rtc"),
+	ALIAS(PCLK_WDT, NULL, "watchdog"),
+	ALIAS(PCLK_UART3, "s3c6400-uart.3", "uart"),
+	ALIAS(PCLK_UART2, "s3c6400-uart.2", "uart"),
+	ALIAS(PCLK_UART1, "s3c6400-uart.1", "uart"),
+	ALIAS(PCLK_UART0, "s3c6400-uart.0", "uart"),
+	ALIAS(SCLK_UHOST, "s3c2410-ohci", "usb-bus-host"),
+	ALIAS(SCLK_MMC2, "s3c-sdhci.2", "mmc_busclk.2"),
+	ALIAS(SCLK_MMC1, "s3c-sdhci.1", "mmc_busclk.2"),
+	ALIAS(SCLK_MMC0, "s3c-sdhci.0", "mmc_busclk.2"),
+	ALIAS(SCLK_SPI1, "s3c6410-spi.1", "spi-bus"),
+	ALIAS(SCLK_SPI0, "s3c6410-spi.0", "spi-bus"),
+	ALIAS(SCLK_AUDIO1, "samsung-pcm.1", "audio-bus"),
+	ALIAS(SCLK_AUDIO1, "samsung-i2s.1", "audio-bus"),
+	ALIAS(SCLK_AUDIO0, "samsung-pcm.0", "audio-bus"),
+	ALIAS(SCLK_AUDIO0, "samsung-i2s.0", "audio-bus"),
+	ALIAS(SCLK_UART, NULL, "clk_uart_baud3"),
+	ALIAS(SCLK_CAM, "s3c-camif", "camera"),
+};
+
+/* Aliases for s3c6400-specific clocks. */
+static struct samsung_clock_alias s3c6400_clock_aliases[] = {
+	/* Nothing to place here yet. */
+};
+
+/* Aliases for s3c6410-specific clocks. */
+static struct samsung_clock_alias s3c6410_clock_aliases[] = {
+	ALIAS(PCLK_IIC1, "s3c2440-i2c.1", "i2c"),
+	ALIAS(PCLK_IIS2, "samsung-i2s.2", "iis"),
+	ALIAS(SCLK_FIMC, "s3c-camif", "fimc"),
+	ALIAS(SCLK_AUDIO2, "samsung-i2s.2", "audio-bus"),
+	ALIAS(MEM0_SROM, NULL, "srom"),
+};
+
+static void __init s3c64xx_clk_register_fixed_ext(unsigned long fin_pll_f,
+							unsigned long xusbxti_f)
+{
+	s3c64xx_fixed_rate_ext_clks[0].fixed_rate = fin_pll_f;
+	s3c64xx_fixed_rate_ext_clks[1].fixed_rate = xusbxti_f;
+	samsung_clk_register_fixed_rate(s3c64xx_fixed_rate_ext_clks,
+				ARRAY_SIZE(s3c64xx_fixed_rate_ext_clks));
+}
+
+/* Register s3c64xx clocks. */
+void __init s3c64xx_clk_init(struct device_node *np, unsigned long xtal_f,
+			     unsigned long xusbxti_f, bool is_s3c6400,
+			     void __iomem *reg_base)
+{
+	unsigned long *soc_regs = NULL;
+	unsigned long nr_soc_regs = 0;
+
+	if (np) {
+		reg_base = of_iomap(np, 0);
+		if (!reg_base)
+			panic("%s: failed to map registers\n", __func__);
+	}
+
+	if (!is_s3c6400) {
+		soc_regs = s3c6410_clk_regs;
+		nr_soc_regs = ARRAY_SIZE(s3c6410_clk_regs);
+	}
+
+	samsung_clk_init(np, reg_base, NR_CLKS, s3c64xx_clk_regs,
+			ARRAY_SIZE(s3c64xx_clk_regs), soc_regs, nr_soc_regs);
+
+	/* Register external clocks. */
+	if (!np)
+		s3c64xx_clk_register_fixed_ext(xtal_f, xusbxti_f);
+
+	/* Register PLLs. */
+	samsung_clk_register_pll(s3c64xx_pll_clks,
+				ARRAY_SIZE(s3c64xx_pll_clks), reg_base);
+
+	/* Register common internal clocks. */
+	samsung_clk_register_fixed_rate(s3c64xx_fixed_rate_clks,
+					ARRAY_SIZE(s3c64xx_fixed_rate_clks));
+	samsung_clk_register_mux(s3c64xx_mux_clks,
+					ARRAY_SIZE(s3c64xx_mux_clks));
+	samsung_clk_register_div(s3c64xx_div_clks,
+					ARRAY_SIZE(s3c64xx_div_clks));
+	samsung_clk_register_gate(s3c64xx_gate_clks,
+					ARRAY_SIZE(s3c64xx_gate_clks));
+
+	/* Register SoC-specific clocks. */
+	if (is_s3c6400) {
+		samsung_clk_register_mux(s3c6400_mux_clks,
+					ARRAY_SIZE(s3c6400_mux_clks));
+		samsung_clk_register_div(s3c6400_div_clks,
+					ARRAY_SIZE(s3c6400_div_clks));
+		samsung_clk_register_gate(s3c6400_gate_clks,
+					ARRAY_SIZE(s3c6400_gate_clks));
+		samsung_clk_register_alias(s3c6400_clock_aliases,
+					ARRAY_SIZE(s3c6400_clock_aliases));
+	} else {
+		samsung_clk_register_mux(s3c6410_mux_clks,
+					ARRAY_SIZE(s3c6410_mux_clks));
+		samsung_clk_register_div(s3c6410_div_clks,
+					ARRAY_SIZE(s3c6410_div_clks));
+		samsung_clk_register_gate(s3c6410_gate_clks,
+					ARRAY_SIZE(s3c6410_gate_clks));
+		samsung_clk_register_alias(s3c6410_clock_aliases,
+					ARRAY_SIZE(s3c6410_clock_aliases));
+	}
+
+	samsung_clk_register_alias(s3c64xx_clock_aliases,
+					ARRAY_SIZE(s3c64xx_clock_aliases));
+
+	pr_info("%s clocks: apll = %lu, mpll = %lu\n"
+		"\tepll = %lu, arm_clk = %lu\n",
+		is_s3c6400 ? "S3C6400" : "S3C6410",
+		_get_rate("fout_apll"),	_get_rate("fout_mpll"),
+		_get_rate("fout_epll"), _get_rate("armclk"));
+}
+
+static void __init s3c6400_clk_init(struct device_node *np)
+{
+	s3c64xx_clk_init(np, 0, 0, true, NULL);
+}
+CLK_OF_DECLARE(s3c6400_clk, "samsung,s3c6400-clock", s3c6400_clk_init);
+
+static void __init s3c6410_clk_init(struct device_node *np)
+{
+	s3c64xx_clk_init(np, 0, 0, false, NULL);
+}
+CLK_OF_DECLARE(s3c6410_clk, "samsung,s3c6410-clock", s3c6410_clk_init);
diff --git a/drivers/clk/samsung/clk.c b/drivers/clk/samsung/clk.c
index cd3c40a..f503f32 100644
--- a/drivers/clk/samsung/clk.c
+++ b/drivers/clk/samsung/clk.c
@@ -307,14 +307,12 @@
 unsigned long _get_rate(const char *clk_name)
 {
 	struct clk *clk;
-	unsigned long rate;
 
-	clk = clk_get(NULL, clk_name);
-	if (IS_ERR(clk)) {
+	clk = __clk_lookup(clk_name);
+	if (!clk) {
 		pr_err("%s: could not find clock %s\n", __func__, clk_name);
 		return 0;
 	}
-	rate = clk_get_rate(clk);
-	clk_put(clk);
-	return rate;
+
+	return clk_get_rate(clk);
 }
diff --git a/drivers/clk/samsung/clk.h b/drivers/clk/samsung/clk.h
index 2f7dba2..31b4174 100644
--- a/drivers/clk/samsung/clk.h
+++ b/drivers/clk/samsung/clk.h
@@ -19,6 +19,7 @@
 #include <linux/clk-provider.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
+#include "clk-pll.h"
 
 /**
  * struct samsung_clock_alias: information about mux clock
@@ -39,6 +40,8 @@
 		.alias		= a,				\
 	}
 
+#define MHZ (1000 * 1000)
+
 /**
  * struct samsung_fixed_rate_clock: information about fixed-rate clock
  * @id: platform specific id of the clock.
@@ -127,7 +130,7 @@
 		.name		= cname,			\
 		.parent_names	= pnames,			\
 		.num_parents	= ARRAY_SIZE(pnames),		\
-		.flags		= f,				\
+		.flags		= (f) | CLK_SET_RATE_NO_REPARENT, \
 		.offset		= o,				\
 		.shift		= s,				\
 		.width		= w,				\
@@ -261,6 +264,54 @@
 	u32	value;
 };
 
+/**
+ * struct samsung_pll_clock: information about pll clock
+ * @id: platform specific id of the clock.
+ * @dev_name: name of the device to which this clock belongs.
+ * @name: name of this pll clock.
+ * @parent_name: name of the parent clock.
+ * @flags: optional flags for basic clock.
+ * @con_offset: offset of the register for configuring the PLL.
+ * @lock_offset: offset of the register for locking the PLL.
+ * @type: Type of PLL to be registered.
+ * @alias: optional clock alias name to be assigned to this clock.
+ */
+struct samsung_pll_clock {
+	unsigned int		id;
+	const char		*dev_name;
+	const char		*name;
+	const char		*parent_name;
+	unsigned long		flags;
+	int			con_offset;
+	int			lock_offset;
+	enum samsung_pll_type	type;
+	const struct samsung_pll_rate_table *rate_table;
+	const char              *alias;
+};
+
+#define __PLL(_typ, _id, _dname, _name, _pname, _flags, _lock, _con,	\
+		_rtable, _alias)					\
+	{								\
+		.id		= _id,					\
+		.type		= _typ,					\
+		.dev_name	= _dname,				\
+		.name		= _name,				\
+		.parent_name	= _pname,				\
+		.flags		= CLK_GET_RATE_NOCACHE,			\
+		.con_offset	= _con,					\
+		.lock_offset	= _lock,				\
+		.rate_table	= _rtable,				\
+		.alias		= _alias,				\
+	}
+
+#define PLL(_typ, _id, _name, _pname, _lock, _con, _rtable)	\
+	__PLL(_typ, _id, NULL, _name, _pname, CLK_GET_RATE_NOCACHE,	\
+		_lock, _con, _rtable, _name)
+
+#define PLL_A(_typ, _id, _name, _pname, _lock, _con, _alias, _rtable) \
+	__PLL(_typ, _id, NULL, _name, _pname, CLK_GET_RATE_NOCACHE,	\
+		_lock, _con, _rtable, _alias)
+
 extern void __init samsung_clk_init(struct device_node *np, void __iomem *base,
 		unsigned long nr_clks, unsigned long *rdump,
 		unsigned long nr_rdump, unsigned long *soc_rdump,
@@ -284,6 +335,8 @@
 		unsigned int nr_clk);
 extern void __init samsung_clk_register_gate(
 		struct samsung_gate_clock *clk_list, unsigned int nr_clk);
+extern void __init samsung_clk_register_pll(struct samsung_pll_clock *pll_list,
+		unsigned int nr_clk, void __iomem *base);
 
 extern unsigned long _get_rate(const char *clk_name);
 
diff --git a/drivers/clk/spear/spear1310_clock.c b/drivers/clk/spear/spear1310_clock.c
index aedbbe1..65894f7 100644
--- a/drivers/clk/spear/spear1310_clock.c
+++ b/drivers/clk/spear/spear1310_clock.c
@@ -416,9 +416,9 @@
 	/* clock derived from 24 or 25 MHz osc clk */
 	/* vco-pll */
 	clk = clk_register_mux(NULL, "vco1_mclk", vco_parents,
-			ARRAY_SIZE(vco_parents), 0, SPEAR1310_PLL_CFG,
-			SPEAR1310_PLL1_CLK_SHIFT, SPEAR1310_PLL_CLK_MASK, 0,
-			&_lock);
+			ARRAY_SIZE(vco_parents), CLK_SET_RATE_NO_REPARENT,
+			SPEAR1310_PLL_CFG, SPEAR1310_PLL1_CLK_SHIFT,
+			SPEAR1310_PLL_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "vco1_mclk", NULL);
 	clk = clk_register_vco_pll("vco1_clk", "pll1_clk", NULL, "vco1_mclk",
 			0, SPEAR1310_PLL1_CTR, SPEAR1310_PLL1_FRQ, pll_rtbl,
@@ -427,9 +427,9 @@
 	clk_register_clkdev(clk1, "pll1_clk", NULL);
 
 	clk = clk_register_mux(NULL, "vco2_mclk", vco_parents,
-			ARRAY_SIZE(vco_parents), 0, SPEAR1310_PLL_CFG,
-			SPEAR1310_PLL2_CLK_SHIFT, SPEAR1310_PLL_CLK_MASK, 0,
-			&_lock);
+			ARRAY_SIZE(vco_parents), CLK_SET_RATE_NO_REPARENT,
+			SPEAR1310_PLL_CFG, SPEAR1310_PLL2_CLK_SHIFT,
+			SPEAR1310_PLL_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "vco2_mclk", NULL);
 	clk = clk_register_vco_pll("vco2_clk", "pll2_clk", NULL, "vco2_mclk",
 			0, SPEAR1310_PLL2_CTR, SPEAR1310_PLL2_FRQ, pll_rtbl,
@@ -438,9 +438,9 @@
 	clk_register_clkdev(clk1, "pll2_clk", NULL);
 
 	clk = clk_register_mux(NULL, "vco3_mclk", vco_parents,
-			ARRAY_SIZE(vco_parents), 0, SPEAR1310_PLL_CFG,
-			SPEAR1310_PLL3_CLK_SHIFT, SPEAR1310_PLL_CLK_MASK, 0,
-			&_lock);
+			ARRAY_SIZE(vco_parents), CLK_SET_RATE_NO_REPARENT,
+			SPEAR1310_PLL_CFG, SPEAR1310_PLL3_CLK_SHIFT,
+			SPEAR1310_PLL_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "vco3_mclk", NULL);
 	clk = clk_register_vco_pll("vco3_clk", "pll3_clk", NULL, "vco3_mclk",
 			0, SPEAR1310_PLL3_CTR, SPEAR1310_PLL3_FRQ, pll_rtbl,
@@ -515,9 +515,9 @@
 
 	/* gpt clocks */
 	clk = clk_register_mux(NULL, "gpt0_mclk", gpt_parents,
-			ARRAY_SIZE(gpt_parents), 0, SPEAR1310_PERIP_CLK_CFG,
-			SPEAR1310_GPT0_CLK_SHIFT, SPEAR1310_GPT_CLK_MASK, 0,
-			&_lock);
+			ARRAY_SIZE(gpt_parents), CLK_SET_RATE_NO_REPARENT,
+			SPEAR1310_PERIP_CLK_CFG, SPEAR1310_GPT0_CLK_SHIFT,
+			SPEAR1310_GPT_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "gpt0_mclk", NULL);
 	clk = clk_register_gate(NULL, "gpt0_clk", "gpt0_mclk", 0,
 			SPEAR1310_PERIP1_CLK_ENB, SPEAR1310_GPT0_CLK_ENB, 0,
@@ -525,9 +525,9 @@
 	clk_register_clkdev(clk, NULL, "gpt0");
 
 	clk = clk_register_mux(NULL, "gpt1_mclk", gpt_parents,
-			ARRAY_SIZE(gpt_parents), 0, SPEAR1310_PERIP_CLK_CFG,
-			SPEAR1310_GPT1_CLK_SHIFT, SPEAR1310_GPT_CLK_MASK, 0,
-			&_lock);
+			ARRAY_SIZE(gpt_parents), CLK_SET_RATE_NO_REPARENT,
+			SPEAR1310_PERIP_CLK_CFG, SPEAR1310_GPT1_CLK_SHIFT,
+			SPEAR1310_GPT_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "gpt1_mclk", NULL);
 	clk = clk_register_gate(NULL, "gpt1_clk", "gpt1_mclk", 0,
 			SPEAR1310_PERIP1_CLK_ENB, SPEAR1310_GPT1_CLK_ENB, 0,
@@ -535,9 +535,9 @@
 	clk_register_clkdev(clk, NULL, "gpt1");
 
 	clk = clk_register_mux(NULL, "gpt2_mclk", gpt_parents,
-			ARRAY_SIZE(gpt_parents), 0, SPEAR1310_PERIP_CLK_CFG,
-			SPEAR1310_GPT2_CLK_SHIFT, SPEAR1310_GPT_CLK_MASK, 0,
-			&_lock);
+			ARRAY_SIZE(gpt_parents), CLK_SET_RATE_NO_REPARENT,
+			SPEAR1310_PERIP_CLK_CFG, SPEAR1310_GPT2_CLK_SHIFT,
+			SPEAR1310_GPT_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "gpt2_mclk", NULL);
 	clk = clk_register_gate(NULL, "gpt2_clk", "gpt2_mclk", 0,
 			SPEAR1310_PERIP2_CLK_ENB, SPEAR1310_GPT2_CLK_ENB, 0,
@@ -545,9 +545,9 @@
 	clk_register_clkdev(clk, NULL, "gpt2");
 
 	clk = clk_register_mux(NULL, "gpt3_mclk", gpt_parents,
-			ARRAY_SIZE(gpt_parents), 0, SPEAR1310_PERIP_CLK_CFG,
-			SPEAR1310_GPT3_CLK_SHIFT, SPEAR1310_GPT_CLK_MASK, 0,
-			&_lock);
+			ARRAY_SIZE(gpt_parents), CLK_SET_RATE_NO_REPARENT,
+			SPEAR1310_PERIP_CLK_CFG, SPEAR1310_GPT3_CLK_SHIFT,
+			SPEAR1310_GPT_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "gpt3_mclk", NULL);
 	clk = clk_register_gate(NULL, "gpt3_clk", "gpt3_mclk", 0,
 			SPEAR1310_PERIP2_CLK_ENB, SPEAR1310_GPT3_CLK_ENB, 0,
@@ -562,7 +562,8 @@
 	clk_register_clkdev(clk1, "uart_syn_gclk", NULL);
 
 	clk = clk_register_mux(NULL, "uart0_mclk", uart0_parents,
-			ARRAY_SIZE(uart0_parents), CLK_SET_RATE_PARENT,
+			ARRAY_SIZE(uart0_parents),
+			CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
 			SPEAR1310_PERIP_CLK_CFG, SPEAR1310_UART_CLK_SHIFT,
 			SPEAR1310_UART_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "uart0_mclk", NULL);
@@ -602,7 +603,8 @@
 	clk_register_clkdev(clk1, "c3_syn_gclk", NULL);
 
 	clk = clk_register_mux(NULL, "c3_mclk", c3_parents,
-			ARRAY_SIZE(c3_parents), CLK_SET_RATE_PARENT,
+			ARRAY_SIZE(c3_parents),
+			CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
 			SPEAR1310_PERIP_CLK_CFG, SPEAR1310_C3_CLK_SHIFT,
 			SPEAR1310_C3_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "c3_mclk", NULL);
@@ -614,8 +616,8 @@
 
 	/* gmac */
 	clk = clk_register_mux(NULL, "phy_input_mclk", gmac_phy_input_parents,
-			ARRAY_SIZE(gmac_phy_input_parents), 0,
-			SPEAR1310_GMAC_CLK_CFG,
+			ARRAY_SIZE(gmac_phy_input_parents),
+			CLK_SET_RATE_NO_REPARENT, SPEAR1310_GMAC_CLK_CFG,
 			SPEAR1310_GMAC_PHY_INPUT_CLK_SHIFT,
 			SPEAR1310_GMAC_PHY_INPUT_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "phy_input_mclk", NULL);
@@ -627,15 +629,16 @@
 	clk_register_clkdev(clk1, "phy_syn_gclk", NULL);
 
 	clk = clk_register_mux(NULL, "phy_mclk", gmac_phy_parents,
-			ARRAY_SIZE(gmac_phy_parents), 0,
+			ARRAY_SIZE(gmac_phy_parents), CLK_SET_RATE_NO_REPARENT,
 			SPEAR1310_PERIP_CLK_CFG, SPEAR1310_GMAC_PHY_CLK_SHIFT,
 			SPEAR1310_GMAC_PHY_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "stmmacphy.0", NULL);
 
 	/* clcd */
 	clk = clk_register_mux(NULL, "clcd_syn_mclk", clcd_synth_parents,
-			ARRAY_SIZE(clcd_synth_parents), 0,
-			SPEAR1310_CLCD_CLK_SYNT, SPEAR1310_CLCD_SYNT_CLK_SHIFT,
+			ARRAY_SIZE(clcd_synth_parents),
+			CLK_SET_RATE_NO_REPARENT, SPEAR1310_CLCD_CLK_SYNT,
+			SPEAR1310_CLCD_SYNT_CLK_SHIFT,
 			SPEAR1310_CLCD_SYNT_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "clcd_syn_mclk", NULL);
 
@@ -645,7 +648,8 @@
 	clk_register_clkdev(clk, "clcd_syn_clk", NULL);
 
 	clk = clk_register_mux(NULL, "clcd_pixel_mclk", clcd_pixel_parents,
-			ARRAY_SIZE(clcd_pixel_parents), CLK_SET_RATE_PARENT,
+			ARRAY_SIZE(clcd_pixel_parents),
+			CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
 			SPEAR1310_PERIP_CLK_CFG, SPEAR1310_CLCD_CLK_SHIFT,
 			SPEAR1310_CLCD_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "clcd_pixel_mclk", NULL);
@@ -657,9 +661,9 @@
 
 	/* i2s */
 	clk = clk_register_mux(NULL, "i2s_src_mclk", i2s_src_parents,
-			ARRAY_SIZE(i2s_src_parents), 0, SPEAR1310_I2S_CLK_CFG,
-			SPEAR1310_I2S_SRC_CLK_SHIFT, SPEAR1310_I2S_SRC_CLK_MASK,
-			0, &_lock);
+			ARRAY_SIZE(i2s_src_parents), CLK_SET_RATE_NO_REPARENT,
+			SPEAR1310_I2S_CLK_CFG, SPEAR1310_I2S_SRC_CLK_SHIFT,
+			SPEAR1310_I2S_SRC_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "i2s_src_mclk", NULL);
 
 	clk = clk_register_aux("i2s_prs1_clk", NULL, "i2s_src_mclk", 0,
@@ -668,7 +672,8 @@
 	clk_register_clkdev(clk, "i2s_prs1_clk", NULL);
 
 	clk = clk_register_mux(NULL, "i2s_ref_mclk", i2s_ref_parents,
-			ARRAY_SIZE(i2s_ref_parents), CLK_SET_RATE_PARENT,
+			ARRAY_SIZE(i2s_ref_parents),
+			CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
 			SPEAR1310_I2S_CLK_CFG, SPEAR1310_I2S_REF_SHIFT,
 			SPEAR1310_I2S_REF_SEL_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "i2s_ref_mclk", NULL);
@@ -806,13 +811,15 @@
 
 	/* RAS clks */
 	clk = clk_register_mux(NULL, "gen_syn0_1_mclk", gen_synth0_1_parents,
-			ARRAY_SIZE(gen_synth0_1_parents), 0, SPEAR1310_PLL_CFG,
+			ARRAY_SIZE(gen_synth0_1_parents),
+			CLK_SET_RATE_NO_REPARENT, SPEAR1310_PLL_CFG,
 			SPEAR1310_RAS_SYNT0_1_CLK_SHIFT,
 			SPEAR1310_RAS_SYNT_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "gen_syn0_1_clk", NULL);
 
 	clk = clk_register_mux(NULL, "gen_syn2_3_mclk", gen_synth2_3_parents,
-			ARRAY_SIZE(gen_synth2_3_parents), 0, SPEAR1310_PLL_CFG,
+			ARRAY_SIZE(gen_synth2_3_parents),
+			CLK_SET_RATE_NO_REPARENT, SPEAR1310_PLL_CFG,
 			SPEAR1310_RAS_SYNT2_3_CLK_SHIFT,
 			SPEAR1310_RAS_SYNT_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "gen_syn2_3_clk", NULL);
@@ -929,8 +936,8 @@
 
 	clk = clk_register_mux(NULL, "smii_rgmii_phy_mclk",
 			smii_rgmii_phy_parents,
-			ARRAY_SIZE(smii_rgmii_phy_parents), 0,
-			SPEAR1310_RAS_CTRL_REG1,
+			ARRAY_SIZE(smii_rgmii_phy_parents),
+			CLK_SET_RATE_NO_REPARENT, SPEAR1310_RAS_CTRL_REG1,
 			SPEAR1310_SMII_RGMII_PHY_CLK_SHIFT,
 			SPEAR1310_PHY_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "stmmacphy.1", NULL);
@@ -938,15 +945,15 @@
 	clk_register_clkdev(clk, "stmmacphy.4", NULL);
 
 	clk = clk_register_mux(NULL, "rmii_phy_mclk", rmii_phy_parents,
-			ARRAY_SIZE(rmii_phy_parents), 0,
+			ARRAY_SIZE(rmii_phy_parents), CLK_SET_RATE_NO_REPARENT,
 			SPEAR1310_RAS_CTRL_REG1, SPEAR1310_RMII_PHY_CLK_SHIFT,
 			SPEAR1310_PHY_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "stmmacphy.3", NULL);
 
 	clk = clk_register_mux(NULL, "uart1_mclk", uart_parents,
-			ARRAY_SIZE(uart_parents), 0, SPEAR1310_RAS_CTRL_REG0,
-			SPEAR1310_UART1_CLK_SHIFT, SPEAR1310_RAS_UART_CLK_MASK,
-			0, &_lock);
+			ARRAY_SIZE(uart_parents), CLK_SET_RATE_NO_REPARENT,
+			SPEAR1310_RAS_CTRL_REG0, SPEAR1310_UART1_CLK_SHIFT,
+			SPEAR1310_RAS_UART_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "uart1_mclk", NULL);
 
 	clk = clk_register_gate(NULL, "uart1_clk", "uart1_mclk", 0,
@@ -955,9 +962,9 @@
 	clk_register_clkdev(clk, NULL, "5c800000.serial");
 
 	clk = clk_register_mux(NULL, "uart2_mclk", uart_parents,
-			ARRAY_SIZE(uart_parents), 0, SPEAR1310_RAS_CTRL_REG0,
-			SPEAR1310_UART2_CLK_SHIFT, SPEAR1310_RAS_UART_CLK_MASK,
-			0, &_lock);
+			ARRAY_SIZE(uart_parents), CLK_SET_RATE_NO_REPARENT,
+			SPEAR1310_RAS_CTRL_REG0, SPEAR1310_UART2_CLK_SHIFT,
+			SPEAR1310_RAS_UART_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "uart2_mclk", NULL);
 
 	clk = clk_register_gate(NULL, "uart2_clk", "uart2_mclk", 0,
@@ -966,9 +973,9 @@
 	clk_register_clkdev(clk, NULL, "5c900000.serial");
 
 	clk = clk_register_mux(NULL, "uart3_mclk", uart_parents,
-			ARRAY_SIZE(uart_parents), 0, SPEAR1310_RAS_CTRL_REG0,
-			SPEAR1310_UART3_CLK_SHIFT, SPEAR1310_RAS_UART_CLK_MASK,
-			0, &_lock);
+			ARRAY_SIZE(uart_parents), CLK_SET_RATE_NO_REPARENT,
+			SPEAR1310_RAS_CTRL_REG0, SPEAR1310_UART3_CLK_SHIFT,
+			SPEAR1310_RAS_UART_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "uart3_mclk", NULL);
 
 	clk = clk_register_gate(NULL, "uart3_clk", "uart3_mclk", 0,
@@ -977,9 +984,9 @@
 	clk_register_clkdev(clk, NULL, "5ca00000.serial");
 
 	clk = clk_register_mux(NULL, "uart4_mclk", uart_parents,
-			ARRAY_SIZE(uart_parents), 0, SPEAR1310_RAS_CTRL_REG0,
-			SPEAR1310_UART4_CLK_SHIFT, SPEAR1310_RAS_UART_CLK_MASK,
-			0, &_lock);
+			ARRAY_SIZE(uart_parents), CLK_SET_RATE_NO_REPARENT,
+			SPEAR1310_RAS_CTRL_REG0, SPEAR1310_UART4_CLK_SHIFT,
+			SPEAR1310_RAS_UART_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "uart4_mclk", NULL);
 
 	clk = clk_register_gate(NULL, "uart4_clk", "uart4_mclk", 0,
@@ -988,9 +995,9 @@
 	clk_register_clkdev(clk, NULL, "5cb00000.serial");
 
 	clk = clk_register_mux(NULL, "uart5_mclk", uart_parents,
-			ARRAY_SIZE(uart_parents), 0, SPEAR1310_RAS_CTRL_REG0,
-			SPEAR1310_UART5_CLK_SHIFT, SPEAR1310_RAS_UART_CLK_MASK,
-			0, &_lock);
+			ARRAY_SIZE(uart_parents), CLK_SET_RATE_NO_REPARENT,
+			SPEAR1310_RAS_CTRL_REG0, SPEAR1310_UART5_CLK_SHIFT,
+			SPEAR1310_RAS_UART_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "uart5_mclk", NULL);
 
 	clk = clk_register_gate(NULL, "uart5_clk", "uart5_mclk", 0,
@@ -999,9 +1006,9 @@
 	clk_register_clkdev(clk, NULL, "5cc00000.serial");
 
 	clk = clk_register_mux(NULL, "i2c1_mclk", i2c_parents,
-			ARRAY_SIZE(i2c_parents), 0, SPEAR1310_RAS_CTRL_REG0,
-			SPEAR1310_I2C1_CLK_SHIFT, SPEAR1310_I2C_CLK_MASK, 0,
-			&_lock);
+			ARRAY_SIZE(i2c_parents), CLK_SET_RATE_NO_REPARENT,
+			SPEAR1310_RAS_CTRL_REG0, SPEAR1310_I2C1_CLK_SHIFT,
+			SPEAR1310_I2C_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "i2c1_mclk", NULL);
 
 	clk = clk_register_gate(NULL, "i2c1_clk", "i2c1_mclk", 0,
@@ -1010,9 +1017,9 @@
 	clk_register_clkdev(clk, NULL, "5cd00000.i2c");
 
 	clk = clk_register_mux(NULL, "i2c2_mclk", i2c_parents,
-			ARRAY_SIZE(i2c_parents), 0, SPEAR1310_RAS_CTRL_REG0,
-			SPEAR1310_I2C2_CLK_SHIFT, SPEAR1310_I2C_CLK_MASK, 0,
-			&_lock);
+			ARRAY_SIZE(i2c_parents), CLK_SET_RATE_NO_REPARENT,
+			SPEAR1310_RAS_CTRL_REG0, SPEAR1310_I2C2_CLK_SHIFT,
+			SPEAR1310_I2C_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "i2c2_mclk", NULL);
 
 	clk = clk_register_gate(NULL, "i2c2_clk", "i2c2_mclk", 0,
@@ -1021,9 +1028,9 @@
 	clk_register_clkdev(clk, NULL, "5ce00000.i2c");
 
 	clk = clk_register_mux(NULL, "i2c3_mclk", i2c_parents,
-			ARRAY_SIZE(i2c_parents), 0, SPEAR1310_RAS_CTRL_REG0,
-			SPEAR1310_I2C3_CLK_SHIFT, SPEAR1310_I2C_CLK_MASK, 0,
-			&_lock);
+			ARRAY_SIZE(i2c_parents), CLK_SET_RATE_NO_REPARENT,
+			SPEAR1310_RAS_CTRL_REG0, SPEAR1310_I2C3_CLK_SHIFT,
+			SPEAR1310_I2C_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "i2c3_mclk", NULL);
 
 	clk = clk_register_gate(NULL, "i2c3_clk", "i2c3_mclk", 0,
@@ -1032,9 +1039,9 @@
 	clk_register_clkdev(clk, NULL, "5cf00000.i2c");
 
 	clk = clk_register_mux(NULL, "i2c4_mclk", i2c_parents,
-			ARRAY_SIZE(i2c_parents), 0, SPEAR1310_RAS_CTRL_REG0,
-			SPEAR1310_I2C4_CLK_SHIFT, SPEAR1310_I2C_CLK_MASK, 0,
-			&_lock);
+			ARRAY_SIZE(i2c_parents), CLK_SET_RATE_NO_REPARENT,
+			SPEAR1310_RAS_CTRL_REG0, SPEAR1310_I2C4_CLK_SHIFT,
+			SPEAR1310_I2C_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "i2c4_mclk", NULL);
 
 	clk = clk_register_gate(NULL, "i2c4_clk", "i2c4_mclk", 0,
@@ -1043,9 +1050,9 @@
 	clk_register_clkdev(clk, NULL, "5d000000.i2c");
 
 	clk = clk_register_mux(NULL, "i2c5_mclk", i2c_parents,
-			ARRAY_SIZE(i2c_parents), 0, SPEAR1310_RAS_CTRL_REG0,
-			SPEAR1310_I2C5_CLK_SHIFT, SPEAR1310_I2C_CLK_MASK, 0,
-			&_lock);
+			ARRAY_SIZE(i2c_parents), CLK_SET_RATE_NO_REPARENT,
+			SPEAR1310_RAS_CTRL_REG0, SPEAR1310_I2C5_CLK_SHIFT,
+			SPEAR1310_I2C_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "i2c5_mclk", NULL);
 
 	clk = clk_register_gate(NULL, "i2c5_clk", "i2c5_mclk", 0,
@@ -1054,9 +1061,9 @@
 	clk_register_clkdev(clk, NULL, "5d100000.i2c");
 
 	clk = clk_register_mux(NULL, "i2c6_mclk", i2c_parents,
-			ARRAY_SIZE(i2c_parents), 0, SPEAR1310_RAS_CTRL_REG0,
-			SPEAR1310_I2C6_CLK_SHIFT, SPEAR1310_I2C_CLK_MASK, 0,
-			&_lock);
+			ARRAY_SIZE(i2c_parents), CLK_SET_RATE_NO_REPARENT,
+			SPEAR1310_RAS_CTRL_REG0, SPEAR1310_I2C6_CLK_SHIFT,
+			SPEAR1310_I2C_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "i2c6_mclk", NULL);
 
 	clk = clk_register_gate(NULL, "i2c6_clk", "i2c6_mclk", 0,
@@ -1065,9 +1072,9 @@
 	clk_register_clkdev(clk, NULL, "5d200000.i2c");
 
 	clk = clk_register_mux(NULL, "i2c7_mclk", i2c_parents,
-			ARRAY_SIZE(i2c_parents), 0, SPEAR1310_RAS_CTRL_REG0,
-			SPEAR1310_I2C7_CLK_SHIFT, SPEAR1310_I2C_CLK_MASK, 0,
-			&_lock);
+			ARRAY_SIZE(i2c_parents), CLK_SET_RATE_NO_REPARENT,
+			SPEAR1310_RAS_CTRL_REG0, SPEAR1310_I2C7_CLK_SHIFT,
+			SPEAR1310_I2C_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "i2c7_mclk", NULL);
 
 	clk = clk_register_gate(NULL, "i2c7_clk", "i2c7_mclk", 0,
@@ -1076,9 +1083,9 @@
 	clk_register_clkdev(clk, NULL, "5d300000.i2c");
 
 	clk = clk_register_mux(NULL, "ssp1_mclk", ssp1_parents,
-			ARRAY_SIZE(ssp1_parents), 0, SPEAR1310_RAS_CTRL_REG0,
-			SPEAR1310_SSP1_CLK_SHIFT, SPEAR1310_SSP1_CLK_MASK, 0,
-			&_lock);
+			ARRAY_SIZE(ssp1_parents), CLK_SET_RATE_NO_REPARENT,
+			SPEAR1310_RAS_CTRL_REG0, SPEAR1310_SSP1_CLK_SHIFT,
+			SPEAR1310_SSP1_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "ssp1_mclk", NULL);
 
 	clk = clk_register_gate(NULL, "ssp1_clk", "ssp1_mclk", 0,
@@ -1087,9 +1094,9 @@
 	clk_register_clkdev(clk, NULL, "5d400000.spi");
 
 	clk = clk_register_mux(NULL, "pci_mclk", pci_parents,
-			ARRAY_SIZE(pci_parents), 0, SPEAR1310_RAS_CTRL_REG0,
-			SPEAR1310_PCI_CLK_SHIFT, SPEAR1310_PCI_CLK_MASK, 0,
-			&_lock);
+			ARRAY_SIZE(pci_parents), CLK_SET_RATE_NO_REPARENT,
+			SPEAR1310_RAS_CTRL_REG0, SPEAR1310_PCI_CLK_SHIFT,
+			SPEAR1310_PCI_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "pci_mclk", NULL);
 
 	clk = clk_register_gate(NULL, "pci_clk", "pci_mclk", 0,
@@ -1098,9 +1105,9 @@
 	clk_register_clkdev(clk, NULL, "pci");
 
 	clk = clk_register_mux(NULL, "tdm1_mclk", tdm_parents,
-			ARRAY_SIZE(tdm_parents), 0, SPEAR1310_RAS_CTRL_REG0,
-			SPEAR1310_TDM1_CLK_SHIFT, SPEAR1310_TDM_CLK_MASK, 0,
-			&_lock);
+			ARRAY_SIZE(tdm_parents), CLK_SET_RATE_NO_REPARENT,
+			SPEAR1310_RAS_CTRL_REG0, SPEAR1310_TDM1_CLK_SHIFT,
+			SPEAR1310_TDM_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "tdm1_mclk", NULL);
 
 	clk = clk_register_gate(NULL, "tdm1_clk", "tdm1_mclk", 0,
@@ -1109,9 +1116,9 @@
 	clk_register_clkdev(clk, NULL, "tdm_hdlc.0");
 
 	clk = clk_register_mux(NULL, "tdm2_mclk", tdm_parents,
-			ARRAY_SIZE(tdm_parents), 0, SPEAR1310_RAS_CTRL_REG0,
-			SPEAR1310_TDM2_CLK_SHIFT, SPEAR1310_TDM_CLK_MASK, 0,
-			&_lock);
+			ARRAY_SIZE(tdm_parents), CLK_SET_RATE_NO_REPARENT,
+			SPEAR1310_RAS_CTRL_REG0, SPEAR1310_TDM2_CLK_SHIFT,
+			SPEAR1310_TDM_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "tdm2_mclk", NULL);
 
 	clk = clk_register_gate(NULL, "tdm2_clk", "tdm2_mclk", 0,
diff --git a/drivers/clk/spear/spear1340_clock.c b/drivers/clk/spear/spear1340_clock.c
index 9d0b394..fe835c1 100644
--- a/drivers/clk/spear/spear1340_clock.c
+++ b/drivers/clk/spear/spear1340_clock.c
@@ -473,9 +473,9 @@
 	/* clock derived from 24 or 25 MHz osc clk */
 	/* vco-pll */
 	clk = clk_register_mux(NULL, "vco1_mclk", vco_parents,
-			ARRAY_SIZE(vco_parents), 0, SPEAR1340_PLL_CFG,
-			SPEAR1340_PLL1_CLK_SHIFT, SPEAR1340_PLL_CLK_MASK, 0,
-			&_lock);
+			ARRAY_SIZE(vco_parents), CLK_SET_RATE_NO_REPARENT,
+			SPEAR1340_PLL_CFG, SPEAR1340_PLL1_CLK_SHIFT,
+			SPEAR1340_PLL_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "vco1_mclk", NULL);
 	clk = clk_register_vco_pll("vco1_clk", "pll1_clk", NULL, "vco1_mclk", 0,
 			SPEAR1340_PLL1_CTR, SPEAR1340_PLL1_FRQ, pll_rtbl,
@@ -484,9 +484,9 @@
 	clk_register_clkdev(clk1, "pll1_clk", NULL);
 
 	clk = clk_register_mux(NULL, "vco2_mclk", vco_parents,
-			ARRAY_SIZE(vco_parents), 0, SPEAR1340_PLL_CFG,
-			SPEAR1340_PLL2_CLK_SHIFT, SPEAR1340_PLL_CLK_MASK, 0,
-			&_lock);
+			ARRAY_SIZE(vco_parents), CLK_SET_RATE_NO_REPARENT,
+			SPEAR1340_PLL_CFG, SPEAR1340_PLL2_CLK_SHIFT,
+			SPEAR1340_PLL_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "vco2_mclk", NULL);
 	clk = clk_register_vco_pll("vco2_clk", "pll2_clk", NULL, "vco2_mclk", 0,
 			SPEAR1340_PLL2_CTR, SPEAR1340_PLL2_FRQ, pll_rtbl,
@@ -495,9 +495,9 @@
 	clk_register_clkdev(clk1, "pll2_clk", NULL);
 
 	clk = clk_register_mux(NULL, "vco3_mclk", vco_parents,
-			ARRAY_SIZE(vco_parents), 0, SPEAR1340_PLL_CFG,
-			SPEAR1340_PLL3_CLK_SHIFT, SPEAR1340_PLL_CLK_MASK, 0,
-			&_lock);
+			ARRAY_SIZE(vco_parents), CLK_SET_RATE_NO_REPARENT,
+			SPEAR1340_PLL_CFG, SPEAR1340_PLL3_CLK_SHIFT,
+			SPEAR1340_PLL_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "vco3_mclk", NULL);
 	clk = clk_register_vco_pll("vco3_clk", "pll3_clk", NULL, "vco3_mclk", 0,
 			SPEAR1340_PLL3_CTR, SPEAR1340_PLL3_FRQ, pll_rtbl,
@@ -561,8 +561,8 @@
 	clk_register_clkdev(clk, "amba_syn_clk", NULL);
 
 	clk = clk_register_mux(NULL, "sys_mclk", sys_parents,
-			ARRAY_SIZE(sys_parents), 0, SPEAR1340_SYS_CLK_CTRL,
-			SPEAR1340_SCLK_SRC_SEL_SHIFT,
+			ARRAY_SIZE(sys_parents), CLK_SET_RATE_NO_REPARENT,
+			SPEAR1340_SYS_CLK_CTRL, SPEAR1340_SCLK_SRC_SEL_SHIFT,
 			SPEAR1340_SCLK_SRC_SEL_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "sys_mclk", NULL);
 
@@ -583,8 +583,8 @@
 	clk_register_clkdev(clk, NULL, "smp_twd");
 
 	clk = clk_register_mux(NULL, "ahb_clk", ahb_parents,
-			ARRAY_SIZE(ahb_parents), 0, SPEAR1340_SYS_CLK_CTRL,
-			SPEAR1340_HCLK_SRC_SEL_SHIFT,
+			ARRAY_SIZE(ahb_parents), CLK_SET_RATE_NO_REPARENT,
+			SPEAR1340_SYS_CLK_CTRL, SPEAR1340_HCLK_SRC_SEL_SHIFT,
 			SPEAR1340_HCLK_SRC_SEL_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "ahb_clk", NULL);
 
@@ -594,9 +594,9 @@
 
 	/* gpt clocks */
 	clk = clk_register_mux(NULL, "gpt0_mclk", gpt_parents,
-			ARRAY_SIZE(gpt_parents), 0, SPEAR1340_PERIP_CLK_CFG,
-			SPEAR1340_GPT0_CLK_SHIFT, SPEAR1340_GPT_CLK_MASK, 0,
-			&_lock);
+			ARRAY_SIZE(gpt_parents), CLK_SET_RATE_NO_REPARENT,
+			SPEAR1340_PERIP_CLK_CFG, SPEAR1340_GPT0_CLK_SHIFT,
+			SPEAR1340_GPT_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "gpt0_mclk", NULL);
 	clk = clk_register_gate(NULL, "gpt0_clk", "gpt0_mclk", 0,
 			SPEAR1340_PERIP1_CLK_ENB, SPEAR1340_GPT0_CLK_ENB, 0,
@@ -604,9 +604,9 @@
 	clk_register_clkdev(clk, NULL, "gpt0");
 
 	clk = clk_register_mux(NULL, "gpt1_mclk", gpt_parents,
-			ARRAY_SIZE(gpt_parents), 0, SPEAR1340_PERIP_CLK_CFG,
-			SPEAR1340_GPT1_CLK_SHIFT, SPEAR1340_GPT_CLK_MASK, 0,
-			&_lock);
+			ARRAY_SIZE(gpt_parents), CLK_SET_RATE_NO_REPARENT,
+			SPEAR1340_PERIP_CLK_CFG, SPEAR1340_GPT1_CLK_SHIFT,
+			SPEAR1340_GPT_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "gpt1_mclk", NULL);
 	clk = clk_register_gate(NULL, "gpt1_clk", "gpt1_mclk", 0,
 			SPEAR1340_PERIP1_CLK_ENB, SPEAR1340_GPT1_CLK_ENB, 0,
@@ -614,9 +614,9 @@
 	clk_register_clkdev(clk, NULL, "gpt1");
 
 	clk = clk_register_mux(NULL, "gpt2_mclk", gpt_parents,
-			ARRAY_SIZE(gpt_parents), 0, SPEAR1340_PERIP_CLK_CFG,
-			SPEAR1340_GPT2_CLK_SHIFT, SPEAR1340_GPT_CLK_MASK, 0,
-			&_lock);
+			ARRAY_SIZE(gpt_parents), CLK_SET_RATE_NO_REPARENT,
+			SPEAR1340_PERIP_CLK_CFG, SPEAR1340_GPT2_CLK_SHIFT,
+			SPEAR1340_GPT_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "gpt2_mclk", NULL);
 	clk = clk_register_gate(NULL, "gpt2_clk", "gpt2_mclk", 0,
 			SPEAR1340_PERIP2_CLK_ENB, SPEAR1340_GPT2_CLK_ENB, 0,
@@ -624,9 +624,9 @@
 	clk_register_clkdev(clk, NULL, "gpt2");
 
 	clk = clk_register_mux(NULL, "gpt3_mclk", gpt_parents,
-			ARRAY_SIZE(gpt_parents), 0, SPEAR1340_PERIP_CLK_CFG,
-			SPEAR1340_GPT3_CLK_SHIFT, SPEAR1340_GPT_CLK_MASK, 0,
-			&_lock);
+			ARRAY_SIZE(gpt_parents), CLK_SET_RATE_NO_REPARENT,
+			SPEAR1340_PERIP_CLK_CFG, SPEAR1340_GPT3_CLK_SHIFT,
+			SPEAR1340_GPT_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "gpt3_mclk", NULL);
 	clk = clk_register_gate(NULL, "gpt3_clk", "gpt3_mclk", 0,
 			SPEAR1340_PERIP2_CLK_ENB, SPEAR1340_GPT3_CLK_ENB, 0,
@@ -641,7 +641,8 @@
 	clk_register_clkdev(clk1, "uart0_syn_gclk", NULL);
 
 	clk = clk_register_mux(NULL, "uart0_mclk", uart0_parents,
-			ARRAY_SIZE(uart0_parents), CLK_SET_RATE_PARENT,
+			ARRAY_SIZE(uart0_parents),
+			CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
 			SPEAR1340_PERIP_CLK_CFG, SPEAR1340_UART0_CLK_SHIFT,
 			SPEAR1340_UART_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "uart0_mclk", NULL);
@@ -658,9 +659,9 @@
 	clk_register_clkdev(clk1, "uart1_syn_gclk", NULL);
 
 	clk = clk_register_mux(NULL, "uart1_mclk", uart1_parents,
-			ARRAY_SIZE(uart1_parents), 0, SPEAR1340_PERIP_CLK_CFG,
-			SPEAR1340_UART1_CLK_SHIFT, SPEAR1340_UART_CLK_MASK, 0,
-			&_lock);
+			ARRAY_SIZE(uart1_parents), CLK_SET_RATE_NO_REPARENT,
+			SPEAR1340_PERIP_CLK_CFG, SPEAR1340_UART1_CLK_SHIFT,
+			SPEAR1340_UART_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "uart1_mclk", NULL);
 
 	clk = clk_register_gate(NULL, "uart1_clk", "uart1_mclk", 0,
@@ -698,7 +699,8 @@
 	clk_register_clkdev(clk1, "c3_syn_gclk", NULL);
 
 	clk = clk_register_mux(NULL, "c3_mclk", c3_parents,
-			ARRAY_SIZE(c3_parents), CLK_SET_RATE_PARENT,
+			ARRAY_SIZE(c3_parents),
+			CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
 			SPEAR1340_PERIP_CLK_CFG, SPEAR1340_C3_CLK_SHIFT,
 			SPEAR1340_C3_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "c3_mclk", NULL);
@@ -710,8 +712,8 @@
 
 	/* gmac */
 	clk = clk_register_mux(NULL, "phy_input_mclk", gmac_phy_input_parents,
-			ARRAY_SIZE(gmac_phy_input_parents), 0,
-			SPEAR1340_GMAC_CLK_CFG,
+			ARRAY_SIZE(gmac_phy_input_parents),
+			CLK_SET_RATE_NO_REPARENT, SPEAR1340_GMAC_CLK_CFG,
 			SPEAR1340_GMAC_PHY_INPUT_CLK_SHIFT,
 			SPEAR1340_GMAC_PHY_INPUT_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "phy_input_mclk", NULL);
@@ -723,15 +725,16 @@
 	clk_register_clkdev(clk1, "phy_syn_gclk", NULL);
 
 	clk = clk_register_mux(NULL, "phy_mclk", gmac_phy_parents,
-			ARRAY_SIZE(gmac_phy_parents), 0,
+			ARRAY_SIZE(gmac_phy_parents), CLK_SET_RATE_NO_REPARENT,
 			SPEAR1340_PERIP_CLK_CFG, SPEAR1340_GMAC_PHY_CLK_SHIFT,
 			SPEAR1340_GMAC_PHY_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "stmmacphy.0", NULL);
 
 	/* clcd */
 	clk = clk_register_mux(NULL, "clcd_syn_mclk", clcd_synth_parents,
-			ARRAY_SIZE(clcd_synth_parents), 0,
-			SPEAR1340_CLCD_CLK_SYNT, SPEAR1340_CLCD_SYNT_CLK_SHIFT,
+			ARRAY_SIZE(clcd_synth_parents),
+			CLK_SET_RATE_NO_REPARENT, SPEAR1340_CLCD_CLK_SYNT,
+			SPEAR1340_CLCD_SYNT_CLK_SHIFT,
 			SPEAR1340_CLCD_SYNT_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "clcd_syn_mclk", NULL);
 
@@ -741,7 +744,8 @@
 	clk_register_clkdev(clk, "clcd_syn_clk", NULL);
 
 	clk = clk_register_mux(NULL, "clcd_pixel_mclk", clcd_pixel_parents,
-			ARRAY_SIZE(clcd_pixel_parents), CLK_SET_RATE_PARENT,
+			ARRAY_SIZE(clcd_pixel_parents),
+			CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
 			SPEAR1340_PERIP_CLK_CFG, SPEAR1340_CLCD_CLK_SHIFT,
 			SPEAR1340_CLCD_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "clcd_pixel_mclk", NULL);
@@ -753,9 +757,9 @@
 
 	/* i2s */
 	clk = clk_register_mux(NULL, "i2s_src_mclk", i2s_src_parents,
-			ARRAY_SIZE(i2s_src_parents), 0, SPEAR1340_I2S_CLK_CFG,
-			SPEAR1340_I2S_SRC_CLK_SHIFT, SPEAR1340_I2S_SRC_CLK_MASK,
-			0, &_lock);
+			ARRAY_SIZE(i2s_src_parents), CLK_SET_RATE_NO_REPARENT,
+			SPEAR1340_I2S_CLK_CFG, SPEAR1340_I2S_SRC_CLK_SHIFT,
+			SPEAR1340_I2S_SRC_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "i2s_src_mclk", NULL);
 
 	clk = clk_register_aux("i2s_prs1_clk", NULL, "i2s_src_mclk",
@@ -765,7 +769,8 @@
 	clk_register_clkdev(clk, "i2s_prs1_clk", NULL);
 
 	clk = clk_register_mux(NULL, "i2s_ref_mclk", i2s_ref_parents,
-			ARRAY_SIZE(i2s_ref_parents), CLK_SET_RATE_PARENT,
+			ARRAY_SIZE(i2s_ref_parents),
+			CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
 			SPEAR1340_I2S_CLK_CFG, SPEAR1340_I2S_REF_SHIFT,
 			SPEAR1340_I2S_REF_SEL_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "i2s_ref_mclk", NULL);
@@ -891,13 +896,15 @@
 
 	/* RAS clks */
 	clk = clk_register_mux(NULL, "gen_syn0_1_mclk", gen_synth0_1_parents,
-			ARRAY_SIZE(gen_synth0_1_parents), 0, SPEAR1340_PLL_CFG,
+			ARRAY_SIZE(gen_synth0_1_parents),
+			CLK_SET_RATE_NO_REPARENT, SPEAR1340_PLL_CFG,
 			SPEAR1340_GEN_SYNT0_1_CLK_SHIFT,
 			SPEAR1340_GEN_SYNT_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "gen_syn0_1_mclk", NULL);
 
 	clk = clk_register_mux(NULL, "gen_syn2_3_mclk", gen_synth2_3_parents,
-			ARRAY_SIZE(gen_synth2_3_parents), 0, SPEAR1340_PLL_CFG,
+			ARRAY_SIZE(gen_synth2_3_parents),
+			CLK_SET_RATE_NO_REPARENT, SPEAR1340_PLL_CFG,
 			SPEAR1340_GEN_SYNT2_3_CLK_SHIFT,
 			SPEAR1340_GEN_SYNT_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "gen_syn2_3_mclk", NULL);
@@ -938,7 +945,8 @@
 	clk_register_clkdev(clk, NULL, "spear_cec.1");
 
 	clk = clk_register_mux(NULL, "spdif_out_mclk", spdif_out_parents,
-			ARRAY_SIZE(spdif_out_parents), CLK_SET_RATE_PARENT,
+			ARRAY_SIZE(spdif_out_parents),
+			CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
 			SPEAR1340_PERIP_CLK_CFG, SPEAR1340_SPDIF_OUT_CLK_SHIFT,
 			SPEAR1340_SPDIF_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "spdif_out_mclk", NULL);
@@ -949,7 +957,8 @@
 	clk_register_clkdev(clk, NULL, "d0000000.spdif-out");
 
 	clk = clk_register_mux(NULL, "spdif_in_mclk", spdif_in_parents,
-			ARRAY_SIZE(spdif_in_parents), CLK_SET_RATE_PARENT,
+			ARRAY_SIZE(spdif_in_parents),
+			CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
 			SPEAR1340_PERIP_CLK_CFG, SPEAR1340_SPDIF_IN_CLK_SHIFT,
 			SPEAR1340_SPDIF_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "spdif_in_mclk", NULL);
diff --git a/drivers/clk/spear/spear3xx_clock.c b/drivers/clk/spear/spear3xx_clock.c
index 080c3c5..c2d2043 100644
--- a/drivers/clk/spear/spear3xx_clock.c
+++ b/drivers/clk/spear/spear3xx_clock.c
@@ -294,7 +294,8 @@
 	clk_register_clkdev(clk, NULL, "a9400000.i2s");
 
 	clk = clk_register_mux(NULL, "i2s_ref_clk", i2s_ref_parents,
-			ARRAY_SIZE(i2s_ref_parents), CLK_SET_RATE_PARENT,
+			ARRAY_SIZE(i2s_ref_parents),
+			CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
 			SPEAR320_CONTROL_REG, I2S_REF_PCLK_SHIFT,
 			I2S_REF_PCLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "i2s_ref_clk", NULL);
@@ -313,57 +314,66 @@
 	clk_register_clkdev(clk, "hclk", "ab000000.eth");
 
 	clk = clk_register_mux(NULL, "rs485_clk", uartx_parents,
-			ARRAY_SIZE(uartx_parents), CLK_SET_RATE_PARENT,
+			ARRAY_SIZE(uartx_parents),
+			CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
 			SPEAR320_EXT_CTRL_REG, SPEAR320_RS485_PCLK_SHIFT,
 			SPEAR320_UARTX_PCLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, NULL, "a9300000.serial");
 
 	clk = clk_register_mux(NULL, "sdhci_clk", sdhci_parents,
-			ARRAY_SIZE(sdhci_parents), CLK_SET_RATE_PARENT,
+			ARRAY_SIZE(sdhci_parents),
+			CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
 			SPEAR320_CONTROL_REG, SDHCI_PCLK_SHIFT, SDHCI_PCLK_MASK,
 			0, &_lock);
 	clk_register_clkdev(clk, NULL, "70000000.sdhci");
 
 	clk = clk_register_mux(NULL, "smii_pclk", smii0_parents,
-			ARRAY_SIZE(smii0_parents), 0, SPEAR320_CONTROL_REG,
-			SMII_PCLK_SHIFT, SMII_PCLK_MASK, 0, &_lock);
+			ARRAY_SIZE(smii0_parents), CLK_SET_RATE_NO_REPARENT,
+			SPEAR320_CONTROL_REG, SMII_PCLK_SHIFT, SMII_PCLK_MASK,
+			0, &_lock);
 	clk_register_clkdev(clk, NULL, "smii_pclk");
 
 	clk = clk_register_fixed_factor(NULL, "smii_clk", "smii_pclk", 0, 1, 1);
 	clk_register_clkdev(clk, NULL, "smii");
 
 	clk = clk_register_mux(NULL, "uart1_clk", uartx_parents,
-			ARRAY_SIZE(uartx_parents), CLK_SET_RATE_PARENT,
+			ARRAY_SIZE(uartx_parents),
+			CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
 			SPEAR320_CONTROL_REG, UART1_PCLK_SHIFT, UART1_PCLK_MASK,
 			0, &_lock);
 	clk_register_clkdev(clk, NULL, "a3000000.serial");
 
 	clk = clk_register_mux(NULL, "uart2_clk", uartx_parents,
-			ARRAY_SIZE(uartx_parents), CLK_SET_RATE_PARENT,
+			ARRAY_SIZE(uartx_parents),
+			CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
 			SPEAR320_EXT_CTRL_REG, SPEAR320_UART2_PCLK_SHIFT,
 			SPEAR320_UARTX_PCLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, NULL, "a4000000.serial");
 
 	clk = clk_register_mux(NULL, "uart3_clk", uartx_parents,
-			ARRAY_SIZE(uartx_parents), CLK_SET_RATE_PARENT,
+			ARRAY_SIZE(uartx_parents),
+			CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
 			SPEAR320_EXT_CTRL_REG, SPEAR320_UART3_PCLK_SHIFT,
 			SPEAR320_UARTX_PCLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, NULL, "a9100000.serial");
 
 	clk = clk_register_mux(NULL, "uart4_clk", uartx_parents,
-			ARRAY_SIZE(uartx_parents), CLK_SET_RATE_PARENT,
+			ARRAY_SIZE(uartx_parents),
+			CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
 			SPEAR320_EXT_CTRL_REG, SPEAR320_UART4_PCLK_SHIFT,
 			SPEAR320_UARTX_PCLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, NULL, "a9200000.serial");
 
 	clk = clk_register_mux(NULL, "uart5_clk", uartx_parents,
-			ARRAY_SIZE(uartx_parents), CLK_SET_RATE_PARENT,
+			ARRAY_SIZE(uartx_parents),
+			CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
 			SPEAR320_EXT_CTRL_REG, SPEAR320_UART5_PCLK_SHIFT,
 			SPEAR320_UARTX_PCLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, NULL, "60000000.serial");
 
 	clk = clk_register_mux(NULL, "uart6_clk", uartx_parents,
-			ARRAY_SIZE(uartx_parents), CLK_SET_RATE_PARENT,
+			ARRAY_SIZE(uartx_parents),
+			CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
 			SPEAR320_EXT_CTRL_REG, SPEAR320_UART6_PCLK_SHIFT,
 			SPEAR320_UARTX_PCLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, NULL, "60100000.serial");
@@ -427,7 +437,8 @@
 	clk_register_clkdev(clk1, "uart_syn_gclk", NULL);
 
 	clk = clk_register_mux(NULL, "uart0_mclk", uart0_parents,
-			ARRAY_SIZE(uart0_parents), CLK_SET_RATE_PARENT,
+			ARRAY_SIZE(uart0_parents),
+			CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
 			PERIP_CLK_CFG, UART_CLK_SHIFT, UART_CLK_MASK, 0,
 			&_lock);
 	clk_register_clkdev(clk, "uart0_mclk", NULL);
@@ -444,7 +455,8 @@
 	clk_register_clkdev(clk1, "firda_syn_gclk", NULL);
 
 	clk = clk_register_mux(NULL, "firda_mclk", firda_parents,
-			ARRAY_SIZE(firda_parents), CLK_SET_RATE_PARENT,
+			ARRAY_SIZE(firda_parents),
+			CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
 			PERIP_CLK_CFG, FIRDA_CLK_SHIFT, FIRDA_CLK_MASK, 0,
 			&_lock);
 	clk_register_clkdev(clk, "firda_mclk", NULL);
@@ -458,14 +470,16 @@
 	clk_register_gpt("gpt0_syn_clk", "pll1_clk", 0, PRSC0_CLK_CFG, gpt_rtbl,
 			ARRAY_SIZE(gpt_rtbl), &_lock);
 	clk = clk_register_mux(NULL, "gpt0_clk", gpt0_parents,
-			ARRAY_SIZE(gpt0_parents), CLK_SET_RATE_PARENT,
+			ARRAY_SIZE(gpt0_parents),
+			CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
 			PERIP_CLK_CFG, GPT0_CLK_SHIFT, GPT_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, NULL, "gpt0");
 
 	clk_register_gpt("gpt1_syn_clk", "pll1_clk", 0, PRSC1_CLK_CFG, gpt_rtbl,
 			ARRAY_SIZE(gpt_rtbl), &_lock);
 	clk = clk_register_mux(NULL, "gpt1_mclk", gpt1_parents,
-			ARRAY_SIZE(gpt1_parents), CLK_SET_RATE_PARENT,
+			ARRAY_SIZE(gpt1_parents),
+			CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
 			PERIP_CLK_CFG, GPT1_CLK_SHIFT, GPT_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "gpt1_mclk", NULL);
 	clk = clk_register_gate(NULL, "gpt1_clk", "gpt1_mclk",
@@ -476,7 +490,8 @@
 	clk_register_gpt("gpt2_syn_clk", "pll1_clk", 0, PRSC2_CLK_CFG, gpt_rtbl,
 			ARRAY_SIZE(gpt_rtbl), &_lock);
 	clk = clk_register_mux(NULL, "gpt2_mclk", gpt2_parents,
-			ARRAY_SIZE(gpt2_parents), CLK_SET_RATE_PARENT,
+			ARRAY_SIZE(gpt2_parents),
+			CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
 			PERIP_CLK_CFG, GPT2_CLK_SHIFT, GPT_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "gpt2_mclk", NULL);
 	clk = clk_register_gate(NULL, "gpt2_clk", "gpt2_mclk",
@@ -498,9 +513,9 @@
 	clk_register_clkdev(clk1, "gen1_syn_gclk", NULL);
 
 	clk = clk_register_mux(NULL, "gen2_3_par_clk", gen2_3_parents,
-			ARRAY_SIZE(gen2_3_parents), 0, CORE_CLK_CFG,
-			GEN_SYNTH2_3_CLK_SHIFT, GEN_SYNTH2_3_CLK_MASK, 0,
-			&_lock);
+			ARRAY_SIZE(gen2_3_parents), CLK_SET_RATE_NO_REPARENT,
+			CORE_CLK_CFG, GEN_SYNTH2_3_CLK_SHIFT,
+			GEN_SYNTH2_3_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "gen2_3_par_clk", NULL);
 
 	clk = clk_register_aux("gen2_syn_clk", "gen2_syn_gclk",
@@ -540,8 +555,8 @@
 	clk_register_clkdev(clk, "ahbmult2_clk", NULL);
 
 	clk = clk_register_mux(NULL, "ddr_clk", ddr_parents,
-			ARRAY_SIZE(ddr_parents), 0, PLL_CLK_CFG, MCTR_CLK_SHIFT,
-			MCTR_CLK_MASK, 0, &_lock);
+			ARRAY_SIZE(ddr_parents), CLK_SET_RATE_NO_REPARENT,
+			PLL_CLK_CFG, MCTR_CLK_SHIFT, MCTR_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "ddr_clk", NULL);
 
 	clk = clk_register_divider(NULL, "apb_clk", "ahb_clk",
diff --git a/drivers/clk/spear/spear6xx_clock.c b/drivers/clk/spear/spear6xx_clock.c
index 9406f24..4f649c9 100644
--- a/drivers/clk/spear/spear6xx_clock.c
+++ b/drivers/clk/spear/spear6xx_clock.c
@@ -169,8 +169,9 @@
 	clk_register_clkdev(clk1, "uart_syn_gclk", NULL);
 
 	clk = clk_register_mux(NULL, "uart_mclk", uart_parents,
-			ARRAY_SIZE(uart_parents), 0, PERIP_CLK_CFG,
-			UART_CLK_SHIFT, UART_CLK_MASK, 0, &_lock);
+			ARRAY_SIZE(uart_parents), CLK_SET_RATE_NO_REPARENT,
+			PERIP_CLK_CFG, UART_CLK_SHIFT, UART_CLK_MASK, 0,
+			&_lock);
 	clk_register_clkdev(clk, "uart_mclk", NULL);
 
 	clk = clk_register_gate(NULL, "uart0", "uart_mclk", 0, PERIP1_CLK_ENB,
@@ -188,8 +189,9 @@
 	clk_register_clkdev(clk1, "firda_syn_gclk", NULL);
 
 	clk = clk_register_mux(NULL, "firda_mclk", firda_parents,
-			ARRAY_SIZE(firda_parents), 0, PERIP_CLK_CFG,
-			FIRDA_CLK_SHIFT, FIRDA_CLK_MASK, 0, &_lock);
+			ARRAY_SIZE(firda_parents), CLK_SET_RATE_NO_REPARENT,
+			PERIP_CLK_CFG, FIRDA_CLK_SHIFT, FIRDA_CLK_MASK, 0,
+			&_lock);
 	clk_register_clkdev(clk, "firda_mclk", NULL);
 
 	clk = clk_register_gate(NULL, "firda_clk", "firda_mclk", 0,
@@ -203,8 +205,9 @@
 	clk_register_clkdev(clk1, "clcd_syn_gclk", NULL);
 
 	clk = clk_register_mux(NULL, "clcd_mclk", clcd_parents,
-			ARRAY_SIZE(clcd_parents), 0, PERIP_CLK_CFG,
-			CLCD_CLK_SHIFT, CLCD_CLK_MASK, 0, &_lock);
+			ARRAY_SIZE(clcd_parents), CLK_SET_RATE_NO_REPARENT,
+			PERIP_CLK_CFG, CLCD_CLK_SHIFT, CLCD_CLK_MASK, 0,
+			&_lock);
 	clk_register_clkdev(clk, "clcd_mclk", NULL);
 
 	clk = clk_register_gate(NULL, "clcd_clk", "clcd_mclk", 0,
@@ -217,13 +220,13 @@
 	clk_register_clkdev(clk, "gpt0_1_syn_clk", NULL);
 
 	clk = clk_register_mux(NULL, "gpt0_mclk", gpt0_1_parents,
-			ARRAY_SIZE(gpt0_1_parents), 0, PERIP_CLK_CFG,
-			GPT0_CLK_SHIFT, GPT_CLK_MASK, 0, &_lock);
+			ARRAY_SIZE(gpt0_1_parents), CLK_SET_RATE_NO_REPARENT,
+			PERIP_CLK_CFG, GPT0_CLK_SHIFT, GPT_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, NULL, "gpt0");
 
 	clk = clk_register_mux(NULL, "gpt1_mclk", gpt0_1_parents,
-			ARRAY_SIZE(gpt0_1_parents), 0, PERIP_CLK_CFG,
-			GPT1_CLK_SHIFT, GPT_CLK_MASK, 0, &_lock);
+			ARRAY_SIZE(gpt0_1_parents), CLK_SET_RATE_NO_REPARENT,
+			PERIP_CLK_CFG, GPT1_CLK_SHIFT, GPT_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "gpt1_mclk", NULL);
 
 	clk = clk_register_gate(NULL, "gpt1_clk", "gpt1_mclk", 0,
@@ -235,8 +238,8 @@
 	clk_register_clkdev(clk, "gpt2_syn_clk", NULL);
 
 	clk = clk_register_mux(NULL, "gpt2_mclk", gpt2_parents,
-			ARRAY_SIZE(gpt2_parents), 0, PERIP_CLK_CFG,
-			GPT2_CLK_SHIFT, GPT_CLK_MASK, 0, &_lock);
+			ARRAY_SIZE(gpt2_parents), CLK_SET_RATE_NO_REPARENT,
+			PERIP_CLK_CFG, GPT2_CLK_SHIFT, GPT_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "gpt2_mclk", NULL);
 
 	clk = clk_register_gate(NULL, "gpt2_clk", "gpt2_mclk", 0,
@@ -248,8 +251,8 @@
 	clk_register_clkdev(clk, "gpt3_syn_clk", NULL);
 
 	clk = clk_register_mux(NULL, "gpt3_mclk", gpt3_parents,
-			ARRAY_SIZE(gpt3_parents), 0, PERIP_CLK_CFG,
-			GPT3_CLK_SHIFT, GPT_CLK_MASK, 0, &_lock);
+			ARRAY_SIZE(gpt3_parents), CLK_SET_RATE_NO_REPARENT,
+			PERIP_CLK_CFG, GPT3_CLK_SHIFT, GPT_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "gpt3_mclk", NULL);
 
 	clk = clk_register_gate(NULL, "gpt3_clk", "gpt3_mclk", 0,
@@ -277,8 +280,8 @@
 	clk_register_clkdev(clk, "ahbmult2_clk", NULL);
 
 	clk = clk_register_mux(NULL, "ddr_clk", ddr_parents,
-			ARRAY_SIZE(ddr_parents), 0, PLL_CLK_CFG, MCTR_CLK_SHIFT,
-			MCTR_CLK_MASK, 0, &_lock);
+			ARRAY_SIZE(ddr_parents), CLK_SET_RATE_NO_REPARENT,
+			PLL_CLK_CFG, MCTR_CLK_SHIFT, MCTR_CLK_MASK, 0, &_lock);
 	clk_register_clkdev(clk, "ddr_clk", NULL);
 
 	clk = clk_register_divider(NULL, "apb_clk", "ahb_clk",
diff --git a/drivers/clk/sunxi/clk-sunxi.c b/drivers/clk/sunxi/clk-sunxi.c
index 412912b..34ee69f 100644
--- a/drivers/clk/sunxi/clk-sunxi.c
+++ b/drivers/clk/sunxi/clk-sunxi.c
@@ -25,12 +25,12 @@
 static DEFINE_SPINLOCK(clk_lock);
 
 /**
- * sunxi_osc_clk_setup() - Setup function for gatable oscillator
+ * sun4i_osc_clk_setup() - Setup function for gatable oscillator
  */
 
 #define SUNXI_OSC24M_GATE	0
 
-static void __init sunxi_osc_clk_setup(struct device_node *node)
+static void __init sun4i_osc_clk_setup(struct device_node *node)
 {
 	struct clk *clk;
 	struct clk_fixed_rate *fixed;
@@ -64,22 +64,23 @@
 			&gate->hw, &clk_gate_ops,
 			CLK_IS_ROOT);
 
-	if (clk) {
+	if (!IS_ERR(clk)) {
 		of_clk_add_provider(node, of_clk_src_simple_get, clk);
 		clk_register_clkdev(clk, clk_name, NULL);
 	}
 }
+CLK_OF_DECLARE(sun4i_osc, "allwinner,sun4i-osc-clk", sun4i_osc_clk_setup);
 
 
 
 /**
- * sunxi_get_pll1_factors() - calculates n, k, m, p factors for PLL1
+ * sun4i_get_pll1_factors() - calculates n, k, m, p factors for PLL1
  * PLL1 rate is calculated as follows
  * rate = (parent_rate * n * (k + 1) >> p) / (m + 1);
  * parent_rate is always 24Mhz
  */
 
-static void sunxi_get_pll1_factors(u32 *freq, u32 parent_rate,
+static void sun4i_get_pll1_factors(u32 *freq, u32 parent_rate,
 				   u8 *n, u8 *k, u8 *m, u8 *p)
 {
 	u8 div;
@@ -124,15 +125,97 @@
 	*n = div / 4;
 }
 
+/**
+ * sun6i_a31_get_pll1_factors() - calculates n, k and m factors for PLL1
+ * PLL1 rate is calculated as follows
+ * rate = parent_rate * (n + 1) * (k + 1) / (m + 1);
+ * parent_rate should always be 24MHz
+ */
+static void sun6i_a31_get_pll1_factors(u32 *freq, u32 parent_rate,
+				       u8 *n, u8 *k, u8 *m, u8 *p)
+{
+	/*
+	 * We can operate only on MHz, this will make our life easier
+	 * later.
+	 */
+	u32 freq_mhz = *freq / 1000000;
+	u32 parent_freq_mhz = parent_rate / 1000000;
 
+	/*
+	 * Round down the frequency to the closest multiple of either
+	 * 6 or 16
+	 */
+	u32 round_freq_6 = round_down(freq_mhz, 6);
+	u32 round_freq_16 = round_down(freq_mhz, 16);
+
+	if (round_freq_6 > round_freq_16)
+		freq_mhz = round_freq_6;
+	else
+		freq_mhz = round_freq_16;
+
+	*freq = freq_mhz * 1000000;
+
+	/*
+	 * If the factors pointer are null, we were just called to
+	 * round down the frequency.
+	 * Exit.
+	 */
+	if (n == NULL)
+		return;
+
+	/* If the frequency is a multiple of 32 MHz, k is always 3 */
+	if (!(freq_mhz % 32))
+		*k = 3;
+	/* If the frequency is a multiple of 9 MHz, k is always 2 */
+	else if (!(freq_mhz % 9))
+		*k = 2;
+	/* If the frequency is a multiple of 8 MHz, k is always 1 */
+	else if (!(freq_mhz % 8))
+		*k = 1;
+	/* Otherwise, we don't use the k factor */
+	else
+		*k = 0;
+
+	/*
+	 * If the frequency is a multiple of 2 but not a multiple of
+	 * 3, m is 3. This is the first time we use 6 here, yet we
+	 * will use it on several other places.
+	 * We use this number because it's the lowest frequency we can
+	 * generate (with n = 0, k = 0, m = 3), so every other frequency
+	 * somehow relates to this frequency.
+	 */
+	if ((freq_mhz % 6) == 2 || (freq_mhz % 6) == 4)
+		*m = 2;
+	/*
+	 * If the frequency is a multiple of 6MHz, but the factor is
+	 * odd, m will be 3
+	 */
+	else if ((freq_mhz / 6) & 1)
+		*m = 3;
+	/* Otherwise, we end up with m = 1 */
+	else
+		*m = 1;
+
+	/* Calculate n thanks to the above factors we already got */
+	*n = freq_mhz * (*m + 1) / ((*k + 1) * parent_freq_mhz) - 1;
+
+	/*
+	 * If n end up being outbound, and that we can still decrease
+	 * m, do it.
+	 */
+	if ((*n + 1) > 31 && (*m + 1) > 1) {
+		*n = (*n + 1) / 2 - 1;
+		*m = (*m + 1) / 2 - 1;
+	}
+}
 
 /**
- * sunxi_get_apb1_factors() - calculates m, p factors for APB1
+ * sun4i_get_apb1_factors() - calculates m, p factors for APB1
  * APB1 rate is calculated as follows
  * rate = (parent_rate >> p) / (m + 1);
  */
 
-static void sunxi_get_apb1_factors(u32 *freq, u32 parent_rate,
+static void sun4i_get_apb1_factors(u32 *freq, u32 parent_rate,
 				   u8 *n, u8 *k, u8 *m, u8 *p)
 {
 	u8 calcm, calcp;
@@ -178,7 +261,7 @@
 	void (*getter) (u32 *rate, u32 parent_rate, u8 *n, u8 *k, u8 *m, u8 *p);
 };
 
-static struct clk_factors_config pll1_config = {
+static struct clk_factors_config sun4i_pll1_config = {
 	.nshift = 8,
 	.nwidth = 5,
 	.kshift = 4,
@@ -189,21 +272,35 @@
 	.pwidth = 2,
 };
 
-static struct clk_factors_config apb1_config = {
+static struct clk_factors_config sun6i_a31_pll1_config = {
+	.nshift	= 8,
+	.nwidth = 5,
+	.kshift = 4,
+	.kwidth = 2,
+	.mshift = 0,
+	.mwidth = 2,
+};
+
+static struct clk_factors_config sun4i_apb1_config = {
 	.mshift = 0,
 	.mwidth = 5,
 	.pshift = 16,
 	.pwidth = 2,
 };
 
-static const __initconst struct factors_data pll1_data = {
-	.table = &pll1_config,
-	.getter = sunxi_get_pll1_factors,
+static const struct factors_data sun4i_pll1_data __initconst = {
+	.table = &sun4i_pll1_config,
+	.getter = sun4i_get_pll1_factors,
 };
 
-static const __initconst struct factors_data apb1_data = {
-	.table = &apb1_config,
-	.getter = sunxi_get_apb1_factors,
+static const struct factors_data sun6i_a31_pll1_data __initconst = {
+	.table = &sun6i_a31_pll1_config,
+	.getter = sun6i_a31_get_pll1_factors,
+};
+
+static const struct factors_data sun4i_apb1_data __initconst = {
+	.table = &sun4i_apb1_config,
+	.getter = sun4i_get_apb1_factors,
 };
 
 static void __init sunxi_factors_clk_setup(struct device_node *node,
@@ -221,7 +318,7 @@
 	clk = clk_register_factors(NULL, clk_name, parent, 0, reg,
 				   data->table, data->getter, &clk_lock);
 
-	if (clk) {
+	if (!IS_ERR(clk)) {
 		of_clk_add_provider(node, of_clk_src_simple_get, clk);
 		clk_register_clkdev(clk, clk_name, NULL);
 	}
@@ -239,11 +336,15 @@
 	u8 shift;
 };
 
-static const __initconst struct mux_data cpu_mux_data = {
+static const struct mux_data sun4i_cpu_mux_data __initconst = {
 	.shift = 16,
 };
 
-static const __initconst struct mux_data apb1_mux_data = {
+static const struct mux_data sun6i_a31_ahb1_mux_data __initconst = {
+	.shift = 12,
+};
+
+static const struct mux_data sun4i_apb1_mux_data __initconst = {
 	.shift = 24,
 };
 
@@ -261,7 +362,8 @@
 	while (i < 5 && (parents[i] = of_clk_get_parent_name(node, i)) != NULL)
 		i++;
 
-	clk = clk_register_mux(NULL, clk_name, parents, i, 0, reg,
+	clk = clk_register_mux(NULL, clk_name, parents, i,
+			       CLK_SET_RATE_NO_REPARENT, reg,
 			       data->shift, SUNXI_MUX_GATE_WIDTH,
 			       0, &clk_lock);
 
@@ -277,26 +379,34 @@
  * sunxi_divider_clk_setup() - Setup function for simple divider clocks
  */
 
-#define SUNXI_DIVISOR_WIDTH	2
-
 struct div_data {
-	u8 shift;
-	u8 pow;
+	u8	shift;
+	u8	pow;
+	u8	width;
 };
 
-static const __initconst struct div_data axi_data = {
-	.shift = 0,
-	.pow = 0,
+static const struct div_data sun4i_axi_data __initconst = {
+	.shift	= 0,
+	.pow	= 0,
+	.width	= 2,
 };
 
-static const __initconst struct div_data ahb_data = {
-	.shift = 4,
-	.pow = 1,
+static const struct div_data sun4i_ahb_data __initconst = {
+	.shift	= 4,
+	.pow	= 1,
+	.width	= 2,
 };
 
-static const __initconst struct div_data apb0_data = {
-	.shift = 8,
-	.pow = 1,
+static const struct div_data sun4i_apb0_data __initconst = {
+	.shift	= 8,
+	.pow	= 1,
+	.width	= 2,
+};
+
+static const struct div_data sun6i_a31_apb2_div_data __initconst = {
+	.shift	= 0,
+	.pow	= 0,
+	.width	= 4,
 };
 
 static void __init sunxi_divider_clk_setup(struct device_node *node,
@@ -312,7 +422,7 @@
 	clk_parent = of_clk_get_parent_name(node, 0);
 
 	clk = clk_register_divider(NULL, clk_name, clk_parent, 0,
-				   reg, data->shift, SUNXI_DIVISOR_WIDTH,
+				   reg, data->shift, data->width,
 				   data->pow ? CLK_DIVIDER_POWER_OF_TWO : 0,
 				   &clk_lock);
 	if (clk) {
@@ -333,34 +443,70 @@
 	DECLARE_BITMAP(mask, SUNXI_GATES_MAX_SIZE);
 };
 
-static const __initconst struct gates_data sun4i_axi_gates_data = {
+static const struct gates_data sun4i_axi_gates_data __initconst = {
 	.mask = {1},
 };
 
-static const __initconst struct gates_data sun4i_ahb_gates_data = {
+static const struct gates_data sun4i_ahb_gates_data __initconst = {
 	.mask = {0x7F77FFF, 0x14FB3F},
 };
 
-static const __initconst struct gates_data sun5i_a13_ahb_gates_data = {
+static const struct gates_data sun5i_a10s_ahb_gates_data __initconst = {
+	.mask = {0x147667e7, 0x185915},
+};
+
+static const struct gates_data sun5i_a13_ahb_gates_data __initconst = {
 	.mask = {0x107067e7, 0x185111},
 };
 
-static const __initconst struct gates_data sun4i_apb0_gates_data = {
+static const struct gates_data sun6i_a31_ahb1_gates_data __initconst = {
+	.mask = {0xEDFE7F62, 0x794F931},
+};
+
+static const struct gates_data sun7i_a20_ahb_gates_data __initconst = {
+	.mask = { 0x12f77fff, 0x16ff3f },
+};
+
+static const struct gates_data sun4i_apb0_gates_data __initconst = {
 	.mask = {0x4EF},
 };
 
-static const __initconst struct gates_data sun5i_a13_apb0_gates_data = {
+static const struct gates_data sun5i_a10s_apb0_gates_data __initconst = {
+	.mask = {0x469},
+};
+
+static const struct gates_data sun5i_a13_apb0_gates_data __initconst = {
 	.mask = {0x61},
 };
 
-static const __initconst struct gates_data sun4i_apb1_gates_data = {
+static const struct gates_data sun7i_a20_apb0_gates_data __initconst = {
+	.mask = { 0x4ff },
+};
+
+static const struct gates_data sun4i_apb1_gates_data __initconst = {
 	.mask = {0xFF00F7},
 };
 
-static const __initconst struct gates_data sun5i_a13_apb1_gates_data = {
+static const struct gates_data sun5i_a10s_apb1_gates_data __initconst = {
+	.mask = {0xf0007},
+};
+
+static const struct gates_data sun5i_a13_apb1_gates_data __initconst = {
 	.mask = {0xa0007},
 };
 
+static const struct gates_data sun6i_a31_apb1_gates_data __initconst = {
+	.mask = {0x3031},
+};
+
+static const struct gates_data sun6i_a31_apb2_gates_data __initconst = {
+	.mask = {0x3F000F},
+};
+
+static const struct gates_data sun7i_a20_apb1_gates_data __initconst = {
+	.mask = { 0xff80ff },
+};
+
 static void __init sunxi_gates_clk_setup(struct device_node *node,
 					 struct gates_data *data)
 {
@@ -410,43 +556,49 @@
 	of_clk_add_provider(node, of_clk_src_onecell_get, clk_data);
 }
 
-/* Matches for of_clk_init */
-static const __initconst struct of_device_id clk_match[] = {
-	{.compatible = "allwinner,sun4i-osc-clk", .data = sunxi_osc_clk_setup,},
-	{}
-};
-
 /* Matches for factors clocks */
-static const __initconst struct of_device_id clk_factors_match[] = {
-	{.compatible = "allwinner,sun4i-pll1-clk", .data = &pll1_data,},
-	{.compatible = "allwinner,sun4i-apb1-clk", .data = &apb1_data,},
+static const struct of_device_id clk_factors_match[] __initconst = {
+	{.compatible = "allwinner,sun4i-pll1-clk", .data = &sun4i_pll1_data,},
+	{.compatible = "allwinner,sun6i-a31-pll1-clk", .data = &sun6i_a31_pll1_data,},
+	{.compatible = "allwinner,sun4i-apb1-clk", .data = &sun4i_apb1_data,},
 	{}
 };
 
 /* Matches for divider clocks */
-static const __initconst struct of_device_id clk_div_match[] = {
-	{.compatible = "allwinner,sun4i-axi-clk", .data = &axi_data,},
-	{.compatible = "allwinner,sun4i-ahb-clk", .data = &ahb_data,},
-	{.compatible = "allwinner,sun4i-apb0-clk", .data = &apb0_data,},
+static const struct of_device_id clk_div_match[] __initconst = {
+	{.compatible = "allwinner,sun4i-axi-clk", .data = &sun4i_axi_data,},
+	{.compatible = "allwinner,sun4i-ahb-clk", .data = &sun4i_ahb_data,},
+	{.compatible = "allwinner,sun4i-apb0-clk", .data = &sun4i_apb0_data,},
+	{.compatible = "allwinner,sun6i-a31-apb2-div-clk", .data = &sun6i_a31_apb2_div_data,},
 	{}
 };
 
 /* Matches for mux clocks */
-static const __initconst struct of_device_id clk_mux_match[] = {
-	{.compatible = "allwinner,sun4i-cpu-clk", .data = &cpu_mux_data,},
-	{.compatible = "allwinner,sun4i-apb1-mux-clk", .data = &apb1_mux_data,},
+static const struct of_device_id clk_mux_match[] __initconst = {
+	{.compatible = "allwinner,sun4i-cpu-clk", .data = &sun4i_cpu_mux_data,},
+	{.compatible = "allwinner,sun4i-apb1-mux-clk", .data = &sun4i_apb1_mux_data,},
+	{.compatible = "allwinner,sun6i-a31-ahb1-mux-clk", .data = &sun6i_a31_ahb1_mux_data,},
 	{}
 };
 
 /* Matches for gate clocks */
-static const __initconst struct of_device_id clk_gates_match[] = {
+static const struct of_device_id clk_gates_match[] __initconst = {
 	{.compatible = "allwinner,sun4i-axi-gates-clk", .data = &sun4i_axi_gates_data,},
 	{.compatible = "allwinner,sun4i-ahb-gates-clk", .data = &sun4i_ahb_gates_data,},
+	{.compatible = "allwinner,sun5i-a10s-ahb-gates-clk", .data = &sun5i_a10s_ahb_gates_data,},
 	{.compatible = "allwinner,sun5i-a13-ahb-gates-clk", .data = &sun5i_a13_ahb_gates_data,},
+	{.compatible = "allwinner,sun6i-a31-ahb1-gates-clk", .data = &sun6i_a31_ahb1_gates_data,},
+	{.compatible = "allwinner,sun7i-a20-ahb-gates-clk", .data = &sun7i_a20_ahb_gates_data,},
 	{.compatible = "allwinner,sun4i-apb0-gates-clk", .data = &sun4i_apb0_gates_data,},
+	{.compatible = "allwinner,sun5i-a10s-apb0-gates-clk", .data = &sun5i_a10s_apb0_gates_data,},
 	{.compatible = "allwinner,sun5i-a13-apb0-gates-clk", .data = &sun5i_a13_apb0_gates_data,},
+	{.compatible = "allwinner,sun7i-a20-apb0-gates-clk", .data = &sun7i_a20_apb0_gates_data,},
 	{.compatible = "allwinner,sun4i-apb1-gates-clk", .data = &sun4i_apb1_gates_data,},
+	{.compatible = "allwinner,sun5i-a10s-apb1-gates-clk", .data = &sun5i_a10s_apb1_gates_data,},
 	{.compatible = "allwinner,sun5i-a13-apb1-gates-clk", .data = &sun5i_a13_apb1_gates_data,},
+	{.compatible = "allwinner,sun6i-a31-apb1-gates-clk", .data = &sun6i_a31_apb1_gates_data,},
+	{.compatible = "allwinner,sun7i-a20-apb1-gates-clk", .data = &sun7i_a20_apb1_gates_data,},
+	{.compatible = "allwinner,sun6i-a31-apb2-gates-clk", .data = &sun6i_a31_apb2_gates_data,},
 	{}
 };
 
@@ -467,8 +619,8 @@
 
 void __init sunxi_init_clocks(void)
 {
-	/* Register all the simple sunxi clocks on DT */
-	of_clk_init(clk_match);
+	/* Register all the simple and basic clocks on DT */
+	of_clk_init(NULL);
 
 	/* Register factor clocks */
 	of_sunxi_table_clock_setup(clk_factors_match, sunxi_factors_clk_setup);
diff --git a/drivers/clk/tegra/clk-tegra114.c b/drivers/clk/tegra/clk-tegra114.c
index 806d803..9467da7 100644
--- a/drivers/clk/tegra/clk-tegra114.c
+++ b/drivers/clk/tegra/clk-tegra114.c
@@ -1566,7 +1566,8 @@
 
 	/* audio0 */
 	clk = clk_register_mux(NULL, "audio0_mux", mux_audio_sync_clk,
-			       ARRAY_SIZE(mux_audio_sync_clk), 0,
+			       ARRAY_SIZE(mux_audio_sync_clk),
+			       CLK_SET_RATE_NO_REPARENT,
 			       clk_base + AUDIO_SYNC_CLK_I2S0, 0, 3, 0,
 			       NULL);
 	clks[audio0_mux] = clk;
@@ -1578,7 +1579,8 @@
 
 	/* audio1 */
 	clk = clk_register_mux(NULL, "audio1_mux", mux_audio_sync_clk,
-			       ARRAY_SIZE(mux_audio_sync_clk), 0,
+			       ARRAY_SIZE(mux_audio_sync_clk),
+			       CLK_SET_RATE_NO_REPARENT,
 			       clk_base + AUDIO_SYNC_CLK_I2S1, 0, 3, 0,
 			       NULL);
 	clks[audio1_mux] = clk;
@@ -1590,7 +1592,8 @@
 
 	/* audio2 */
 	clk = clk_register_mux(NULL, "audio2_mux", mux_audio_sync_clk,
-			       ARRAY_SIZE(mux_audio_sync_clk), 0,
+			       ARRAY_SIZE(mux_audio_sync_clk),
+			       CLK_SET_RATE_NO_REPARENT,
 			       clk_base + AUDIO_SYNC_CLK_I2S2, 0, 3, 0,
 			       NULL);
 	clks[audio2_mux] = clk;
@@ -1602,7 +1605,8 @@
 
 	/* audio3 */
 	clk = clk_register_mux(NULL, "audio3_mux", mux_audio_sync_clk,
-			       ARRAY_SIZE(mux_audio_sync_clk), 0,
+			       ARRAY_SIZE(mux_audio_sync_clk),
+			       CLK_SET_RATE_NO_REPARENT,
 			       clk_base + AUDIO_SYNC_CLK_I2S3, 0, 3, 0,
 			       NULL);
 	clks[audio3_mux] = clk;
@@ -1614,7 +1618,8 @@
 
 	/* audio4 */
 	clk = clk_register_mux(NULL, "audio4_mux", mux_audio_sync_clk,
-			       ARRAY_SIZE(mux_audio_sync_clk), 0,
+			       ARRAY_SIZE(mux_audio_sync_clk),
+			       CLK_SET_RATE_NO_REPARENT,
 			       clk_base + AUDIO_SYNC_CLK_I2S4, 0, 3, 0,
 			       NULL);
 	clks[audio4_mux] = clk;
@@ -1626,7 +1631,8 @@
 
 	/* spdif */
 	clk = clk_register_mux(NULL, "spdif_mux", mux_audio_sync_clk,
-			       ARRAY_SIZE(mux_audio_sync_clk), 0,
+			       ARRAY_SIZE(mux_audio_sync_clk),
+			       CLK_SET_RATE_NO_REPARENT,
 			       clk_base + AUDIO_SYNC_CLK_SPDIF, 0, 3, 0,
 			       NULL);
 	clks[spdif_mux] = clk;
@@ -1721,7 +1727,8 @@
 
 	/* clk_out_1 */
 	clk = clk_register_mux(NULL, "clk_out_1_mux", clk_out1_parents,
-			       ARRAY_SIZE(clk_out1_parents), 0,
+			       ARRAY_SIZE(clk_out1_parents),
+			       CLK_SET_RATE_NO_REPARENT,
 			       pmc_base + PMC_CLK_OUT_CNTRL, 6, 3, 0,
 			       &clk_out_lock);
 	clks[clk_out_1_mux] = clk;
@@ -1733,7 +1740,8 @@
 
 	/* clk_out_2 */
 	clk = clk_register_mux(NULL, "clk_out_2_mux", clk_out2_parents,
-			       ARRAY_SIZE(clk_out2_parents), 0,
+			       ARRAY_SIZE(clk_out2_parents),
+			       CLK_SET_RATE_NO_REPARENT,
 			       pmc_base + PMC_CLK_OUT_CNTRL, 14, 3, 0,
 			       &clk_out_lock);
 	clks[clk_out_2_mux] = clk;
@@ -1745,7 +1753,8 @@
 
 	/* clk_out_3 */
 	clk = clk_register_mux(NULL, "clk_out_3_mux", clk_out3_parents,
-			       ARRAY_SIZE(clk_out3_parents), 0,
+			       ARRAY_SIZE(clk_out3_parents),
+			       CLK_SET_RATE_NO_REPARENT,
 			       pmc_base + PMC_CLK_OUT_CNTRL, 22, 3, 0,
 			       &clk_out_lock);
 	clks[clk_out_3_mux] = clk;
@@ -2063,7 +2072,8 @@
 
 	/* dsia */
 	clk = clk_register_mux(NULL, "dsia_mux", mux_plld_out0_plld2_out0,
-			       ARRAY_SIZE(mux_plld_out0_plld2_out0), 0,
+			       ARRAY_SIZE(mux_plld_out0_plld2_out0),
+			       CLK_SET_RATE_NO_REPARENT,
 			       clk_base + PLLD_BASE, 25, 1, 0, &pll_d_lock);
 	clks[dsia_mux] = clk;
 	clk = tegra_clk_register_periph_gate("dsia", "dsia_mux", 0, clk_base,
@@ -2073,7 +2083,8 @@
 
 	/* dsib */
 	clk = clk_register_mux(NULL, "dsib_mux", mux_plld_out0_plld2_out0,
-			       ARRAY_SIZE(mux_plld_out0_plld2_out0), 0,
+			       ARRAY_SIZE(mux_plld_out0_plld2_out0),
+			       CLK_SET_RATE_NO_REPARENT,
 			       clk_base + PLLD2_BASE, 25, 1, 0, &pll_d2_lock);
 	clks[dsib_mux] = clk;
 	clk = tegra_clk_register_periph_gate("dsib", "dsib_mux", 0, clk_base,
@@ -2110,7 +2121,8 @@
 
 	/* emc */
 	clk = clk_register_mux(NULL, "emc_mux", mux_pllmcp_clkm,
-			       ARRAY_SIZE(mux_pllmcp_clkm), 0,
+			       ARRAY_SIZE(mux_pllmcp_clkm),
+			       CLK_SET_RATE_NO_REPARENT,
 			       clk_base + CLK_SOURCE_EMC,
 			       29, 3, 0, NULL);
 	clk = tegra_clk_register_periph_gate("emc", "emc_mux", 0, clk_base,
@@ -2194,7 +2206,7 @@
  * dfll_soc/dfll_ref apparently must be kept enabled, otherwise I2C5
  * breaks
  */
-static __initdata struct tegra_clk_init_table init_table[] = {
+static struct tegra_clk_init_table init_table[] __initdata = {
 	{uarta, pll_p, 408000000, 0},
 	{uartb, pll_p, 408000000, 0},
 	{uartc, pll_p, 408000000, 0},
diff --git a/drivers/clk/tegra/clk-tegra20.c b/drivers/clk/tegra/clk-tegra20.c
index 759ca47..056f649 100644
--- a/drivers/clk/tegra/clk-tegra20.c
+++ b/drivers/clk/tegra/clk-tegra20.c
@@ -778,7 +778,8 @@
 
 	/* audio */
 	clk = clk_register_mux(NULL, "audio_mux", audio_parents,
-				ARRAY_SIZE(audio_parents), 0,
+				ARRAY_SIZE(audio_parents),
+				CLK_SET_RATE_NO_REPARENT,
 				clk_base + AUDIO_SYNC_CLK, 0, 3, 0, NULL);
 	clk = clk_register_gate(NULL, "audio", "audio_mux", 0,
 				clk_base + AUDIO_SYNC_CLK, 4,
@@ -941,7 +942,8 @@
 
 	/* emc */
 	clk = clk_register_mux(NULL, "emc_mux", mux_pllmcp_clkm,
-			       ARRAY_SIZE(mux_pllmcp_clkm), 0,
+			       ARRAY_SIZE(mux_pllmcp_clkm),
+			       CLK_SET_RATE_NO_REPARENT,
 			       clk_base + CLK_SOURCE_EMC,
 			       30, 2, 0, NULL);
 	clk = tegra_clk_register_periph_gate("emc", "emc_mux", 0, clk_base, 0,
@@ -1223,7 +1225,7 @@
 #endif
 };
 
-static __initdata struct tegra_clk_init_table init_table[] = {
+static struct tegra_clk_init_table init_table[] __initdata = {
 	{pll_p, clk_max, 216000000, 1},
 	{pll_p_out1, clk_max, 28800000, 1},
 	{pll_p_out2, clk_max, 48000000, 1},
diff --git a/drivers/clk/tegra/clk-tegra30.c b/drivers/clk/tegra/clk-tegra30.c
index e2c6ca0..dbe7c80 100644
--- a/drivers/clk/tegra/clk-tegra30.c
+++ b/drivers/clk/tegra/clk-tegra30.c
@@ -971,7 +971,7 @@
 	/* PLLU */
 	clk = tegra_clk_register_pll("pll_u", "pll_ref", clk_base, pmc_base, 0,
 			    0, &pll_u_params, TEGRA_PLLU | TEGRA_PLL_HAS_CPCON |
-			    TEGRA_PLL_SET_LFCON | TEGRA_PLL_USE_LOCK,
+			    TEGRA_PLL_SET_LFCON,
 			    pll_u_freq_table,
 			    NULL);
 	clk_register_clkdev(clk, "pll_u", NULL);
@@ -1026,7 +1026,8 @@
 
 	/* PLLE */
 	clk = clk_register_mux(NULL, "pll_e_mux", pll_e_parents,
-			       ARRAY_SIZE(pll_e_parents), 0,
+			       ARRAY_SIZE(pll_e_parents),
+			       CLK_SET_RATE_NO_REPARENT,
 			       clk_base + PLLE_AUX, 2, 1, 0, NULL);
 	clk = tegra_clk_register_plle("pll_e", "pll_e_mux", clk_base, pmc_base,
 			     CLK_GET_RATE_NOCACHE, 100000000, &pll_e_params,
@@ -1086,7 +1087,8 @@
 
 	/* audio0 */
 	clk = clk_register_mux(NULL, "audio0_mux", mux_audio_sync_clk,
-				ARRAY_SIZE(mux_audio_sync_clk), 0,
+				ARRAY_SIZE(mux_audio_sync_clk),
+				CLK_SET_RATE_NO_REPARENT,
 				clk_base + AUDIO_SYNC_CLK_I2S0, 0, 3, 0, NULL);
 	clk = clk_register_gate(NULL, "audio0", "audio0_mux", 0,
 				clk_base + AUDIO_SYNC_CLK_I2S0, 4,
@@ -1096,7 +1098,8 @@
 
 	/* audio1 */
 	clk = clk_register_mux(NULL, "audio1_mux", mux_audio_sync_clk,
-				ARRAY_SIZE(mux_audio_sync_clk), 0,
+				ARRAY_SIZE(mux_audio_sync_clk),
+				CLK_SET_RATE_NO_REPARENT,
 				clk_base + AUDIO_SYNC_CLK_I2S1, 0, 3, 0, NULL);
 	clk = clk_register_gate(NULL, "audio1", "audio1_mux", 0,
 				clk_base + AUDIO_SYNC_CLK_I2S1, 4,
@@ -1106,7 +1109,8 @@
 
 	/* audio2 */
 	clk = clk_register_mux(NULL, "audio2_mux", mux_audio_sync_clk,
-				ARRAY_SIZE(mux_audio_sync_clk), 0,
+				ARRAY_SIZE(mux_audio_sync_clk),
+				CLK_SET_RATE_NO_REPARENT,
 				clk_base + AUDIO_SYNC_CLK_I2S2, 0, 3, 0, NULL);
 	clk = clk_register_gate(NULL, "audio2", "audio2_mux", 0,
 				clk_base + AUDIO_SYNC_CLK_I2S2, 4,
@@ -1116,7 +1120,8 @@
 
 	/* audio3 */
 	clk = clk_register_mux(NULL, "audio3_mux", mux_audio_sync_clk,
-				ARRAY_SIZE(mux_audio_sync_clk), 0,
+				ARRAY_SIZE(mux_audio_sync_clk),
+				CLK_SET_RATE_NO_REPARENT,
 				clk_base + AUDIO_SYNC_CLK_I2S3, 0, 3, 0, NULL);
 	clk = clk_register_gate(NULL, "audio3", "audio3_mux", 0,
 				clk_base + AUDIO_SYNC_CLK_I2S3, 4,
@@ -1126,7 +1131,8 @@
 
 	/* audio4 */
 	clk = clk_register_mux(NULL, "audio4_mux", mux_audio_sync_clk,
-				ARRAY_SIZE(mux_audio_sync_clk), 0,
+				ARRAY_SIZE(mux_audio_sync_clk),
+				CLK_SET_RATE_NO_REPARENT,
 				clk_base + AUDIO_SYNC_CLK_I2S4, 0, 3, 0, NULL);
 	clk = clk_register_gate(NULL, "audio4", "audio4_mux", 0,
 				clk_base + AUDIO_SYNC_CLK_I2S4, 4,
@@ -1136,7 +1142,8 @@
 
 	/* spdif */
 	clk = clk_register_mux(NULL, "spdif_mux", mux_audio_sync_clk,
-				ARRAY_SIZE(mux_audio_sync_clk), 0,
+				ARRAY_SIZE(mux_audio_sync_clk),
+				CLK_SET_RATE_NO_REPARENT,
 				clk_base + AUDIO_SYNC_CLK_SPDIF, 0, 3, 0, NULL);
 	clk = clk_register_gate(NULL, "spdif", "spdif_mux", 0,
 				clk_base + AUDIO_SYNC_CLK_SPDIF, 4,
@@ -1229,7 +1236,8 @@
 
 	/* clk_out_1 */
 	clk = clk_register_mux(NULL, "clk_out_1_mux", clk_out1_parents,
-			       ARRAY_SIZE(clk_out1_parents), 0,
+			       ARRAY_SIZE(clk_out1_parents),
+			       CLK_SET_RATE_NO_REPARENT,
 			       pmc_base + PMC_CLK_OUT_CNTRL, 6, 3, 0,
 			       &clk_out_lock);
 	clks[clk_out_1_mux] = clk;
@@ -1241,7 +1249,8 @@
 
 	/* clk_out_2 */
 	clk = clk_register_mux(NULL, "clk_out_2_mux", clk_out2_parents,
-			       ARRAY_SIZE(clk_out2_parents), 0,
+			       ARRAY_SIZE(clk_out2_parents),
+			       CLK_SET_RATE_NO_REPARENT,
 			       pmc_base + PMC_CLK_OUT_CNTRL, 14, 3, 0,
 			       &clk_out_lock);
 	clk = clk_register_gate(NULL, "clk_out_2", "clk_out_2_mux", 0,
@@ -1252,7 +1261,8 @@
 
 	/* clk_out_3 */
 	clk = clk_register_mux(NULL, "clk_out_3_mux", clk_out3_parents,
-			       ARRAY_SIZE(clk_out3_parents), 0,
+			       ARRAY_SIZE(clk_out3_parents),
+			       CLK_SET_RATE_NO_REPARENT,
 			       pmc_base + PMC_CLK_OUT_CNTRL, 22, 3, 0,
 			       &clk_out_lock);
 	clk = clk_register_gate(NULL, "clk_out_3", "clk_out_3_mux", 0,
@@ -1679,7 +1689,8 @@
 
 	/* emc */
 	clk = clk_register_mux(NULL, "emc_mux", mux_pllmcp_clkm,
-			       ARRAY_SIZE(mux_pllmcp_clkm), 0,
+			       ARRAY_SIZE(mux_pllmcp_clkm),
+			       CLK_SET_RATE_NO_REPARENT,
 			       clk_base + CLK_SOURCE_EMC,
 			       30, 2, 0, NULL);
 	clk = tegra_clk_register_periph_gate("emc", "emc_mux", 0, clk_base, 0,
@@ -1901,7 +1912,7 @@
 #endif
 };
 
-static __initdata struct tegra_clk_init_table init_table[] = {
+static struct tegra_clk_init_table init_table[] __initdata = {
 	{uarta, pll_p, 408000000, 0},
 	{uartb, pll_p, 408000000, 0},
 	{uartc, pll_p, 408000000, 0},
diff --git a/drivers/clk/versatile/clk-vexpress.c b/drivers/clk/versatile/clk-vexpress.c
index a4a728d..2d5e1b4 100644
--- a/drivers/clk/versatile/clk-vexpress.c
+++ b/drivers/clk/versatile/clk-vexpress.c
@@ -37,8 +37,8 @@
 		snprintf(name, ARRAY_SIZE(name), "timerclken%d", i);
 
 		vexpress_sp810_timerclken[i] = clk_register_mux(NULL, name,
-				parents, 2, 0, base + SCCTRL,
-				SCCTRL_TIMERENnSEL_SHIFT(i), 1,
+				parents, 2, CLK_SET_RATE_NO_REPARENT,
+				base + SCCTRL, SCCTRL_TIMERENnSEL_SHIFT(i), 1,
 				0, &vexpress_sp810_lock);
 
 		if (WARN_ON(IS_ERR(vexpress_sp810_timerclken[i])))
diff --git a/drivers/clk/zynq/clkc.c b/drivers/clk/zynq/clkc.c
index 089d3e3..cc40fe6 100644
--- a/drivers/clk/zynq/clkc.c
+++ b/drivers/clk/zynq/clkc.c
@@ -125,8 +125,9 @@
 	div0_name = kasprintf(GFP_KERNEL, "%s_div0", clk_name);
 	div1_name = kasprintf(GFP_KERNEL, "%s_div1", clk_name);
 
-	clk = clk_register_mux(NULL, mux_name, parents, 4, 0,
-			fclk_ctrl_reg, 4, 2, 0, fclk_lock);
+	clk = clk_register_mux(NULL, mux_name, parents, 4,
+			CLK_SET_RATE_NO_REPARENT, fclk_ctrl_reg, 4, 2, 0,
+			fclk_lock);
 
 	clk = clk_register_divider(NULL, div0_name, mux_name,
 			0, fclk_ctrl_reg, 8, 6, CLK_DIVIDER_ONE_BASED |
@@ -168,8 +169,8 @@
 	mux_name = kasprintf(GFP_KERNEL, "%s_mux", clk_name0);
 	div_name = kasprintf(GFP_KERNEL, "%s_div", clk_name0);
 
-	clk = clk_register_mux(NULL, mux_name, parents, 4, 0,
-			clk_ctrl, 4, 2, 0, lock);
+	clk = clk_register_mux(NULL, mux_name, parents, 4,
+			CLK_SET_RATE_NO_REPARENT, clk_ctrl, 4, 2, 0, lock);
 
 	clk = clk_register_divider(NULL, div_name, mux_name, 0, clk_ctrl, 8, 6,
 			CLK_DIVIDER_ONE_BASED | CLK_DIVIDER_ALLOW_ZERO, lock);
@@ -236,25 +237,26 @@
 	clk = clk_register_zynq_pll("armpll_int", "ps_clk", SLCR_ARMPLL_CTRL,
 			SLCR_PLL_STATUS, 0, &armpll_lock);
 	clks[armpll] = clk_register_mux(NULL, clk_output_name[armpll],
-			armpll_parents, 2, 0, SLCR_ARMPLL_CTRL, 4, 1, 0,
-			&armpll_lock);
+			armpll_parents, 2, CLK_SET_RATE_NO_REPARENT,
+			SLCR_ARMPLL_CTRL, 4, 1, 0, &armpll_lock);
 
 	clk = clk_register_zynq_pll("ddrpll_int", "ps_clk", SLCR_DDRPLL_CTRL,
 			SLCR_PLL_STATUS, 1, &ddrpll_lock);
 	clks[ddrpll] = clk_register_mux(NULL, clk_output_name[ddrpll],
-			ddrpll_parents, 2, 0, SLCR_DDRPLL_CTRL, 4, 1, 0,
-			&ddrpll_lock);
+			ddrpll_parents, 2, CLK_SET_RATE_NO_REPARENT,
+			SLCR_DDRPLL_CTRL, 4, 1, 0, &ddrpll_lock);
 
 	clk = clk_register_zynq_pll("iopll_int", "ps_clk", SLCR_IOPLL_CTRL,
 			SLCR_PLL_STATUS, 2, &iopll_lock);
 	clks[iopll] = clk_register_mux(NULL, clk_output_name[iopll],
-			iopll_parents, 2, 0, SLCR_IOPLL_CTRL, 4, 1, 0,
-			&iopll_lock);
+			iopll_parents, 2, CLK_SET_RATE_NO_REPARENT,
+			SLCR_IOPLL_CTRL, 4, 1, 0, &iopll_lock);
 
 	/* CPU clocks */
 	tmp = readl(SLCR_621_TRUE) & 1;
-	clk = clk_register_mux(NULL, "cpu_mux", cpu_parents, 4, 0,
-			SLCR_ARM_CLK_CTRL, 4, 2, 0, &armclk_lock);
+	clk = clk_register_mux(NULL, "cpu_mux", cpu_parents, 4,
+			CLK_SET_RATE_NO_REPARENT, SLCR_ARM_CLK_CTRL, 4, 2, 0,
+			&armclk_lock);
 	clk = clk_register_divider(NULL, "cpu_div", "cpu_mux", 0,
 			SLCR_ARM_CLK_CTRL, 8, 6, CLK_DIVIDER_ONE_BASED |
 			CLK_DIVIDER_ALLOW_ZERO, &armclk_lock);
@@ -293,8 +295,9 @@
 			swdt_ext_clk_mux_parents[i + 1] = dummy_nm;
 	}
 	clks[swdt] = clk_register_mux(NULL, clk_output_name[swdt],
-			swdt_ext_clk_mux_parents, 2, CLK_SET_RATE_PARENT,
-			SLCR_SWDT_CLK_SEL, 0, 1, 0, &swdtclk_lock);
+			swdt_ext_clk_mux_parents, 2, CLK_SET_RATE_PARENT |
+			CLK_SET_RATE_NO_REPARENT, SLCR_SWDT_CLK_SEL, 0, 1, 0,
+			&swdtclk_lock);
 
 	/* DDR clocks */
 	clk = clk_register_divider(NULL, "ddr2x_div", "ddrpll", 0,
@@ -356,8 +359,9 @@
 			gem0_mux_parents[i + 1] = of_clk_get_parent_name(np,
 					idx);
 	}
-	clk = clk_register_mux(NULL, "gem0_mux", periph_parents, 4, 0,
-			SLCR_GEM0_CLK_CTRL, 4, 2, 0, &gem0clk_lock);
+	clk = clk_register_mux(NULL, "gem0_mux", periph_parents, 4,
+			CLK_SET_RATE_NO_REPARENT, SLCR_GEM0_CLK_CTRL, 4, 2, 0,
+			&gem0clk_lock);
 	clk = clk_register_divider(NULL, "gem0_div0", "gem0_mux", 0,
 			SLCR_GEM0_CLK_CTRL, 8, 6, CLK_DIVIDER_ONE_BASED |
 			CLK_DIVIDER_ALLOW_ZERO, &gem0clk_lock);
@@ -366,7 +370,8 @@
 			CLK_DIVIDER_ONE_BASED | CLK_DIVIDER_ALLOW_ZERO,
 			&gem0clk_lock);
 	clk = clk_register_mux(NULL, "gem0_emio_mux", gem0_mux_parents, 2,
-			CLK_SET_RATE_PARENT, SLCR_GEM0_CLK_CTRL, 6, 1, 0,
+			CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
+			SLCR_GEM0_CLK_CTRL, 6, 1, 0,
 			&gem0clk_lock);
 	clks[gem0] = clk_register_gate(NULL, clk_output_name[gem0],
 			"gem0_emio_mux", CLK_SET_RATE_PARENT,
@@ -379,8 +384,9 @@
 			gem1_mux_parents[i + 1] = of_clk_get_parent_name(np,
 					idx);
 	}
-	clk = clk_register_mux(NULL, "gem1_mux", periph_parents, 4, 0,
-			SLCR_GEM1_CLK_CTRL, 4, 2, 0, &gem1clk_lock);
+	clk = clk_register_mux(NULL, "gem1_mux", periph_parents, 4,
+			CLK_SET_RATE_NO_REPARENT, SLCR_GEM1_CLK_CTRL, 4, 2, 0,
+			&gem1clk_lock);
 	clk = clk_register_divider(NULL, "gem1_div0", "gem1_mux", 0,
 			SLCR_GEM1_CLK_CTRL, 8, 6, CLK_DIVIDER_ONE_BASED |
 			CLK_DIVIDER_ALLOW_ZERO, &gem1clk_lock);
@@ -389,7 +395,8 @@
 			CLK_DIVIDER_ONE_BASED | CLK_DIVIDER_ALLOW_ZERO,
 			&gem1clk_lock);
 	clk = clk_register_mux(NULL, "gem1_emio_mux", gem1_mux_parents, 2,
-			CLK_SET_RATE_PARENT, SLCR_GEM1_CLK_CTRL, 6, 1, 0,
+			CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT,
+			SLCR_GEM1_CLK_CTRL, 6, 1, 0,
 			&gem1clk_lock);
 	clks[gem1] = clk_register_gate(NULL, clk_output_name[gem1],
 			"gem1_emio_mux", CLK_SET_RATE_PARENT,
@@ -409,8 +416,9 @@
 			can_mio_mux_parents[i] = dummy_nm;
 	}
 	kfree(clk_name);
-	clk = clk_register_mux(NULL, "can_mux", periph_parents, 4, 0,
-			SLCR_CAN_CLK_CTRL, 4, 2, 0, &canclk_lock);
+	clk = clk_register_mux(NULL, "can_mux", periph_parents, 4,
+			CLK_SET_RATE_NO_REPARENT, SLCR_CAN_CLK_CTRL, 4, 2, 0,
+			&canclk_lock);
 	clk = clk_register_divider(NULL, "can_div0", "can_mux", 0,
 			SLCR_CAN_CLK_CTRL, 8, 6, CLK_DIVIDER_ONE_BASED |
 			CLK_DIVIDER_ALLOW_ZERO, &canclk_lock);
@@ -425,17 +433,21 @@
 			CLK_SET_RATE_PARENT, SLCR_CAN_CLK_CTRL, 1, 0,
 			&canclk_lock);
 	clk = clk_register_mux(NULL, "can0_mio_mux",
-			can_mio_mux_parents, 54, CLK_SET_RATE_PARENT,
-			SLCR_CAN_MIOCLK_CTRL, 0, 6, 0, &canmioclk_lock);
+			can_mio_mux_parents, 54, CLK_SET_RATE_PARENT |
+			CLK_SET_RATE_NO_REPARENT, SLCR_CAN_MIOCLK_CTRL, 0, 6, 0,
+			&canmioclk_lock);
 	clk = clk_register_mux(NULL, "can1_mio_mux",
-			can_mio_mux_parents, 54, CLK_SET_RATE_PARENT,
-			SLCR_CAN_MIOCLK_CTRL, 16, 6, 0, &canmioclk_lock);
+			can_mio_mux_parents, 54, CLK_SET_RATE_PARENT |
+			CLK_SET_RATE_NO_REPARENT, SLCR_CAN_MIOCLK_CTRL, 16, 6,
+			0, &canmioclk_lock);
 	clks[can0] = clk_register_mux(NULL, clk_output_name[can0],
-			can0_mio_mux2_parents, 2, CLK_SET_RATE_PARENT,
-			SLCR_CAN_MIOCLK_CTRL, 6, 1, 0, &canmioclk_lock);
+			can0_mio_mux2_parents, 2, CLK_SET_RATE_PARENT |
+			CLK_SET_RATE_NO_REPARENT, SLCR_CAN_MIOCLK_CTRL, 6, 1, 0,
+			&canmioclk_lock);
 	clks[can1] = clk_register_mux(NULL, clk_output_name[can1],
-			can1_mio_mux2_parents, 2, CLK_SET_RATE_PARENT,
-			SLCR_CAN_MIOCLK_CTRL, 22, 1, 0, &canmioclk_lock);
+			can1_mio_mux2_parents, 2, CLK_SET_RATE_PARENT |
+			CLK_SET_RATE_NO_REPARENT, SLCR_CAN_MIOCLK_CTRL, 22, 1,
+			0, &canmioclk_lock);
 
 	for (i = 0; i < ARRAY_SIZE(dbgtrc_emio_input_names); i++) {
 		int idx = of_property_match_string(np, "clock-names",
@@ -444,13 +456,15 @@
 			dbg_emio_mux_parents[i + 1] = of_clk_get_parent_name(np,
 					idx);
 	}
-	clk = clk_register_mux(NULL, "dbg_mux", periph_parents, 4, 0,
-			SLCR_DBG_CLK_CTRL, 4, 2, 0, &dbgclk_lock);
+	clk = clk_register_mux(NULL, "dbg_mux", periph_parents, 4,
+			CLK_SET_RATE_NO_REPARENT, SLCR_DBG_CLK_CTRL, 4, 2, 0,
+			&dbgclk_lock);
 	clk = clk_register_divider(NULL, "dbg_div", "dbg_mux", 0,
 			SLCR_DBG_CLK_CTRL, 8, 6, CLK_DIVIDER_ONE_BASED |
 			CLK_DIVIDER_ALLOW_ZERO, &dbgclk_lock);
-	clk = clk_register_mux(NULL, "dbg_emio_mux", dbg_emio_mux_parents, 2, 0,
-			SLCR_DBG_CLK_CTRL, 6, 1, 0, &dbgclk_lock);
+	clk = clk_register_mux(NULL, "dbg_emio_mux", dbg_emio_mux_parents, 2,
+			CLK_SET_RATE_NO_REPARENT, SLCR_DBG_CLK_CTRL, 6, 1, 0,
+			&dbgclk_lock);
 	clks[dbg_trc] = clk_register_gate(NULL, clk_output_name[dbg_trc],
 			"dbg_emio_mux", CLK_SET_RATE_PARENT, SLCR_DBG_CLK_CTRL,
 			0, 0, &dbgclk_lock);
diff --git a/drivers/clk/zynq/pll.c b/drivers/clk/zynq/pll.c
index 47e307c..3226f54 100644
--- a/drivers/clk/zynq/pll.c
+++ b/drivers/clk/zynq/pll.c
@@ -50,6 +50,9 @@
 #define PLLCTRL_RESET_MASK	1
 #define PLLCTRL_RESET_SHIFT	0
 
+#define PLL_FBDIV_MIN	13
+#define PLL_FBDIV_MAX	66
+
 /**
  * zynq_pll_round_rate() - Round a clock frequency
  * @hw:		Handle between common and hardware-specific interfaces
@@ -63,10 +66,10 @@
 	u32 fbdiv;
 
 	fbdiv = DIV_ROUND_CLOSEST(rate, *prate);
-	if (fbdiv < 13)
-		fbdiv = 13;
-	else if (fbdiv > 66)
-		fbdiv = 66;
+	if (fbdiv < PLL_FBDIV_MIN)
+		fbdiv = PLL_FBDIV_MIN;
+	else if (fbdiv > PLL_FBDIV_MAX)
+		fbdiv = PLL_FBDIV_MAX;
 
 	return *prate * fbdiv;
 }
@@ -182,7 +185,13 @@
 
 /**
  * clk_register_zynq_pll() - Register PLL with the clock framework
- * @np	Pointer to the DT device node
+ * @name	PLL name
+ * @parent	Parent clock name
+ * @pll_ctrl	Pointer to PLL control register
+ * @pll_status	Pointer to PLL status register
+ * @lock_index	Bit index to this PLL's lock status bit in @pll_status
+ * @lock	Register lock
+ * Returns handle to the registered clock.
  */
 struct clk *clk_register_zynq_pll(const char *name, const char *parent,
 		void __iomem *pll_ctrl, void __iomem *pll_status, u8 lock_index,
diff --git a/drivers/clocksource/samsung_pwm_timer.c b/drivers/clocksource/samsung_pwm_timer.c
index ac60f8b..ab29476 100644
--- a/drivers/clocksource/samsung_pwm_timer.c
+++ b/drivers/clocksource/samsung_pwm_timer.c
@@ -368,10 +368,6 @@
 
 static void __init samsung_timer_resources(void)
 {
-	pwm.timerclk = clk_get(NULL, "timers");
-	if (IS_ERR(pwm.timerclk))
-		panic("failed to get timers clock for timer");
-
 	clk_prepare_enable(pwm.timerclk);
 
 	pwm.tcnt_max = (1UL << pwm.variant.bits) - 1;
@@ -416,6 +412,10 @@
 	memcpy(&pwm.variant, variant, sizeof(pwm.variant));
 	memcpy(pwm.irq, irqs, SAMSUNG_PWM_NUM * sizeof(*irqs));
 
+	pwm.timerclk = clk_get(NULL, "timers");
+	if (IS_ERR(pwm.timerclk))
+		panic("failed to get timers clock for timer");
+
 	_samsung_pwm_clocksource_init();
 }
 
@@ -447,6 +447,10 @@
 		return;
 	}
 
+	pwm.timerclk = of_clk_get_by_name(np, "timers");
+	if (IS_ERR(pwm.timerclk))
+		panic("failed to get timers clock for timer");
+
 	_samsung_pwm_clocksource_init();
 }
 
diff --git a/drivers/cpuidle/Kconfig.arm b/drivers/cpuidle/Kconfig.arm
index b3302193..8e36603 100644
--- a/drivers/cpuidle/Kconfig.arm
+++ b/drivers/cpuidle/Kconfig.arm
@@ -27,3 +27,13 @@
 	help
 	  Select this to enable cpuidle for ST-E u8500 processors
 
+config CPU_IDLE_BIG_LITTLE
+	bool "Support for ARM big.LITTLE processors"
+	depends on ARCH_VEXPRESS_TC2_PM
+	select ARM_CPU_SUSPEND
+	select CPU_IDLE_MULTIPLE_DRIVERS
+	help
+	  Select this option to enable CPU idle driver for big.LITTLE based
+	  ARM systems. Driver manages CPUs coordination through MCPM and
+	  define different C-states for little and big cores through the
+	  multiple CPU idle drivers infrastructure.
diff --git a/drivers/cpuidle/Makefile b/drivers/cpuidle/Makefile
index 0b9d200..cea5ef5 100644
--- a/drivers/cpuidle/Makefile
+++ b/drivers/cpuidle/Makefile
@@ -11,3 +11,4 @@
 obj-$(CONFIG_ARM_KIRKWOOD_CPUIDLE)	+= cpuidle-kirkwood.o
 obj-$(CONFIG_ARM_ZYNQ_CPUIDLE)		+= cpuidle-zynq.o
 obj-$(CONFIG_ARM_U8500_CPUIDLE)         += cpuidle-ux500.o
+obj-$(CONFIG_CPU_IDLE_BIG_LITTLE)	+= cpuidle-big_little.o
diff --git a/drivers/cpuidle/cpuidle-big_little.c b/drivers/cpuidle/cpuidle-big_little.c
new file mode 100644
index 0000000..b45fc62
--- /dev/null
+++ b/drivers/cpuidle/cpuidle-big_little.c
@@ -0,0 +1,209 @@
+/*
+ * Copyright (c) 2013 ARM/Linaro
+ *
+ * Authors: Daniel Lezcano <daniel.lezcano@linaro.org>
+ *          Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
+ *          Nicolas Pitre <nicolas.pitre@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Maintainer: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
+ * Maintainer: Daniel Lezcano <daniel.lezcano@linaro.org>
+ */
+#include <linux/cpuidle.h>
+#include <linux/cpu_pm.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+
+#include <asm/cpu.h>
+#include <asm/cputype.h>
+#include <asm/cpuidle.h>
+#include <asm/mcpm.h>
+#include <asm/smp_plat.h>
+#include <asm/suspend.h>
+
+static int bl_enter_powerdown(struct cpuidle_device *dev,
+			      struct cpuidle_driver *drv, int idx);
+
+/*
+ * NB: Owing to current menu governor behaviour big and LITTLE
+ * index 1 states have to define exit_latency and target_residency for
+ * cluster state since, when all CPUs in a cluster hit it, the cluster
+ * can be shutdown. This means that when a single CPU enters this state
+ * the exit_latency and target_residency values are somewhat overkill.
+ * There is no notion of cluster states in the menu governor, so CPUs
+ * have to define CPU states where possibly the cluster will be shutdown
+ * depending on the state of other CPUs. idle states entry and exit happen
+ * at random times; however the cluster state provides target_residency
+ * values as if all CPUs in a cluster enter the state at once; this is
+ * somewhat optimistic and behaviour should be fixed either in the governor
+ * or in the MCPM back-ends.
+ * To make this driver 100% generic the number of states and the exit_latency
+ * target_residency values must be obtained from device tree bindings.
+ *
+ * exit_latency: refers to the TC2 vexpress test chip and depends on the
+ * current cluster operating point. It is the time it takes to get the CPU
+ * up and running when the CPU is powered up on cluster wake-up from shutdown.
+ * Current values for big and LITTLE clusters are provided for clusters
+ * running at default operating points.
+ *
+ * target_residency: it is the minimum amount of time the cluster has
+ * to be down to break even in terms of power consumption. cluster
+ * shutdown has inherent dynamic power costs (L2 writebacks to DRAM
+ * being the main factor) that depend on the current operating points.
+ * The current values for both clusters are provided for a CPU whose half
+ * of L2 lines are dirty and require cleaning to DRAM, and takes into
+ * account leakage static power values related to the vexpress TC2 testchip.
+ */
+static struct cpuidle_driver bl_idle_little_driver = {
+	.name = "little_idle",
+	.owner = THIS_MODULE,
+	.states[0] = ARM_CPUIDLE_WFI_STATE,
+	.states[1] = {
+		.enter			= bl_enter_powerdown,
+		.exit_latency		= 700,
+		.target_residency	= 2500,
+		.flags			= CPUIDLE_FLAG_TIME_VALID |
+					  CPUIDLE_FLAG_TIMER_STOP,
+		.name			= "C1",
+		.desc			= "ARM little-cluster power down",
+	},
+	.state_count = 2,
+};
+
+static struct cpuidle_driver bl_idle_big_driver = {
+	.name = "big_idle",
+	.owner = THIS_MODULE,
+	.states[0] = ARM_CPUIDLE_WFI_STATE,
+	.states[1] = {
+		.enter			= bl_enter_powerdown,
+		.exit_latency		= 500,
+		.target_residency	= 2000,
+		.flags			= CPUIDLE_FLAG_TIME_VALID |
+					  CPUIDLE_FLAG_TIMER_STOP,
+		.name			= "C1",
+		.desc			= "ARM big-cluster power down",
+	},
+	.state_count = 2,
+};
+
+/*
+ * notrace prevents trace shims from getting inserted where they
+ * should not. Global jumps and ldrex/strex must not be inserted
+ * in power down sequences where caches and MMU may be turned off.
+ */
+static int notrace bl_powerdown_finisher(unsigned long arg)
+{
+	/* MCPM works with HW CPU identifiers */
+	unsigned int mpidr = read_cpuid_mpidr();
+	unsigned int cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+	unsigned int cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+
+	mcpm_set_entry_vector(cpu, cluster, cpu_resume);
+
+	/*
+	 * Residency value passed to mcpm_cpu_suspend back-end
+	 * has to be given clear semantics. Set to 0 as a
+	 * temporary value.
+	 */
+	mcpm_cpu_suspend(0);
+
+	/* return value != 0 means failure */
+	return 1;
+}
+
+/**
+ * bl_enter_powerdown - Programs CPU to enter the specified state
+ * @dev: cpuidle device
+ * @drv: The target state to be programmed
+ * @idx: state index
+ *
+ * Called from the CPUidle framework to program the device to the
+ * specified target state selected by the governor.
+ */
+static int bl_enter_powerdown(struct cpuidle_device *dev,
+				struct cpuidle_driver *drv, int idx)
+{
+	cpu_pm_enter();
+
+	cpu_suspend(0, bl_powerdown_finisher);
+
+	/* signals the MCPM core that CPU is out of low power state */
+	mcpm_cpu_powered_up();
+
+	cpu_pm_exit();
+
+	return idx;
+}
+
+static int __init bl_idle_driver_init(struct cpuidle_driver *drv, int cpu_id)
+{
+	struct cpuinfo_arm *cpu_info;
+	struct cpumask *cpumask;
+	unsigned long cpuid;
+	int cpu;
+
+	cpumask = kzalloc(cpumask_size(), GFP_KERNEL);
+	if (!cpumask)
+		return -ENOMEM;
+
+	for_each_possible_cpu(cpu) {
+		cpu_info = &per_cpu(cpu_data, cpu);
+		cpuid = is_smp() ? cpu_info->cpuid : read_cpuid_id();
+
+		/* read cpu id part number */
+		if ((cpuid & 0xFFF0) == cpu_id)
+			cpumask_set_cpu(cpu, cpumask);
+	}
+
+	drv->cpumask = cpumask;
+
+	return 0;
+}
+
+static int __init bl_idle_init(void)
+{
+	int ret;
+
+	/*
+	 * Initialize the driver just for a compliant set of machines
+	 */
+	if (!of_machine_is_compatible("arm,vexpress,v2p-ca15_a7"))
+		return -ENODEV;
+	/*
+	 * For now the differentiation between little and big cores
+	 * is based on the part number. A7 cores are considered little
+	 * cores, A15 are considered big cores. This distinction may
+	 * evolve in the future with a more generic matching approach.
+	 */
+	ret = bl_idle_driver_init(&bl_idle_little_driver,
+				  ARM_CPU_PART_CORTEX_A7);
+	if (ret)
+		return ret;
+
+	ret = bl_idle_driver_init(&bl_idle_big_driver, ARM_CPU_PART_CORTEX_A15);
+	if (ret)
+		goto out_uninit_little;
+
+	ret = cpuidle_register(&bl_idle_little_driver, NULL);
+	if (ret)
+		goto out_uninit_big;
+
+	ret = cpuidle_register(&bl_idle_big_driver, NULL);
+	if (ret)
+		goto out_unregister_little;
+
+	return 0;
+
+out_unregister_little:
+	cpuidle_unregister(&bl_idle_little_driver);
+out_uninit_big:
+	kfree(bl_idle_big_driver.cpumask);
+out_uninit_little:
+	kfree(bl_idle_little_driver.cpumask);
+
+	return ret;
+}
+device_initcall(bl_idle_init);
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index daa4da2..526ec77 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -308,6 +308,15 @@
 	select DMA_ENGINE
 	select DMA_VIRTUAL_CHANNELS
 
+config K3_DMA
+	tristate "Hisilicon K3 DMA support"
+	depends on ARCH_HI3xxx
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+	help
+	  Support the DMA engine for Hisilicon K3 platform
+	  devices.
+
 config DMA_ENGINE
 	bool
 
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index 6d62ec3..db89035 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -40,3 +40,4 @@
 obj-$(CONFIG_MMP_PDMA) += mmp_pdma.o
 obj-$(CONFIG_DMA_JZ4740) += dma-jz4740.o
 obj-$(CONFIG_TI_CPPI41) += cppi41.o
+obj-$(CONFIG_K3_DMA) += k3dma.o
diff --git a/drivers/dma/acpi-dma.c b/drivers/dma/acpi-dma.c
index 5a18f82..e69b03c 100644
--- a/drivers/dma/acpi-dma.c
+++ b/drivers/dma/acpi-dma.c
@@ -43,7 +43,6 @@
 	struct list_head resource_list;
 	struct resource_list_entry *rentry;
 	resource_size_t mem = 0, irq = 0;
-	u32 vendor_id;
 	int ret;
 
 	if (grp->shared_info_length != sizeof(struct acpi_csrt_shared_info))
@@ -73,9 +72,8 @@
 	if (si->mmio_base_low != mem || si->gsi_interrupt != irq)
 		return 0;
 
-	vendor_id = le32_to_cpu(grp->vendor_id);
 	dev_dbg(&adev->dev, "matches with %.4s%04X (rev %u)\n",
-		(char *)&vendor_id, grp->device_id, grp->revision);
+		(char *)&grp->vendor_id, grp->device_id, grp->revision);
 
 	/* Check if the request line range is available */
 	if (si->base_request_line == 0 && si->num_handshake_signals == 0)
diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c
index 06fe45c..fce46c5 100644
--- a/drivers/dma/amba-pl08x.c
+++ b/drivers/dma/amba-pl08x.c
@@ -24,6 +24,7 @@
  *
  * Documentation: ARM DDI 0196G == PL080
  * Documentation: ARM DDI 0218E == PL081
+ * Documentation: S3C6410 User's Manual == PL080S
  *
  * PL080 & PL081 both have 16 sets of DMA signals that can be routed to any
  * channel.
@@ -36,6 +37,14 @@
  *
  * The PL080 has a dual bus master, PL081 has a single master.
  *
+ * PL080S is a version modified by Samsung and used in S3C64xx SoCs.
+ * It differs in following aspects:
+ * - CH_CONFIG register at different offset,
+ * - separate CH_CONTROL2 register for transfer size,
+ * - bigger maximum transfer size,
+ * - 8-word aligned LLI, instead of 4-word, due to extra CCTL2 word,
+ * - no support for peripheral flow control.
+ *
  * Memory to peripheral transfer may be visualized as
  *	Get data from memory to DMAC
  *	Until no data left
@@ -64,10 +73,7 @@
  *  - Peripheral flow control: the transfer size is ignored (and should be
  *    zero).  The data is transferred from the current LLI entry, until
  *    after the final transfer signalled by LBREQ or LSREQ.  The DMAC
- *    will then move to the next LLI entry.
- *
- * Global TODO:
- * - Break out common code from arch/arm/mach-s3c64xx and share
+ *    will then move to the next LLI entry. Unsupported by PL080S.
  */
 #include <linux/amba/bus.h>
 #include <linux/amba/pl08x.h>
@@ -100,24 +106,16 @@
  * @nomadik: whether the channels have Nomadik security extension bits
  *	that need to be checked for permission before use and some registers are
  *	missing
+ * @pl080s: whether this version is a PL080S, which has separate register and
+ *	LLI word for transfer size.
  */
 struct vendor_data {
+	u8 config_offset;
 	u8 channels;
 	bool dualmaster;
 	bool nomadik;
-};
-
-/*
- * PL08X private data structures
- * An LLI struct - see PL08x TRM.  Note that next uses bit[0] as a bus bit,
- * start & end do not - their bus bit info is in cctl.  Also note that these
- * are fixed 32-bit quantities.
- */
-struct pl08x_lli {
-	u32 src;
-	u32 dst;
-	u32 lli;
-	u32 cctl;
+	bool pl080s;
+	u32 max_transfer_size;
 };
 
 /**
@@ -133,6 +131,8 @@
 	u8 buswidth;
 };
 
+#define IS_BUS_ALIGNED(bus) IS_ALIGNED((bus)->addr, (bus)->buswidth)
+
 /**
  * struct pl08x_phy_chan - holder for the physical channels
  * @id: physical index to this channel
@@ -145,6 +145,7 @@
 struct pl08x_phy_chan {
 	unsigned int id;
 	void __iomem *base;
+	void __iomem *reg_config;
 	spinlock_t lock;
 	struct pl08x_dma_chan *serving;
 	bool locked;
@@ -174,12 +175,13 @@
  * @ccfg: config reg values for current txd
  * @done: this marks completed descriptors, which should not have their
  *   mux released.
+ * @cyclic: indicate cyclic transfers
  */
 struct pl08x_txd {
 	struct virt_dma_desc vd;
 	struct list_head dsg_list;
 	dma_addr_t llis_bus;
-	struct pl08x_lli *llis_va;
+	u32 *llis_va;
 	/* Default cctl value for LLIs */
 	u32 cctl;
 	/*
@@ -188,6 +190,7 @@
 	 */
 	u32 ccfg;
 	bool done;
+	bool cyclic;
 };
 
 /**
@@ -263,17 +266,29 @@
 	struct dma_pool *pool;
 	u8 lli_buses;
 	u8 mem_buses;
+	u8 lli_words;
 };
 
 /*
  * PL08X specific defines
  */
 
-/* Size (bytes) of each LLI buffer allocated for one transfer */
-# define PL08X_LLI_TSFR_SIZE	0x2000
+/* The order of words in an LLI. */
+#define PL080_LLI_SRC		0
+#define PL080_LLI_DST		1
+#define PL080_LLI_LLI		2
+#define PL080_LLI_CCTL		3
+#define PL080S_LLI_CCTL2	4
 
-/* Maximum times we call dma_pool_alloc on this pool without freeing */
-#define MAX_NUM_TSFR_LLIS	(PL08X_LLI_TSFR_SIZE/sizeof(struct pl08x_lli))
+/* Total words in an LLI. */
+#define PL080_LLI_WORDS		4
+#define PL080S_LLI_WORDS	8
+
+/*
+ * Number of LLIs in each LLI buffer allocated for one transfer
+ * (maximum times we call dma_pool_alloc on this pool without freeing)
+ */
+#define MAX_NUM_TSFR_LLIS	512
 #define PL08X_ALIGN		8
 
 static inline struct pl08x_dma_chan *to_pl08x_chan(struct dma_chan *chan)
@@ -334,10 +349,39 @@
 {
 	unsigned int val;
 
-	val = readl(ch->base + PL080_CH_CONFIG);
+	val = readl(ch->reg_config);
 	return val & PL080_CONFIG_ACTIVE;
 }
 
+static void pl08x_write_lli(struct pl08x_driver_data *pl08x,
+		struct pl08x_phy_chan *phychan, const u32 *lli, u32 ccfg)
+{
+	if (pl08x->vd->pl080s)
+		dev_vdbg(&pl08x->adev->dev,
+			"WRITE channel %d: csrc=0x%08x, cdst=0x%08x, "
+			"clli=0x%08x, cctl=0x%08x, cctl2=0x%08x, ccfg=0x%08x\n",
+			phychan->id, lli[PL080_LLI_SRC], lli[PL080_LLI_DST],
+			lli[PL080_LLI_LLI], lli[PL080_LLI_CCTL],
+			lli[PL080S_LLI_CCTL2], ccfg);
+	else
+		dev_vdbg(&pl08x->adev->dev,
+			"WRITE channel %d: csrc=0x%08x, cdst=0x%08x, "
+			"clli=0x%08x, cctl=0x%08x, ccfg=0x%08x\n",
+			phychan->id, lli[PL080_LLI_SRC], lli[PL080_LLI_DST],
+			lli[PL080_LLI_LLI], lli[PL080_LLI_CCTL], ccfg);
+
+	writel_relaxed(lli[PL080_LLI_SRC], phychan->base + PL080_CH_SRC_ADDR);
+	writel_relaxed(lli[PL080_LLI_DST], phychan->base + PL080_CH_DST_ADDR);
+	writel_relaxed(lli[PL080_LLI_LLI], phychan->base + PL080_CH_LLI);
+	writel_relaxed(lli[PL080_LLI_CCTL], phychan->base + PL080_CH_CONTROL);
+
+	if (pl08x->vd->pl080s)
+		writel_relaxed(lli[PL080S_LLI_CCTL2],
+				phychan->base + PL080S_CH_CONTROL2);
+
+	writel(ccfg, phychan->reg_config);
+}
+
 /*
  * Set the initial DMA register values i.e. those for the first LLI
  * The next LLI pointer and the configuration interrupt bit have
@@ -350,7 +394,6 @@
 	struct pl08x_phy_chan *phychan = plchan->phychan;
 	struct virt_dma_desc *vd = vchan_next_desc(&plchan->vc);
 	struct pl08x_txd *txd = to_pl08x_txd(&vd->tx);
-	struct pl08x_lli *lli;
 	u32 val;
 
 	list_del(&txd->vd.node);
@@ -361,19 +404,7 @@
 	while (pl08x_phy_channel_busy(phychan))
 		cpu_relax();
 
-	lli = &txd->llis_va[0];
-
-	dev_vdbg(&pl08x->adev->dev,
-		"WRITE channel %d: csrc=0x%08x, cdst=0x%08x, "
-		"clli=0x%08x, cctl=0x%08x, ccfg=0x%08x\n",
-		phychan->id, lli->src, lli->dst, lli->lli, lli->cctl,
-		txd->ccfg);
-
-	writel(lli->src, phychan->base + PL080_CH_SRC_ADDR);
-	writel(lli->dst, phychan->base + PL080_CH_DST_ADDR);
-	writel(lli->lli, phychan->base + PL080_CH_LLI);
-	writel(lli->cctl, phychan->base + PL080_CH_CONTROL);
-	writel(txd->ccfg, phychan->base + PL080_CH_CONFIG);
+	pl08x_write_lli(pl08x, phychan, &txd->llis_va[0], txd->ccfg);
 
 	/* Enable the DMA channel */
 	/* Do not access config register until channel shows as disabled */
@@ -381,11 +412,11 @@
 		cpu_relax();
 
 	/* Do not access config register until channel shows as inactive */
-	val = readl(phychan->base + PL080_CH_CONFIG);
+	val = readl(phychan->reg_config);
 	while ((val & PL080_CONFIG_ACTIVE) || (val & PL080_CONFIG_ENABLE))
-		val = readl(phychan->base + PL080_CH_CONFIG);
+		val = readl(phychan->reg_config);
 
-	writel(val | PL080_CONFIG_ENABLE, phychan->base + PL080_CH_CONFIG);
+	writel(val | PL080_CONFIG_ENABLE, phychan->reg_config);
 }
 
 /*
@@ -404,9 +435,9 @@
 	int timeout;
 
 	/* Set the HALT bit and wait for the FIFO to drain */
-	val = readl(ch->base + PL080_CH_CONFIG);
+	val = readl(ch->reg_config);
 	val |= PL080_CONFIG_HALT;
-	writel(val, ch->base + PL080_CH_CONFIG);
+	writel(val, ch->reg_config);
 
 	/* Wait for channel inactive */
 	for (timeout = 1000; timeout; timeout--) {
@@ -423,9 +454,9 @@
 	u32 val;
 
 	/* Clear the HALT bit */
-	val = readl(ch->base + PL080_CH_CONFIG);
+	val = readl(ch->reg_config);
 	val &= ~PL080_CONFIG_HALT;
-	writel(val, ch->base + PL080_CH_CONFIG);
+	writel(val, ch->reg_config);
 }
 
 /*
@@ -437,12 +468,12 @@
 static void pl08x_terminate_phy_chan(struct pl08x_driver_data *pl08x,
 	struct pl08x_phy_chan *ch)
 {
-	u32 val = readl(ch->base + PL080_CH_CONFIG);
+	u32 val = readl(ch->reg_config);
 
 	val &= ~(PL080_CONFIG_ENABLE | PL080_CONFIG_ERR_IRQ_MASK |
 	         PL080_CONFIG_TC_IRQ_MASK);
 
-	writel(val, ch->base + PL080_CH_CONFIG);
+	writel(val, ch->reg_config);
 
 	writel(1 << ch->id, pl08x->base + PL080_ERR_CLEAR);
 	writel(1 << ch->id, pl08x->base + PL080_TC_CLEAR);
@@ -453,6 +484,28 @@
 	/* The source width defines the number of bytes */
 	u32 bytes = cctl & PL080_CONTROL_TRANSFER_SIZE_MASK;
 
+	cctl &= PL080_CONTROL_SWIDTH_MASK;
+
+	switch (cctl >> PL080_CONTROL_SWIDTH_SHIFT) {
+	case PL080_WIDTH_8BIT:
+		break;
+	case PL080_WIDTH_16BIT:
+		bytes *= 2;
+		break;
+	case PL080_WIDTH_32BIT:
+		bytes *= 4;
+		break;
+	}
+	return bytes;
+}
+
+static inline u32 get_bytes_in_cctl_pl080s(u32 cctl, u32 cctl1)
+{
+	/* The source width defines the number of bytes */
+	u32 bytes = cctl1 & PL080S_CONTROL_TRANSFER_SIZE_MASK;
+
+	cctl &= PL080_CONTROL_SWIDTH_MASK;
+
 	switch (cctl >> PL080_CONTROL_SWIDTH_SHIFT) {
 	case PL080_WIDTH_8BIT:
 		break;
@@ -469,47 +522,66 @@
 /* The channel should be paused when calling this */
 static u32 pl08x_getbytes_chan(struct pl08x_dma_chan *plchan)
 {
+	struct pl08x_driver_data *pl08x = plchan->host;
+	const u32 *llis_va, *llis_va_limit;
 	struct pl08x_phy_chan *ch;
+	dma_addr_t llis_bus;
 	struct pl08x_txd *txd;
-	size_t bytes = 0;
+	u32 llis_max_words;
+	size_t bytes;
+	u32 clli;
 
 	ch = plchan->phychan;
 	txd = plchan->at;
 
+	if (!ch || !txd)
+		return 0;
+
 	/*
 	 * Follow the LLIs to get the number of remaining
 	 * bytes in the currently active transaction.
 	 */
-	if (ch && txd) {
-		u32 clli = readl(ch->base + PL080_CH_LLI) & ~PL080_LLI_LM_AHB2;
+	clli = readl(ch->base + PL080_CH_LLI) & ~PL080_LLI_LM_AHB2;
 
-		/* First get the remaining bytes in the active transfer */
+	/* First get the remaining bytes in the active transfer */
+	if (pl08x->vd->pl080s)
+		bytes = get_bytes_in_cctl_pl080s(
+				readl(ch->base + PL080_CH_CONTROL),
+				readl(ch->base + PL080S_CH_CONTROL2));
+	else
 		bytes = get_bytes_in_cctl(readl(ch->base + PL080_CH_CONTROL));
 
-		if (clli) {
-			struct pl08x_lli *llis_va = txd->llis_va;
-			dma_addr_t llis_bus = txd->llis_bus;
-			int index;
+	if (!clli)
+		return bytes;
 
-			BUG_ON(clli < llis_bus || clli >= llis_bus +
-				sizeof(struct pl08x_lli) * MAX_NUM_TSFR_LLIS);
+	llis_va = txd->llis_va;
+	llis_bus = txd->llis_bus;
 
-			/*
-			 * Locate the next LLI - as this is an array,
-			 * it's simple maths to find.
-			 */
-			index = (clli - llis_bus) / sizeof(struct pl08x_lli);
+	llis_max_words = pl08x->lli_words * MAX_NUM_TSFR_LLIS;
+	BUG_ON(clli < llis_bus || clli >= llis_bus +
+						sizeof(u32) * llis_max_words);
 
-			for (; index < MAX_NUM_TSFR_LLIS; index++) {
-				bytes += get_bytes_in_cctl(llis_va[index].cctl);
+	/*
+	 * Locate the next LLI - as this is an array,
+	 * it's simple maths to find.
+	 */
+	llis_va += (clli - llis_bus) / sizeof(u32);
 
-				/*
-				 * A LLI pointer of 0 terminates the LLI list
-				 */
-				if (!llis_va[index].lli)
-					break;
-			}
-		}
+	llis_va_limit = llis_va + llis_max_words;
+
+	for (; llis_va < llis_va_limit; llis_va += pl08x->lli_words) {
+		if (pl08x->vd->pl080s)
+			bytes += get_bytes_in_cctl_pl080s(
+						llis_va[PL080_LLI_CCTL],
+						llis_va[PL080S_LLI_CCTL2]);
+		else
+			bytes += get_bytes_in_cctl(llis_va[PL080_LLI_CCTL]);
+
+		/*
+		 * A LLI pointer going backward terminates the LLI list
+		 */
+		if (llis_va[PL080_LLI_LLI] <= clli)
+			break;
 	}
 
 	return bytes;
@@ -720,6 +792,7 @@
 		break;
 	}
 
+	tsize &= PL080_CONTROL_TRANSFER_SIZE_MASK;
 	retbits |= tsize << PL080_CONTROL_TRANSFER_SIZE_SHIFT;
 	return retbits;
 }
@@ -764,20 +837,26 @@
 /*
  * Fills in one LLI for a certain transfer descriptor and advance the counter
  */
-static void pl08x_fill_lli_for_desc(struct pl08x_lli_build_data *bd,
-	int num_llis, int len, u32 cctl)
+static void pl08x_fill_lli_for_desc(struct pl08x_driver_data *pl08x,
+				    struct pl08x_lli_build_data *bd,
+				    int num_llis, int len, u32 cctl, u32 cctl2)
 {
-	struct pl08x_lli *llis_va = bd->txd->llis_va;
+	u32 offset = num_llis * pl08x->lli_words;
+	u32 *llis_va = bd->txd->llis_va + offset;
 	dma_addr_t llis_bus = bd->txd->llis_bus;
 
 	BUG_ON(num_llis >= MAX_NUM_TSFR_LLIS);
 
-	llis_va[num_llis].cctl = cctl;
-	llis_va[num_llis].src = bd->srcbus.addr;
-	llis_va[num_llis].dst = bd->dstbus.addr;
-	llis_va[num_llis].lli = llis_bus + (num_llis + 1) *
-		sizeof(struct pl08x_lli);
-	llis_va[num_llis].lli |= bd->lli_bus;
+	/* Advance the offset to next LLI. */
+	offset += pl08x->lli_words;
+
+	llis_va[PL080_LLI_SRC] = bd->srcbus.addr;
+	llis_va[PL080_LLI_DST] = bd->dstbus.addr;
+	llis_va[PL080_LLI_LLI] = (llis_bus + sizeof(u32) * offset);
+	llis_va[PL080_LLI_LLI] |= bd->lli_bus;
+	llis_va[PL080_LLI_CCTL] = cctl;
+	if (pl08x->vd->pl080s)
+		llis_va[PL080S_LLI_CCTL2] = cctl2;
 
 	if (cctl & PL080_CONTROL_SRC_INCR)
 		bd->srcbus.addr += len;
@@ -789,14 +868,53 @@
 	bd->remainder -= len;
 }
 
-static inline void prep_byte_width_lli(struct pl08x_lli_build_data *bd,
-		u32 *cctl, u32 len, int num_llis, size_t *total_bytes)
+static inline void prep_byte_width_lli(struct pl08x_driver_data *pl08x,
+			struct pl08x_lli_build_data *bd, u32 *cctl, u32 len,
+			int num_llis, size_t *total_bytes)
 {
 	*cctl = pl08x_cctl_bits(*cctl, 1, 1, len);
-	pl08x_fill_lli_for_desc(bd, num_llis, len, *cctl);
+	pl08x_fill_lli_for_desc(pl08x, bd, num_llis, len, *cctl, len);
 	(*total_bytes) += len;
 }
 
+#ifdef VERBOSE_DEBUG
+static void pl08x_dump_lli(struct pl08x_driver_data *pl08x,
+			   const u32 *llis_va, int num_llis)
+{
+	int i;
+
+	if (pl08x->vd->pl080s) {
+		dev_vdbg(&pl08x->adev->dev,
+			"%-3s %-9s  %-10s %-10s %-10s %-10s %s\n",
+			"lli", "", "csrc", "cdst", "clli", "cctl", "cctl2");
+		for (i = 0; i < num_llis; i++) {
+			dev_vdbg(&pl08x->adev->dev,
+				"%3d @%p: 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x\n",
+				i, llis_va, llis_va[PL080_LLI_SRC],
+				llis_va[PL080_LLI_DST], llis_va[PL080_LLI_LLI],
+				llis_va[PL080_LLI_CCTL],
+				llis_va[PL080S_LLI_CCTL2]);
+			llis_va += pl08x->lli_words;
+		}
+	} else {
+		dev_vdbg(&pl08x->adev->dev,
+			"%-3s %-9s  %-10s %-10s %-10s %s\n",
+			"lli", "", "csrc", "cdst", "clli", "cctl");
+		for (i = 0; i < num_llis; i++) {
+			dev_vdbg(&pl08x->adev->dev,
+				"%3d @%p: 0x%08x 0x%08x 0x%08x 0x%08x\n",
+				i, llis_va, llis_va[PL080_LLI_SRC],
+				llis_va[PL080_LLI_DST], llis_va[PL080_LLI_LLI],
+				llis_va[PL080_LLI_CCTL]);
+			llis_va += pl08x->lli_words;
+		}
+	}
+}
+#else
+static inline void pl08x_dump_lli(struct pl08x_driver_data *pl08x,
+				  const u32 *llis_va, int num_llis) {}
+#endif
+
 /*
  * This fills in the table of LLIs for the transfer descriptor
  * Note that we assume we never have to change the burst sizes
@@ -810,7 +928,7 @@
 	int num_llis = 0;
 	u32 cctl, early_bytes = 0;
 	size_t max_bytes_per_lli, total_bytes;
-	struct pl08x_lli *llis_va;
+	u32 *llis_va, *last_lli;
 	struct pl08x_sg *dsg;
 
 	txd->llis_va = dma_pool_alloc(pl08x->pool, GFP_NOWAIT, &txd->llis_bus);
@@ -845,10 +963,13 @@
 
 		pl08x_choose_master_bus(&bd, &mbus, &sbus, cctl);
 
-		dev_vdbg(&pl08x->adev->dev, "src=0x%08x%s/%u dst=0x%08x%s/%u len=%zu\n",
-			bd.srcbus.addr, cctl & PL080_CONTROL_SRC_INCR ? "+" : "",
+		dev_vdbg(&pl08x->adev->dev,
+			"src=0x%08llx%s/%u dst=0x%08llx%s/%u len=%zu\n",
+			(u64)bd.srcbus.addr,
+			cctl & PL080_CONTROL_SRC_INCR ? "+" : "",
 			bd.srcbus.buswidth,
-			bd.dstbus.addr, cctl & PL080_CONTROL_DST_INCR ? "+" : "",
+			(u64)bd.dstbus.addr,
+			cctl & PL080_CONTROL_DST_INCR ? "+" : "",
 			bd.dstbus.buswidth,
 			bd.remainder);
 		dev_vdbg(&pl08x->adev->dev, "mbus=%s sbus=%s\n",
@@ -886,8 +1007,8 @@
 				return 0;
 			}
 
-			if ((bd.srcbus.addr % bd.srcbus.buswidth) ||
-					(bd.dstbus.addr % bd.dstbus.buswidth)) {
+			if (!IS_BUS_ALIGNED(&bd.srcbus) ||
+				!IS_BUS_ALIGNED(&bd.dstbus)) {
 				dev_err(&pl08x->adev->dev,
 					"%s src & dst address must be aligned to src"
 					" & dst width if peripheral is flow controller",
@@ -897,7 +1018,8 @@
 
 			cctl = pl08x_cctl_bits(cctl, bd.srcbus.buswidth,
 					bd.dstbus.buswidth, 0);
-			pl08x_fill_lli_for_desc(&bd, num_llis++, 0, cctl);
+			pl08x_fill_lli_for_desc(pl08x, &bd, num_llis++,
+					0, cctl, 0);
 			break;
 		}
 
@@ -908,9 +1030,9 @@
 		 */
 		if (bd.remainder < mbus->buswidth)
 			early_bytes = bd.remainder;
-		else if ((mbus->addr) % (mbus->buswidth)) {
-			early_bytes = mbus->buswidth - (mbus->addr) %
-				(mbus->buswidth);
+		else if (!IS_BUS_ALIGNED(mbus)) {
+			early_bytes = mbus->buswidth -
+				(mbus->addr & (mbus->buswidth - 1));
 			if ((bd.remainder - early_bytes) < mbus->buswidth)
 				early_bytes = bd.remainder;
 		}
@@ -919,8 +1041,8 @@
 			dev_vdbg(&pl08x->adev->dev,
 				"%s byte width LLIs (remain 0x%08x)\n",
 				__func__, bd.remainder);
-			prep_byte_width_lli(&bd, &cctl, early_bytes, num_llis++,
-				&total_bytes);
+			prep_byte_width_lli(pl08x, &bd, &cctl, early_bytes,
+				num_llis++, &total_bytes);
 		}
 
 		if (bd.remainder) {
@@ -928,7 +1050,7 @@
 			 * Master now aligned
 			 * - if slave is not then we must set its width down
 			 */
-			if (sbus->addr % sbus->buswidth) {
+			if (!IS_BUS_ALIGNED(sbus)) {
 				dev_dbg(&pl08x->adev->dev,
 					"%s set down bus width to one byte\n",
 					__func__);
@@ -941,7 +1063,7 @@
 			 * MIN(buswidths)
 			 */
 			max_bytes_per_lli = bd.srcbus.buswidth *
-				PL080_CONTROL_TRANSFER_SIZE_MASK;
+						pl08x->vd->max_transfer_size;
 			dev_vdbg(&pl08x->adev->dev,
 				"%s max bytes per lli = %zu\n",
 				__func__, max_bytes_per_lli);
@@ -976,8 +1098,8 @@
 
 				cctl = pl08x_cctl_bits(cctl, bd.srcbus.buswidth,
 					bd.dstbus.buswidth, tsize);
-				pl08x_fill_lli_for_desc(&bd, num_llis++,
-						lli_len, cctl);
+				pl08x_fill_lli_for_desc(pl08x, &bd, num_llis++,
+						lli_len, cctl, tsize);
 				total_bytes += lli_len;
 			}
 
@@ -988,8 +1110,8 @@
 				dev_vdbg(&pl08x->adev->dev,
 					"%s align with boundary, send odd bytes (remain %zu)\n",
 					__func__, bd.remainder);
-				prep_byte_width_lli(&bd, &cctl, bd.remainder,
-						num_llis++, &total_bytes);
+				prep_byte_width_lli(pl08x, &bd, &cctl,
+					bd.remainder, num_llis++, &total_bytes);
 			}
 		}
 
@@ -1003,33 +1125,25 @@
 		if (num_llis >= MAX_NUM_TSFR_LLIS) {
 			dev_err(&pl08x->adev->dev,
 				"%s need to increase MAX_NUM_TSFR_LLIS from 0x%08x\n",
-				__func__, (u32) MAX_NUM_TSFR_LLIS);
+				__func__, MAX_NUM_TSFR_LLIS);
 			return 0;
 		}
 	}
 
 	llis_va = txd->llis_va;
-	/* The final LLI terminates the LLI. */
-	llis_va[num_llis - 1].lli = 0;
-	/* The final LLI element shall also fire an interrupt. */
-	llis_va[num_llis - 1].cctl |= PL080_CONTROL_TC_IRQ_EN;
+	last_lli = llis_va + (num_llis - 1) * pl08x->lli_words;
 
-#ifdef VERBOSE_DEBUG
-	{
-		int i;
-
-		dev_vdbg(&pl08x->adev->dev,
-			 "%-3s %-9s  %-10s %-10s %-10s %s\n",
-			 "lli", "", "csrc", "cdst", "clli", "cctl");
-		for (i = 0; i < num_llis; i++) {
-			dev_vdbg(&pl08x->adev->dev,
-				 "%3d @%p: 0x%08x 0x%08x 0x%08x 0x%08x\n",
-				 i, &llis_va[i], llis_va[i].src,
-				 llis_va[i].dst, llis_va[i].lli, llis_va[i].cctl
-				);
-		}
+	if (txd->cyclic) {
+		/* Link back to the first LLI. */
+		last_lli[PL080_LLI_LLI] = txd->llis_bus | bd.lli_bus;
+	} else {
+		/* The final LLI terminates the LLI. */
+		last_lli[PL080_LLI_LLI] = 0;
+		/* The final LLI element shall also fire an interrupt. */
+		last_lli[PL080_LLI_CCTL] |= PL080_CONTROL_TC_IRQ_EN;
 	}
-#endif
+
+	pl08x_dump_lli(pl08x, llis_va, num_llis);
 
 	return num_llis;
 }
@@ -1305,6 +1419,7 @@
 				  struct dma_slave_config *config)
 {
 	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
+	struct pl08x_driver_data *pl08x = plchan->host;
 
 	if (!plchan->slave)
 		return -EINVAL;
@@ -1314,6 +1429,13 @@
 	    config->dst_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES)
 		return -EINVAL;
 
+	if (config->device_fc && pl08x->vd->pl080s) {
+		dev_err(&pl08x->adev->dev,
+			"%s: PL080S does not support peripheral flow control\n",
+			__func__);
+		return -EINVAL;
+	}
+
 	plchan->cfg = *config;
 
 	return 0;
@@ -1404,25 +1526,19 @@
 	return vchan_tx_prep(&plchan->vc, &txd->vd, flags);
 }
 
-static struct dma_async_tx_descriptor *pl08x_prep_slave_sg(
-		struct dma_chan *chan, struct scatterlist *sgl,
-		unsigned int sg_len, enum dma_transfer_direction direction,
-		unsigned long flags, void *context)
+static struct pl08x_txd *pl08x_init_txd(
+		struct dma_chan *chan,
+		enum dma_transfer_direction direction,
+		dma_addr_t *slave_addr)
 {
 	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
 	struct pl08x_driver_data *pl08x = plchan->host;
 	struct pl08x_txd *txd;
-	struct pl08x_sg *dsg;
-	struct scatterlist *sg;
 	enum dma_slave_buswidth addr_width;
-	dma_addr_t slave_addr;
 	int ret, tmp;
 	u8 src_buses, dst_buses;
 	u32 maxburst, cctl;
 
-	dev_dbg(&pl08x->adev->dev, "%s prepare transaction of %d bytes from %s\n",
-			__func__, sg_dma_len(sgl), plchan->name);
-
 	txd = pl08x_get_txd(plchan);
 	if (!txd) {
 		dev_err(&pl08x->adev->dev, "%s no txd\n", __func__);
@@ -1436,14 +1552,14 @@
 	 */
 	if (direction == DMA_MEM_TO_DEV) {
 		cctl = PL080_CONTROL_SRC_INCR;
-		slave_addr = plchan->cfg.dst_addr;
+		*slave_addr = plchan->cfg.dst_addr;
 		addr_width = plchan->cfg.dst_addr_width;
 		maxburst = plchan->cfg.dst_maxburst;
 		src_buses = pl08x->mem_buses;
 		dst_buses = plchan->cd->periph_buses;
 	} else if (direction == DMA_DEV_TO_MEM) {
 		cctl = PL080_CONTROL_DST_INCR;
-		slave_addr = plchan->cfg.src_addr;
+		*slave_addr = plchan->cfg.src_addr;
 		addr_width = plchan->cfg.src_addr_width;
 		maxburst = plchan->cfg.src_maxburst;
 		src_buses = plchan->cd->periph_buses;
@@ -1492,24 +1608,107 @@
 	else
 		txd->ccfg |= plchan->signal << PL080_CONFIG_SRC_SEL_SHIFT;
 
+	return txd;
+}
+
+static int pl08x_tx_add_sg(struct pl08x_txd *txd,
+			   enum dma_transfer_direction direction,
+			   dma_addr_t slave_addr,
+			   dma_addr_t buf_addr,
+			   unsigned int len)
+{
+	struct pl08x_sg *dsg;
+
+	dsg = kzalloc(sizeof(struct pl08x_sg), GFP_NOWAIT);
+	if (!dsg)
+		return -ENOMEM;
+
+	list_add_tail(&dsg->node, &txd->dsg_list);
+
+	dsg->len = len;
+	if (direction == DMA_MEM_TO_DEV) {
+		dsg->src_addr = buf_addr;
+		dsg->dst_addr = slave_addr;
+	} else {
+		dsg->src_addr = slave_addr;
+		dsg->dst_addr = buf_addr;
+	}
+
+	return 0;
+}
+
+static struct dma_async_tx_descriptor *pl08x_prep_slave_sg(
+		struct dma_chan *chan, struct scatterlist *sgl,
+		unsigned int sg_len, enum dma_transfer_direction direction,
+		unsigned long flags, void *context)
+{
+	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
+	struct pl08x_driver_data *pl08x = plchan->host;
+	struct pl08x_txd *txd;
+	struct scatterlist *sg;
+	int ret, tmp;
+	dma_addr_t slave_addr;
+
+	dev_dbg(&pl08x->adev->dev, "%s prepare transaction of %d bytes from %s\n",
+			__func__, sg_dma_len(sgl), plchan->name);
+
+	txd = pl08x_init_txd(chan, direction, &slave_addr);
+	if (!txd)
+		return NULL;
+
 	for_each_sg(sgl, sg, sg_len, tmp) {
-		dsg = kzalloc(sizeof(struct pl08x_sg), GFP_NOWAIT);
-		if (!dsg) {
+		ret = pl08x_tx_add_sg(txd, direction, slave_addr,
+				      sg_dma_address(sg),
+				      sg_dma_len(sg));
+		if (ret) {
 			pl08x_release_mux(plchan);
 			pl08x_free_txd(pl08x, txd);
 			dev_err(&pl08x->adev->dev, "%s no mem for pl080 sg\n",
 					__func__);
 			return NULL;
 		}
-		list_add_tail(&dsg->node, &txd->dsg_list);
+	}
 
-		dsg->len = sg_dma_len(sg);
-		if (direction == DMA_MEM_TO_DEV) {
-			dsg->src_addr = sg_dma_address(sg);
-			dsg->dst_addr = slave_addr;
-		} else {
-			dsg->src_addr = slave_addr;
-			dsg->dst_addr = sg_dma_address(sg);
+	ret = pl08x_fill_llis_for_desc(plchan->host, txd);
+	if (!ret) {
+		pl08x_release_mux(plchan);
+		pl08x_free_txd(pl08x, txd);
+		return NULL;
+	}
+
+	return vchan_tx_prep(&plchan->vc, &txd->vd, flags);
+}
+
+static struct dma_async_tx_descriptor *pl08x_prep_dma_cyclic(
+		struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
+		size_t period_len, enum dma_transfer_direction direction,
+		unsigned long flags, void *context)
+{
+	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
+	struct pl08x_driver_data *pl08x = plchan->host;
+	struct pl08x_txd *txd;
+	int ret, tmp;
+	dma_addr_t slave_addr;
+
+	dev_dbg(&pl08x->adev->dev,
+		"%s prepare cyclic transaction of %d/%d bytes %s %s\n",
+		__func__, period_len, buf_len,
+		direction == DMA_MEM_TO_DEV ? "to" : "from",
+		plchan->name);
+
+	txd = pl08x_init_txd(chan, direction, &slave_addr);
+	if (!txd)
+		return NULL;
+
+	txd->cyclic = true;
+	txd->cctl |= PL080_CONTROL_TC_IRQ_EN;
+	for (tmp = 0; tmp < buf_len; tmp += period_len) {
+		ret = pl08x_tx_add_sg(txd, direction, slave_addr,
+				      buf_addr + tmp, period_len);
+		if (ret) {
+			pl08x_release_mux(plchan);
+			pl08x_free_txd(pl08x, txd);
+			return NULL;
 		}
 	}
 
@@ -1652,7 +1851,9 @@
 
 			spin_lock(&plchan->vc.lock);
 			tx = plchan->at;
-			if (tx) {
+			if (tx && tx->cyclic) {
+				vchan_cyclic_callback(&tx->vd);
+			} else if (tx) {
 				plchan->at = NULL;
 				/*
 				 * This descriptor is done, release its mux
@@ -1846,6 +2047,7 @@
 {
 	struct pl08x_driver_data *pl08x;
 	const struct vendor_data *vd = id->data;
+	u32 tsfr_size;
 	int ret = 0;
 	int i;
 
@@ -1873,6 +2075,7 @@
 
 	/* Initialize slave engine */
 	dma_cap_set(DMA_SLAVE, pl08x->slave.cap_mask);
+	dma_cap_set(DMA_CYCLIC, pl08x->slave.cap_mask);
 	pl08x->slave.dev = &adev->dev;
 	pl08x->slave.device_alloc_chan_resources = pl08x_alloc_chan_resources;
 	pl08x->slave.device_free_chan_resources = pl08x_free_chan_resources;
@@ -1880,6 +2083,7 @@
 	pl08x->slave.device_tx_status = pl08x_dma_tx_status;
 	pl08x->slave.device_issue_pending = pl08x_issue_pending;
 	pl08x->slave.device_prep_slave_sg = pl08x_prep_slave_sg;
+	pl08x->slave.device_prep_dma_cyclic = pl08x_prep_dma_cyclic;
 	pl08x->slave.device_control = pl08x_control;
 
 	/* Get the platform data */
@@ -1902,9 +2106,15 @@
 		pl08x->mem_buses = pl08x->pd->mem_buses;
 	}
 
+	if (vd->pl080s)
+		pl08x->lli_words = PL080S_LLI_WORDS;
+	else
+		pl08x->lli_words = PL080_LLI_WORDS;
+	tsfr_size = MAX_NUM_TSFR_LLIS * pl08x->lli_words * sizeof(u32);
+
 	/* A DMA memory pool for LLIs, align on 1-byte boundary */
 	pl08x->pool = dma_pool_create(DRIVER_NAME, &pl08x->adev->dev,
-			PL08X_LLI_TSFR_SIZE, PL08X_ALIGN, 0);
+						tsfr_size, PL08X_ALIGN, 0);
 	if (!pl08x->pool) {
 		ret = -ENOMEM;
 		goto out_no_lli_pool;
@@ -1947,6 +2157,7 @@
 
 		ch->id = i;
 		ch->base = pl08x->base + PL080_Cx_BASE(i);
+		ch->reg_config = ch->base + vd->config_offset;
 		spin_lock_init(&ch->lock);
 
 		/*
@@ -1957,7 +2168,7 @@
 		if (vd->nomadik) {
 			u32 val;
 
-			val = readl(ch->base + PL080_CH_CONFIG);
+			val = readl(ch->reg_config);
 			if (val & (PL080N_CONFIG_ITPROT | PL080N_CONFIG_SECPROT)) {
 				dev_info(&adev->dev, "physical channel %d reserved for secure access only\n", i);
 				ch->locked = true;
@@ -2008,8 +2219,8 @@
 
 	amba_set_drvdata(adev, pl08x);
 	init_pl08x_debugfs(pl08x);
-	dev_info(&pl08x->adev->dev, "DMA: PL%03x rev%u at 0x%08llx irq %d\n",
-		 amba_part(adev), amba_rev(adev),
+	dev_info(&pl08x->adev->dev, "DMA: PL%03x%s rev%u at 0x%08llx irq %d\n",
+		 amba_part(adev), pl08x->vd->pl080s ? "s" : "", amba_rev(adev),
 		 (unsigned long long)adev->res.start, adev->irq[0]);
 
 	return 0;
@@ -2038,22 +2249,41 @@
 
 /* PL080 has 8 channels and the PL080 have just 2 */
 static struct vendor_data vendor_pl080 = {
+	.config_offset = PL080_CH_CONFIG,
 	.channels = 8,
 	.dualmaster = true,
+	.max_transfer_size = PL080_CONTROL_TRANSFER_SIZE_MASK,
 };
 
 static struct vendor_data vendor_nomadik = {
+	.config_offset = PL080_CH_CONFIG,
 	.channels = 8,
 	.dualmaster = true,
 	.nomadik = true,
+	.max_transfer_size = PL080_CONTROL_TRANSFER_SIZE_MASK,
+};
+
+static struct vendor_data vendor_pl080s = {
+	.config_offset = PL080S_CH_CONFIG,
+	.channels = 8,
+	.pl080s = true,
+	.max_transfer_size = PL080S_CONTROL_TRANSFER_SIZE_MASK,
 };
 
 static struct vendor_data vendor_pl081 = {
+	.config_offset = PL080_CH_CONFIG,
 	.channels = 2,
 	.dualmaster = false,
+	.max_transfer_size = PL080_CONTROL_TRANSFER_SIZE_MASK,
 };
 
 static struct amba_id pl08x_ids[] = {
+	/* Samsung PL080S variant */
+	{
+		.id	= 0x0a141080,
+		.mask	= 0xffffffff,
+		.data	= &vendor_pl080s,
+	},
 	/* PL080 */
 	{
 		.id	= 0x00041080,
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 99af4db..9162ac8 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -382,20 +382,30 @@
 EXPORT_SYMBOL(dma_issue_pending_all);
 
 /**
- * nth_chan - returns the nth channel of the given capability
- * @cap: capability to match
- * @n: nth channel desired
- *
- * Defaults to returning the channel with the desired capability and the
- * lowest reference count when 'n' cannot be satisfied.  Must be called
- * under dma_list_mutex.
+ * dma_chan_is_local - returns true if the channel is in the same numa-node as the cpu
  */
-static struct dma_chan *nth_chan(enum dma_transaction_type cap, int n)
+static bool dma_chan_is_local(struct dma_chan *chan, int cpu)
+{
+	int node = dev_to_node(chan->device->dev);
+	return node == -1 || cpumask_test_cpu(cpu, cpumask_of_node(node));
+}
+
+/**
+ * min_chan - returns the channel with min count and in the same numa-node as the cpu
+ * @cap: capability to match
+ * @cpu: cpu index which the channel should be close to
+ *
+ * If some channels are close to the given cpu, the one with the lowest
+ * reference count is returned. Otherwise, cpu is ignored and only the
+ * reference count is taken into account.
+ * Must be called under dma_list_mutex.
+ */
+static struct dma_chan *min_chan(enum dma_transaction_type cap, int cpu)
 {
 	struct dma_device *device;
 	struct dma_chan *chan;
-	struct dma_chan *ret = NULL;
 	struct dma_chan *min = NULL;
+	struct dma_chan *localmin = NULL;
 
 	list_for_each_entry(device, &dma_device_list, global_node) {
 		if (!dma_has_cap(cap, device->cap_mask) ||
@@ -404,27 +414,22 @@
 		list_for_each_entry(chan, &device->channels, device_node) {
 			if (!chan->client_count)
 				continue;
-			if (!min)
-				min = chan;
-			else if (chan->table_count < min->table_count)
+			if (!min || chan->table_count < min->table_count)
 				min = chan;
 
-			if (n-- == 0) {
-				ret = chan;
-				break; /* done */
-			}
+			if (dma_chan_is_local(chan, cpu))
+				if (!localmin ||
+				    chan->table_count < localmin->table_count)
+					localmin = chan;
 		}
-		if (ret)
-			break; /* done */
 	}
 
-	if (!ret)
-		ret = min;
+	chan = localmin ? localmin : min;
 
-	if (ret)
-		ret->table_count++;
+	if (chan)
+		chan->table_count++;
 
-	return ret;
+	return chan;
 }
 
 /**
@@ -441,7 +446,6 @@
 	struct dma_device *device;
 	int cpu;
 	int cap;
-	int n;
 
 	/* undo the last distribution */
 	for_each_dma_cap_mask(cap, dma_cap_mask_all)
@@ -460,14 +464,9 @@
 		return;
 
 	/* redistribute available channels */
-	n = 0;
 	for_each_dma_cap_mask(cap, dma_cap_mask_all)
 		for_each_online_cpu(cpu) {
-			if (num_possible_cpus() > 1)
-				chan = nth_chan(cap, n++);
-			else
-				chan = nth_chan(cap, -1);
-
+			chan = min_chan(cap, cpu);
 			per_cpu_ptr(channel_table[cap], cpu)->chan = chan;
 		}
 }
@@ -510,7 +509,33 @@
 }
 
 /**
- * dma_request_channel - try to allocate an exclusive channel
+ * dma_request_slave_channel - try to get specific channel exclusively
+ * @chan: target channel
+ */
+struct dma_chan *dma_get_slave_channel(struct dma_chan *chan)
+{
+	int err = -EBUSY;
+
+	/* lock against __dma_request_channel */
+	mutex_lock(&dma_list_mutex);
+
+	if (chan->client_count == 0) {
+		err = dma_chan_get(chan);
+		if (err)
+			pr_debug("%s: failed to get %s: (%d)\n",
+				__func__, dma_chan_name(chan), err);
+	} else
+		chan = NULL;
+
+	mutex_unlock(&dma_list_mutex);
+
+
+	return chan;
+}
+EXPORT_SYMBOL_GPL(dma_get_slave_channel);
+
+/**
+ * __dma_request_channel - try to allocate an exclusive channel
  * @mask: capabilities that the channel must satisfy
  * @fn: optional callback to disposition available channels
  * @fn_param: opaque parameter to pass to dma_filter_fn
diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c
index e88ded2..92f796c 100644
--- a/drivers/dma/dmatest.c
+++ b/drivers/dma/dmatest.c
@@ -25,44 +25,46 @@
 #include <linux/seq_file.h>
 
 static unsigned int test_buf_size = 16384;
-module_param(test_buf_size, uint, S_IRUGO);
+module_param(test_buf_size, uint, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(test_buf_size, "Size of the memcpy test buffer");
 
 static char test_channel[20];
-module_param_string(channel, test_channel, sizeof(test_channel), S_IRUGO);
+module_param_string(channel, test_channel, sizeof(test_channel),
+		S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(channel, "Bus ID of the channel to test (default: any)");
 
 static char test_device[20];
-module_param_string(device, test_device, sizeof(test_device), S_IRUGO);
+module_param_string(device, test_device, sizeof(test_device),
+		S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(device, "Bus ID of the DMA Engine to test (default: any)");
 
 static unsigned int threads_per_chan = 1;
-module_param(threads_per_chan, uint, S_IRUGO);
+module_param(threads_per_chan, uint, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(threads_per_chan,
 		"Number of threads to start per channel (default: 1)");
 
 static unsigned int max_channels;
-module_param(max_channels, uint, S_IRUGO);
+module_param(max_channels, uint, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(max_channels,
 		"Maximum number of channels to use (default: all)");
 
 static unsigned int iterations;
-module_param(iterations, uint, S_IRUGO);
+module_param(iterations, uint, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(iterations,
 		"Iterations before stopping test (default: infinite)");
 
 static unsigned int xor_sources = 3;
-module_param(xor_sources, uint, S_IRUGO);
+module_param(xor_sources, uint, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(xor_sources,
 		"Number of xor source buffers (default: 3)");
 
 static unsigned int pq_sources = 3;
-module_param(pq_sources, uint, S_IRUGO);
+module_param(pq_sources, uint, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(pq_sources,
 		"Number of p+q source buffers (default: 3)");
 
 static int timeout = 3000;
-module_param(timeout, uint, S_IRUGO);
+module_param(timeout, uint, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(timeout, "Transfer Timeout in msec (default: 3000), "
 		 "Pass -1 for infinite timeout");
 
@@ -193,7 +195,6 @@
 
 	/* debugfs related stuff */
 	struct dentry		*root;
-	struct dmatest_params	dbgfs_params;
 
 	/* Test results */
 	struct list_head	results;
@@ -406,7 +407,11 @@
 	list_add_tail(&tr->node, &r->results);
 	mutex_unlock(&info->results_lock);
 
-	pr_warn("%s\n", thread_result_get(r->name, tr));
+	if (tr->type == DMATEST_ET_OK)
+		pr_debug("%s\n", thread_result_get(r->name, tr));
+	else
+		pr_warn("%s\n", thread_result_get(r->name, tr));
+
 	return 0;
 }
 
@@ -1007,7 +1012,15 @@
 	result_free(info, NULL);
 
 	/* Copy test parameters */
-	memcpy(params, &info->dbgfs_params, sizeof(*params));
+	params->buf_size = test_buf_size;
+	strlcpy(params->channel, strim(test_channel), sizeof(params->channel));
+	strlcpy(params->device, strim(test_device), sizeof(params->device));
+	params->threads_per_chan = threads_per_chan;
+	params->max_channels = max_channels;
+	params->iterations = iterations;
+	params->xor_sources = xor_sources;
+	params->pq_sources = pq_sources;
+	params->timeout = timeout;
 
 	/* Run test with new parameters */
 	return __run_threaded_test(info);
@@ -1029,71 +1042,6 @@
 	return false;
 }
 
-static ssize_t dtf_write_string(void *to, size_t available, loff_t *ppos,
-		const void __user *from, size_t count)
-{
-	char tmp[20];
-	ssize_t len;
-
-	len = simple_write_to_buffer(tmp, sizeof(tmp) - 1, ppos, from, count);
-	if (len >= 0) {
-		tmp[len] = '\0';
-		strlcpy(to, strim(tmp), available);
-	}
-
-	return len;
-}
-
-static ssize_t dtf_read_channel(struct file *file, char __user *buf,
-		size_t count, loff_t *ppos)
-{
-	struct dmatest_info *info = file->private_data;
-	return simple_read_from_buffer(buf, count, ppos,
-			info->dbgfs_params.channel,
-			strlen(info->dbgfs_params.channel));
-}
-
-static ssize_t dtf_write_channel(struct file *file, const char __user *buf,
-		size_t size, loff_t *ppos)
-{
-	struct dmatest_info *info = file->private_data;
-	return dtf_write_string(info->dbgfs_params.channel,
-				sizeof(info->dbgfs_params.channel),
-				ppos, buf, size);
-}
-
-static const struct file_operations dtf_channel_fops = {
-	.read	= dtf_read_channel,
-	.write	= dtf_write_channel,
-	.open	= simple_open,
-	.llseek	= default_llseek,
-};
-
-static ssize_t dtf_read_device(struct file *file, char __user *buf,
-		size_t count, loff_t *ppos)
-{
-	struct dmatest_info *info = file->private_data;
-	return simple_read_from_buffer(buf, count, ppos,
-			info->dbgfs_params.device,
-			strlen(info->dbgfs_params.device));
-}
-
-static ssize_t dtf_write_device(struct file *file, const char __user *buf,
-		size_t size, loff_t *ppos)
-{
-	struct dmatest_info *info = file->private_data;
-	return dtf_write_string(info->dbgfs_params.device,
-				sizeof(info->dbgfs_params.device),
-				ppos, buf, size);
-}
-
-static const struct file_operations dtf_device_fops = {
-	.read	= dtf_read_device,
-	.write	= dtf_write_device,
-	.open	= simple_open,
-	.llseek	= default_llseek,
-};
-
 static ssize_t dtf_read_run(struct file *file, char __user *user_buf,
 		size_t count, loff_t *ppos)
 {
@@ -1187,8 +1135,6 @@
 static int dmatest_register_dbgfs(struct dmatest_info *info)
 {
 	struct dentry *d;
-	struct dmatest_params *params = &info->dbgfs_params;
-	int ret = -ENOMEM;
 
 	d = debugfs_create_dir("dmatest", NULL);
 	if (IS_ERR(d))
@@ -1198,81 +1144,24 @@
 
 	info->root = d;
 
-	/* Copy initial values */
-	memcpy(params, &info->params, sizeof(*params));
-
-	/* Test parameters */
-
-	d = debugfs_create_u32("test_buf_size", S_IWUSR | S_IRUGO, info->root,
-			       (u32 *)&params->buf_size);
-	if (IS_ERR_OR_NULL(d))
-		goto err_node;
-
-	d = debugfs_create_file("channel", S_IRUGO | S_IWUSR, info->root,
-				info, &dtf_channel_fops);
-	if (IS_ERR_OR_NULL(d))
-		goto err_node;
-
-	d = debugfs_create_file("device", S_IRUGO | S_IWUSR, info->root,
-				info, &dtf_device_fops);
-	if (IS_ERR_OR_NULL(d))
-		goto err_node;
-
-	d = debugfs_create_u32("threads_per_chan", S_IWUSR | S_IRUGO, info->root,
-			       (u32 *)&params->threads_per_chan);
-	if (IS_ERR_OR_NULL(d))
-		goto err_node;
-
-	d = debugfs_create_u32("max_channels", S_IWUSR | S_IRUGO, info->root,
-			       (u32 *)&params->max_channels);
-	if (IS_ERR_OR_NULL(d))
-		goto err_node;
-
-	d = debugfs_create_u32("iterations", S_IWUSR | S_IRUGO, info->root,
-			       (u32 *)&params->iterations);
-	if (IS_ERR_OR_NULL(d))
-		goto err_node;
-
-	d = debugfs_create_u32("xor_sources", S_IWUSR | S_IRUGO, info->root,
-			       (u32 *)&params->xor_sources);
-	if (IS_ERR_OR_NULL(d))
-		goto err_node;
-
-	d = debugfs_create_u32("pq_sources", S_IWUSR | S_IRUGO, info->root,
-			       (u32 *)&params->pq_sources);
-	if (IS_ERR_OR_NULL(d))
-		goto err_node;
-
-	d = debugfs_create_u32("timeout", S_IWUSR | S_IRUGO, info->root,
-			       (u32 *)&params->timeout);
-	if (IS_ERR_OR_NULL(d))
-		goto err_node;
-
 	/* Run or stop threaded test */
-	d = debugfs_create_file("run", S_IWUSR | S_IRUGO, info->root,
-				info, &dtf_run_fops);
-	if (IS_ERR_OR_NULL(d))
-		goto err_node;
+	debugfs_create_file("run", S_IWUSR | S_IRUGO, info->root, info,
+			    &dtf_run_fops);
 
 	/* Results of test in progress */
-	d = debugfs_create_file("results", S_IRUGO, info->root, info,
-				&dtf_results_fops);
-	if (IS_ERR_OR_NULL(d))
-		goto err_node;
+	debugfs_create_file("results", S_IRUGO, info->root, info,
+			    &dtf_results_fops);
 
 	return 0;
 
-err_node:
-	debugfs_remove_recursive(info->root);
 err_root:
 	pr_err("dmatest: Failed to initialize debugfs\n");
-	return ret;
+	return -ENOMEM;
 }
 
 static int __init dmatest_init(void)
 {
 	struct dmatest_info *info = &test_info;
-	struct dmatest_params *params = &info->params;
 	int ret;
 
 	memset(info, 0, sizeof(*info));
@@ -1283,17 +1172,6 @@
 	mutex_init(&info->results_lock);
 	INIT_LIST_HEAD(&info->results);
 
-	/* Set default parameters */
-	params->buf_size = test_buf_size;
-	strlcpy(params->channel, test_channel, sizeof(params->channel));
-	strlcpy(params->device, test_device, sizeof(params->device));
-	params->threads_per_chan = threads_per_chan;
-	params->max_channels = max_channels;
-	params->iterations = iterations;
-	params->xor_sources = xor_sources;
-	params->pq_sources = pq_sources;
-	params->timeout = timeout;
-
 	ret = dmatest_register_dbgfs(info);
 	if (ret)
 		return ret;
diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c
index eea479c..89eb89f 100644
--- a/drivers/dma/dw/core.c
+++ b/drivers/dma/dw/core.c
@@ -37,16 +37,22 @@
  * which does not support descriptor writeback.
  */
 
+static inline bool is_request_line_unset(struct dw_dma_chan *dwc)
+{
+	return dwc->request_line == (typeof(dwc->request_line))~0;
+}
+
 static inline void dwc_set_masters(struct dw_dma_chan *dwc)
 {
 	struct dw_dma *dw = to_dw_dma(dwc->chan.device);
 	struct dw_dma_slave *dws = dwc->chan.private;
 	unsigned char mmax = dw->nr_masters - 1;
 
-	if (dwc->request_line == ~0) {
-		dwc->src_master = min_t(unsigned char, mmax, dwc_get_sms(dws));
-		dwc->dst_master = min_t(unsigned char, mmax, dwc_get_dms(dws));
-	}
+	if (!is_request_line_unset(dwc))
+		return;
+
+	dwc->src_master = min_t(unsigned char, mmax, dwc_get_sms(dws));
+	dwc->dst_master = min_t(unsigned char, mmax, dwc_get_dms(dws));
 }
 
 #define DWC_DEFAULT_CTLLO(_chan) ({				\
@@ -644,10 +650,13 @@
 static irqreturn_t dw_dma_interrupt(int irq, void *dev_id)
 {
 	struct dw_dma *dw = dev_id;
-	u32 status;
+	u32 status = dma_readl(dw, STATUS_INT);
 
-	dev_vdbg(dw->dma.dev, "%s: status=0x%x\n", __func__,
-			dma_readl(dw, STATUS_INT));
+	dev_vdbg(dw->dma.dev, "%s: status=0x%x\n", __func__, status);
+
+	/* Check if we have any interrupt from the DMAC */
+	if (!status)
+		return IRQ_NONE;
 
 	/*
 	 * Just disable the interrupts. We'll turn them back on in the
@@ -984,7 +993,7 @@
 	dwc->direction = sconfig->direction;
 
 	/* Take the request line from slave_id member */
-	if (dwc->request_line == ~0)
+	if (is_request_line_unset(dwc))
 		dwc->request_line = sconfig->slave_id;
 
 	convert_burst(&dwc->dma_sconfig.src_maxburst);
@@ -1089,16 +1098,16 @@
 	enum dma_status		ret;
 
 	ret = dma_cookie_status(chan, cookie, txstate);
-	if (ret != DMA_SUCCESS) {
-		dwc_scan_descriptors(to_dw_dma(chan->device), dwc);
+	if (ret == DMA_SUCCESS)
+		return ret;
 
-		ret = dma_cookie_status(chan, cookie, txstate);
-	}
+	dwc_scan_descriptors(to_dw_dma(chan->device), dwc);
 
+	ret = dma_cookie_status(chan, cookie, txstate);
 	if (ret != DMA_SUCCESS)
 		dma_set_residue(txstate, dwc_get_residue(dwc));
 
-	if (dwc->paused)
+	if (dwc->paused && ret == DMA_IN_PROGRESS)
 		return DMA_PAUSED;
 
 	return ret;
@@ -1560,8 +1569,8 @@
 	/* Disable BLOCK interrupts as well */
 	channel_clear_bit(dw, MASK.BLOCK, dw->all_chan_mask);
 
-	err = devm_request_irq(chip->dev, chip->irq, dw_dma_interrupt, 0,
-			       "dw_dmac", dw);
+	err = devm_request_irq(chip->dev, chip->irq, dw_dma_interrupt,
+			       IRQF_SHARED, "dw_dmac", dw);
 	if (err)
 		return err;
 
diff --git a/drivers/dma/dw/platform.c b/drivers/dma/dw/platform.c
index 6c9449c..e35d975 100644
--- a/drivers/dma/dw/platform.c
+++ b/drivers/dma/dw/platform.c
@@ -253,6 +253,7 @@
 	{ "INTL9C60", 0 },
 	{ }
 };
+MODULE_DEVICE_TABLE(acpi, dw_dma_acpi_id_table);
 #endif
 
 #ifdef CONFIG_PM_SLEEP
diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c
index 5f3e532..ff50ff4 100644
--- a/drivers/dma/edma.c
+++ b/drivers/dma/edma.c
@@ -56,6 +56,7 @@
 	struct list_head		node;
 	int				absync;
 	int				pset_nr;
+	int				processed;
 	struct edmacc_param		pset[0];
 };
 
@@ -69,6 +70,7 @@
 	int				ch_num;
 	bool				alloced;
 	int				slot[EDMA_MAX_SLOTS];
+	int				missed;
 	struct dma_slave_config		cfg;
 };
 
@@ -104,22 +106,34 @@
 /* Dispatch a queued descriptor to the controller (caller holds lock) */
 static void edma_execute(struct edma_chan *echan)
 {
-	struct virt_dma_desc *vdesc = vchan_next_desc(&echan->vchan);
+	struct virt_dma_desc *vdesc;
 	struct edma_desc *edesc;
-	int i;
+	struct device *dev = echan->vchan.chan.device->dev;
+	int i, j, left, nslots;
 
-	if (!vdesc) {
-		echan->edesc = NULL;
-		return;
+	/* If either we processed all psets or we're still not started */
+	if (!echan->edesc ||
+	    echan->edesc->pset_nr == echan->edesc->processed) {
+		/* Get next vdesc */
+		vdesc = vchan_next_desc(&echan->vchan);
+		if (!vdesc) {
+			echan->edesc = NULL;
+			return;
+		}
+		list_del(&vdesc->node);
+		echan->edesc = to_edma_desc(&vdesc->tx);
 	}
 
-	list_del(&vdesc->node);
+	edesc = echan->edesc;
 
-	echan->edesc = edesc = to_edma_desc(&vdesc->tx);
+	/* Find out how many left */
+	left = edesc->pset_nr - edesc->processed;
+	nslots = min(MAX_NR_SG, left);
 
 	/* Write descriptor PaRAM set(s) */
-	for (i = 0; i < edesc->pset_nr; i++) {
-		edma_write_slot(echan->slot[i], &edesc->pset[i]);
+	for (i = 0; i < nslots; i++) {
+		j = i + edesc->processed;
+		edma_write_slot(echan->slot[i], &edesc->pset[j]);
 		dev_dbg(echan->vchan.chan.device->dev,
 			"\n pset[%d]:\n"
 			"  chnum\t%d\n"
@@ -132,24 +146,50 @@
 			"  bidx\t%08x\n"
 			"  cidx\t%08x\n"
 			"  lkrld\t%08x\n",
-			i, echan->ch_num, echan->slot[i],
-			edesc->pset[i].opt,
-			edesc->pset[i].src,
-			edesc->pset[i].dst,
-			edesc->pset[i].a_b_cnt,
-			edesc->pset[i].ccnt,
-			edesc->pset[i].src_dst_bidx,
-			edesc->pset[i].src_dst_cidx,
-			edesc->pset[i].link_bcntrld);
+			j, echan->ch_num, echan->slot[i],
+			edesc->pset[j].opt,
+			edesc->pset[j].src,
+			edesc->pset[j].dst,
+			edesc->pset[j].a_b_cnt,
+			edesc->pset[j].ccnt,
+			edesc->pset[j].src_dst_bidx,
+			edesc->pset[j].src_dst_cidx,
+			edesc->pset[j].link_bcntrld);
 		/* Link to the previous slot if not the last set */
-		if (i != (edesc->pset_nr - 1))
+		if (i != (nslots - 1))
 			edma_link(echan->slot[i], echan->slot[i+1]);
-		/* Final pset links to the dummy pset */
-		else
-			edma_link(echan->slot[i], echan->ecc->dummy_slot);
 	}
 
-	edma_start(echan->ch_num);
+	edesc->processed += nslots;
+
+	/*
+	 * If this is either the last set in a set of SG-list transactions
+	 * then setup a link to the dummy slot, this results in all future
+	 * events being absorbed and that's OK because we're done
+	 */
+	if (edesc->processed == edesc->pset_nr)
+		edma_link(echan->slot[nslots-1], echan->ecc->dummy_slot);
+
+	edma_resume(echan->ch_num);
+
+	if (edesc->processed <= MAX_NR_SG) {
+		dev_dbg(dev, "first transfer starting %d\n", echan->ch_num);
+		edma_start(echan->ch_num);
+	}
+
+	/*
+	 * This happens due to setup times between intermediate transfers
+	 * in long SG lists which have to be broken up into transfers of
+	 * MAX_NR_SG
+	 */
+	if (echan->missed) {
+		dev_dbg(dev, "missed event in execute detected\n");
+		edma_clean_channel(echan->ch_num);
+		edma_stop(echan->ch_num);
+		edma_start(echan->ch_num);
+		edma_trigger_channel(echan->ch_num);
+		echan->missed = 0;
+	}
 }
 
 static int edma_terminate_all(struct edma_chan *echan)
@@ -222,9 +262,9 @@
 	enum dma_slave_buswidth dev_width;
 	u32 burst;
 	struct scatterlist *sg;
-	int i;
 	int acnt, bcnt, ccnt, src, dst, cidx;
 	int src_bidx, dst_bidx, src_cidx, dst_cidx;
+	int i, nslots;
 
 	if (unlikely(!echan || !sgl || !sg_len))
 		return NULL;
@@ -247,12 +287,6 @@
 		return NULL;
 	}
 
-	if (sg_len > MAX_NR_SG) {
-		dev_err(dev, "Exceeded max SG segments %d > %d\n",
-			sg_len, MAX_NR_SG);
-		return NULL;
-	}
-
 	edesc = kzalloc(sizeof(*edesc) + sg_len *
 		sizeof(edesc->pset[0]), GFP_ATOMIC);
 	if (!edesc) {
@@ -262,8 +296,10 @@
 
 	edesc->pset_nr = sg_len;
 
-	for_each_sg(sgl, sg, sg_len, i) {
-		/* Allocate a PaRAM slot, if needed */
+	/* Allocate a PaRAM slot, if needed */
+	nslots = min_t(unsigned, MAX_NR_SG, sg_len);
+
+	for (i = 0; i < nslots; i++) {
 		if (echan->slot[i] < 0) {
 			echan->slot[i] =
 				edma_alloc_slot(EDMA_CTLR(echan->ch_num),
@@ -273,6 +309,10 @@
 				return NULL;
 			}
 		}
+	}
+
+	/* Configure PaRAM sets for each SG */
+	for_each_sg(sgl, sg, sg_len, i) {
 
 		acnt = dev_width;
 
@@ -330,6 +370,12 @@
 		/* Configure A or AB synchronized transfers */
 		if (edesc->absync)
 			edesc->pset[i].opt |= SYNCDIM;
+
+		/* If this is the last in a current SG set of transactions,
+		   enable interrupts so that next set is processed */
+		if (!((i+1) % MAX_NR_SG))
+			edesc->pset[i].opt |= TCINTEN;
+
 		/* If this is the last set, enable completion interrupt flag */
 		if (i == sg_len - 1)
 			edesc->pset[i].opt |= TCINTEN;
@@ -355,27 +401,65 @@
 	struct device *dev = echan->vchan.chan.device->dev;
 	struct edma_desc *edesc;
 	unsigned long flags;
+	struct edmacc_param p;
 
-	/* Stop the channel */
-	edma_stop(echan->ch_num);
+	/* Pause the channel */
+	edma_pause(echan->ch_num);
 
 	switch (ch_status) {
 	case DMA_COMPLETE:
-		dev_dbg(dev, "transfer complete on channel %d\n", ch_num);
-
 		spin_lock_irqsave(&echan->vchan.lock, flags);
 
 		edesc = echan->edesc;
 		if (edesc) {
+			if (edesc->processed == edesc->pset_nr) {
+				dev_dbg(dev, "Transfer complete, stopping channel %d\n", ch_num);
+				edma_stop(echan->ch_num);
+				vchan_cookie_complete(&edesc->vdesc);
+			} else {
+				dev_dbg(dev, "Intermediate transfer complete on channel %d\n", ch_num);
+			}
+
 			edma_execute(echan);
-			vchan_cookie_complete(&edesc->vdesc);
 		}
 
 		spin_unlock_irqrestore(&echan->vchan.lock, flags);
 
 		break;
 	case DMA_CC_ERROR:
-		dev_dbg(dev, "transfer error on channel %d\n", ch_num);
+		spin_lock_irqsave(&echan->vchan.lock, flags);
+
+		edma_read_slot(EDMA_CHAN_SLOT(echan->slot[0]), &p);
+
+		/*
+		 * Issue later based on missed flag which will be sure
+		 * to happen as:
+		 * (1) we finished transmitting an intermediate slot and
+		 *     edma_execute is coming up.
+		 * (2) or we finished current transfer and issue will
+		 *     call edma_execute.
+		 *
+		 * Important note: issuing can be dangerous here and
+		 * lead to some nasty recursion when we are in a NULL
+		 * slot. So we avoid doing so and set the missed flag.
+		 */
+		if (p.a_b_cnt == 0 && p.ccnt == 0) {
+			dev_dbg(dev, "Error occurred, looks like slot is null, just setting miss\n");
+			echan->missed = 1;
+		} else {
+			/*
+			 * The slot is already programmed but the event got
+			 * missed, so its safe to issue it here.
+			 */
+			dev_dbg(dev, "Error occurred but slot is non-null, TRIGGERING\n");
+			edma_clean_channel(echan->ch_num);
+			edma_stop(echan->ch_num);
+			edma_start(echan->ch_num);
+			edma_trigger_channel(echan->ch_num);
+		}
+
+		spin_unlock_irqrestore(&echan->vchan.lock, flags);
+
 		break;
 	default:
 		break;
@@ -502,8 +586,6 @@
 	} else if (echan->edesc && echan->edesc->vdesc.tx.cookie == cookie) {
 		struct edma_desc *edesc = echan->edesc;
 		txstate->residue = edma_desc_size(edesc);
-	} else {
-		txstate->residue = 0;
 	}
 	spin_unlock_irqrestore(&echan->vchan.lock, flags);
 
diff --git a/drivers/dma/ep93xx_dma.c b/drivers/dma/ep93xx_dma.c
index f2bf8c0..591cd8c 100644
--- a/drivers/dma/ep93xx_dma.c
+++ b/drivers/dma/ep93xx_dma.c
@@ -1313,15 +1313,7 @@
 					    dma_cookie_t cookie,
 					    struct dma_tx_state *state)
 {
-	struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan);
-	enum dma_status ret;
-	unsigned long flags;
-
-	spin_lock_irqsave(&edmac->lock, flags);
-	ret = dma_cookie_status(chan, cookie, state);
-	spin_unlock_irqrestore(&edmac->lock, flags);
-
-	return ret;
+	return dma_cookie_status(chan, cookie, state);
 }
 
 /**
diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index 49e8fbd..b3f3e90 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -979,15 +979,7 @@
 					dma_cookie_t cookie,
 					struct dma_tx_state *txstate)
 {
-	struct fsldma_chan *chan = to_fsl_chan(dchan);
-	enum dma_status ret;
-	unsigned long flags;
-
-	spin_lock_irqsave(&chan->desc_lock, flags);
-	ret = dma_cookie_status(dchan, cookie, txstate);
-	spin_unlock_irqrestore(&chan->desc_lock, flags);
-
-	return ret;
+	return dma_cookie_status(dchan, cookie, txstate);
 }
 
 /*----------------------------------------------------------------------------*/
diff --git a/drivers/dma/imx-dma.c b/drivers/dma/imx-dma.c
index ff2aab9..78f8ca5 100644
--- a/drivers/dma/imx-dma.c
+++ b/drivers/dma/imx-dma.c
@@ -805,10 +805,8 @@
 	}
 	INIT_LIST_HEAD(&imxdmac->ld_free);
 
-	if (imxdmac->sg_list) {
-		kfree(imxdmac->sg_list);
-		imxdmac->sg_list = NULL;
-	}
+	kfree(imxdmac->sg_list);
+	imxdmac->sg_list = NULL;
 }
 
 static struct dma_async_tx_descriptor *imxdma_prep_slave_sg(
diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
index 1e44b8c..fc43603 100644
--- a/drivers/dma/imx-sdma.c
+++ b/drivers/dma/imx-sdma.c
@@ -243,7 +243,6 @@
  * @event_id1		for channels that use 2 events
  * @word_size		peripheral access size
  * @buf_tail		ID of the buffer that was processed
- * @done		channel completion
  * @num_bd		max NUM_BD. number of descriptors currently handling
  */
 struct sdma_channel {
@@ -255,7 +254,6 @@
 	unsigned int			event_id1;
 	enum dma_slave_buswidth		word_size;
 	unsigned int			buf_tail;
-	struct completion		done;
 	unsigned int			num_bd;
 	struct sdma_buffer_descriptor	*bd;
 	dma_addr_t			bd_phys;
@@ -307,9 +305,10 @@
 	u32	ram_code_size;
 };
 
-enum sdma_devtype {
-	IMX31_SDMA,	/* runs on i.mx31 */
-	IMX35_SDMA,	/* runs on i.mx35 and later */
+struct sdma_driver_data {
+	int chnenbl0;
+	int num_events;
+	struct sdma_script_start_addrs	*script_addrs;
 };
 
 struct sdma_engine {
@@ -318,8 +317,6 @@
 	struct sdma_channel		channel[MAX_DMA_CHANNELS];
 	struct sdma_channel_control	*channel_control;
 	void __iomem			*regs;
-	enum sdma_devtype		devtype;
-	unsigned int			num_events;
 	struct sdma_context_data	*context;
 	dma_addr_t			context_phys;
 	struct dma_device		dma_device;
@@ -327,15 +324,118 @@
 	struct clk			*clk_ahb;
 	spinlock_t			channel_0_lock;
 	struct sdma_script_start_addrs	*script_addrs;
+	const struct sdma_driver_data	*drvdata;
+};
+
+static struct sdma_driver_data sdma_imx31 = {
+	.chnenbl0 = SDMA_CHNENBL0_IMX31,
+	.num_events = 32,
+};
+
+static struct sdma_script_start_addrs sdma_script_imx25 = {
+	.ap_2_ap_addr = 729,
+	.uart_2_mcu_addr = 904,
+	.per_2_app_addr = 1255,
+	.mcu_2_app_addr = 834,
+	.uartsh_2_mcu_addr = 1120,
+	.per_2_shp_addr = 1329,
+	.mcu_2_shp_addr = 1048,
+	.ata_2_mcu_addr = 1560,
+	.mcu_2_ata_addr = 1479,
+	.app_2_per_addr = 1189,
+	.app_2_mcu_addr = 770,
+	.shp_2_per_addr = 1407,
+	.shp_2_mcu_addr = 979,
+};
+
+static struct sdma_driver_data sdma_imx25 = {
+	.chnenbl0 = SDMA_CHNENBL0_IMX35,
+	.num_events = 48,
+	.script_addrs = &sdma_script_imx25,
+};
+
+static struct sdma_driver_data sdma_imx35 = {
+	.chnenbl0 = SDMA_CHNENBL0_IMX35,
+	.num_events = 48,
+};
+
+static struct sdma_script_start_addrs sdma_script_imx51 = {
+	.ap_2_ap_addr = 642,
+	.uart_2_mcu_addr = 817,
+	.mcu_2_app_addr = 747,
+	.mcu_2_shp_addr = 961,
+	.ata_2_mcu_addr = 1473,
+	.mcu_2_ata_addr = 1392,
+	.app_2_per_addr = 1033,
+	.app_2_mcu_addr = 683,
+	.shp_2_per_addr = 1251,
+	.shp_2_mcu_addr = 892,
+};
+
+static struct sdma_driver_data sdma_imx51 = {
+	.chnenbl0 = SDMA_CHNENBL0_IMX35,
+	.num_events = 48,
+	.script_addrs = &sdma_script_imx51,
+};
+
+static struct sdma_script_start_addrs sdma_script_imx53 = {
+	.ap_2_ap_addr = 642,
+	.app_2_mcu_addr = 683,
+	.mcu_2_app_addr = 747,
+	.uart_2_mcu_addr = 817,
+	.shp_2_mcu_addr = 891,
+	.mcu_2_shp_addr = 960,
+	.uartsh_2_mcu_addr = 1032,
+	.spdif_2_mcu_addr = 1100,
+	.mcu_2_spdif_addr = 1134,
+	.firi_2_mcu_addr = 1193,
+	.mcu_2_firi_addr = 1290,
+};
+
+static struct sdma_driver_data sdma_imx53 = {
+	.chnenbl0 = SDMA_CHNENBL0_IMX35,
+	.num_events = 48,
+	.script_addrs = &sdma_script_imx53,
+};
+
+static struct sdma_script_start_addrs sdma_script_imx6q = {
+	.ap_2_ap_addr = 642,
+	.uart_2_mcu_addr = 817,
+	.mcu_2_app_addr = 747,
+	.per_2_per_addr = 6331,
+	.uartsh_2_mcu_addr = 1032,
+	.mcu_2_shp_addr = 960,
+	.app_2_mcu_addr = 683,
+	.shp_2_mcu_addr = 891,
+	.spdif_2_mcu_addr = 1100,
+	.mcu_2_spdif_addr = 1134,
+};
+
+static struct sdma_driver_data sdma_imx6q = {
+	.chnenbl0 = SDMA_CHNENBL0_IMX35,
+	.num_events = 48,
+	.script_addrs = &sdma_script_imx6q,
 };
 
 static struct platform_device_id sdma_devtypes[] = {
 	{
+		.name = "imx25-sdma",
+		.driver_data = (unsigned long)&sdma_imx25,
+	}, {
 		.name = "imx31-sdma",
-		.driver_data = IMX31_SDMA,
+		.driver_data = (unsigned long)&sdma_imx31,
 	}, {
 		.name = "imx35-sdma",
-		.driver_data = IMX35_SDMA,
+		.driver_data = (unsigned long)&sdma_imx35,
+	}, {
+		.name = "imx51-sdma",
+		.driver_data = (unsigned long)&sdma_imx51,
+	}, {
+		.name = "imx53-sdma",
+		.driver_data = (unsigned long)&sdma_imx53,
+	}, {
+		.name = "imx6q-sdma",
+		.driver_data = (unsigned long)&sdma_imx6q,
 	}, {
 		/* sentinel */
 	}
@@ -343,8 +443,11 @@
 MODULE_DEVICE_TABLE(platform, sdma_devtypes);
 
 static const struct of_device_id sdma_dt_ids[] = {
-	{ .compatible = "fsl,imx31-sdma", .data = &sdma_devtypes[IMX31_SDMA], },
-	{ .compatible = "fsl,imx35-sdma", .data = &sdma_devtypes[IMX35_SDMA], },
+	{ .compatible = "fsl,imx6q-sdma", .data = &sdma_imx6q, },
+	{ .compatible = "fsl,imx53-sdma", .data = &sdma_imx53, },
+	{ .compatible = "fsl,imx51-sdma", .data = &sdma_imx51, },
+	{ .compatible = "fsl,imx35-sdma", .data = &sdma_imx35, },
+	{ .compatible = "fsl,imx31-sdma", .data = &sdma_imx31, },
 	{ /* sentinel */ }
 };
 MODULE_DEVICE_TABLE(of, sdma_dt_ids);
@@ -356,8 +459,7 @@
 
 static inline u32 chnenbl_ofs(struct sdma_engine *sdma, unsigned int event)
 {
-	u32 chnenbl0 = (sdma->devtype == IMX31_SDMA ? SDMA_CHNENBL0_IMX31 :
-						      SDMA_CHNENBL0_IMX35);
+	u32 chnenbl0 = sdma->drvdata->chnenbl0;
 	return chnenbl0 + event * 4;
 }
 
@@ -547,8 +649,6 @@
 {
 	struct sdma_channel *sdmac = (struct sdma_channel *) data;
 
-	complete(&sdmac->done);
-
 	if (sdmac->flags & IMX_DMA_SG_LOOP)
 		sdma_handle_channel_loop(sdmac);
 	else
@@ -733,7 +833,7 @@
 	sdmac->per_addr = 0;
 
 	if (sdmac->event_id0) {
-		if (sdmac->event_id0 >= sdmac->sdma->num_events)
+		if (sdmac->event_id0 >= sdmac->sdma->drvdata->num_events)
 			return -EINVAL;
 		sdma_event_enable(sdmac, sdmac->event_id0);
 	}
@@ -812,9 +912,6 @@
 	sdma->channel_control[channel].current_bd_ptr = sdmac->bd_phys;
 
 	sdma_set_channel_priority(sdmac, MXC_SDMA_DEFAULT_PRIORITY);
-
-	init_completion(&sdmac->done);
-
 	return 0;
 out:
 
@@ -1120,15 +1217,12 @@
 }
 
 static enum dma_status sdma_tx_status(struct dma_chan *chan,
-					    dma_cookie_t cookie,
-					    struct dma_tx_state *txstate)
+				      dma_cookie_t cookie,
+				      struct dma_tx_state *txstate)
 {
 	struct sdma_channel *sdmac = to_sdma_chan(chan);
-	dma_cookie_t last_used;
 
-	last_used = chan->cookie;
-
-	dma_set_tx_state(txstate, chan->completed_cookie, last_used,
+	dma_set_tx_state(txstate, chan->completed_cookie, chan->cookie,
 			sdmac->chn_count - sdmac->chn_real_count);
 
 	return sdmac->status;
@@ -1218,19 +1312,6 @@
 	int i, ret;
 	dma_addr_t ccb_phys;
 
-	switch (sdma->devtype) {
-	case IMX31_SDMA:
-		sdma->num_events = 32;
-		break;
-	case IMX35_SDMA:
-		sdma->num_events = 48;
-		break;
-	default:
-		dev_err(sdma->dev, "Unknown sdma type %d. aborting\n",
-			sdma->devtype);
-		return -ENODEV;
-	}
-
 	clk_enable(sdma->clk_ipg);
 	clk_enable(sdma->clk_ahb);
 
@@ -1257,7 +1338,7 @@
 			MAX_DMA_CHANNELS * sizeof (struct sdma_channel_control));
 
 	/* disable all channels */
-	for (i = 0; i < sdma->num_events; i++)
+	for (i = 0; i < sdma->drvdata->num_events; i++)
 		writel_relaxed(0, sdma->regs + chnenbl_ofs(sdma, i));
 
 	/* All channels have priority 0 */
@@ -1335,10 +1416,21 @@
 	int ret;
 	int irq;
 	struct resource *iores;
-	struct sdma_platform_data *pdata = pdev->dev.platform_data;
+	struct sdma_platform_data *pdata = dev_get_platdata(&pdev->dev);
 	int i;
 	struct sdma_engine *sdma;
 	s32 *saddr_arr;
+	const struct sdma_driver_data *drvdata = NULL;
+
+	if (of_id)
+		drvdata = of_id->data;
+	else if (pdev->id_entry)
+		drvdata = (void *)pdev->id_entry->driver_data;
+
+	if (!drvdata) {
+		dev_err(&pdev->dev, "unable to find driver data\n");
+		return -EINVAL;
+	}
 
 	sdma = kzalloc(sizeof(*sdma), GFP_KERNEL);
 	if (!sdma)
@@ -1347,6 +1439,7 @@
 	spin_lock_init(&sdma->channel_0_lock);
 
 	sdma->dev = &pdev->dev;
+	sdma->drvdata = drvdata;
 
 	iores = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	irq = platform_get_irq(pdev, 0);
@@ -1396,10 +1489,6 @@
 	for (i = 0; i < SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V1; i++)
 		saddr_arr[i] = -EINVAL;
 
-	if (of_id)
-		pdev->id_entry = of_id->data;
-	sdma->devtype = pdev->id_entry->driver_data;
-
 	dma_cap_set(DMA_SLAVE, sdma->dma_device.cap_mask);
 	dma_cap_set(DMA_CYCLIC, sdma->dma_device.cap_mask);
 
@@ -1431,6 +1520,8 @@
 	if (ret)
 		goto err_init;
 
+	if (sdma->drvdata->script_addrs)
+		sdma_add_scripts(sdma, sdma->drvdata->script_addrs);
 	if (pdata && pdata->script_addrs)
 		sdma_add_scripts(sdma, pdata->script_addrs);
 
diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c
index b642e03..d8ececaf 100644
--- a/drivers/dma/ioat/dma_v3.c
+++ b/drivers/dma/ioat/dma_v3.c
@@ -251,7 +251,7 @@
 }
 
 static void pq16_set_src(struct ioat_raw_descriptor *desc[3],
-			dma_addr_t addr, u32 offset, u8 coef, int idx)
+			dma_addr_t addr, u32 offset, u8 coef, unsigned idx)
 {
 	struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *)desc[0];
 	struct ioat_pq16a_descriptor *pq16 =
@@ -1775,15 +1775,12 @@
 	dma->device_alloc_chan_resources = ioat2_alloc_chan_resources;
 	dma->device_free_chan_resources = ioat2_free_chan_resources;
 
-	if (is_xeon_cb32(pdev))
-		dma->copy_align = 6;
-
 	dma_cap_set(DMA_INTERRUPT, dma->cap_mask);
 	dma->device_prep_dma_interrupt = ioat3_prep_interrupt_lock;
 
 	device->cap = readl(device->reg_base + IOAT_DMA_CAP_OFFSET);
 
-	if (is_bwd_noraid(pdev))
+	if (is_xeon_cb32(pdev) || is_bwd_noraid(pdev))
 		device->cap &= ~(IOAT_CAP_XOR | IOAT_CAP_PQ | IOAT_CAP_RAID16SS);
 
 	/* dca is incompatible with raid operations */
@@ -1793,7 +1790,6 @@
 	if (device->cap & IOAT_CAP_XOR) {
 		is_raid_device = true;
 		dma->max_xor = 8;
-		dma->xor_align = 6;
 
 		dma_cap_set(DMA_XOR, dma->cap_mask);
 		dma->device_prep_dma_xor = ioat3_prep_xor;
@@ -1812,13 +1808,8 @@
 
 		if (device->cap & IOAT_CAP_RAID16SS) {
 			dma_set_maxpq(dma, 16, 0);
-			dma->pq_align = 0;
 		} else {
 			dma_set_maxpq(dma, 8, 0);
-			if (is_xeon_cb32(pdev))
-				dma->pq_align = 6;
-			else
-				dma->pq_align = 0;
 		}
 
 		if (!(device->cap & IOAT_CAP_XOR)) {
@@ -1829,13 +1820,8 @@
 
 			if (device->cap & IOAT_CAP_RAID16SS) {
 				dma->max_xor = 16;
-				dma->xor_align = 0;
 			} else {
 				dma->max_xor = 8;
-				if (is_xeon_cb32(pdev))
-					dma->xor_align = 6;
-				else
-					dma->xor_align = 0;
 			}
 		}
 	}
@@ -1844,14 +1830,6 @@
 	device->cleanup_fn = ioat3_cleanup_event;
 	device->timer_fn = ioat3_timer_event;
 
-	if (is_xeon_cb32(pdev)) {
-		dma_cap_clear(DMA_XOR_VAL, dma->cap_mask);
-		dma->device_prep_dma_xor_val = NULL;
-
-		dma_cap_clear(DMA_PQ_VAL, dma->cap_mask);
-		dma->device_prep_dma_pq_val = NULL;
-	}
-
 	/* starting with CB3.3 super extended descriptors are supported */
 	if (device->cap & IOAT_CAP_RAID16SS) {
 		char pool_name[14];
diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c
index cc727ec..dd8b44a 100644
--- a/drivers/dma/iop-adma.c
+++ b/drivers/dma/iop-adma.c
@@ -518,7 +518,7 @@
 	struct iop_adma_desc_slot *slot = NULL;
 	int init = iop_chan->slots_allocated ? 0 : 1;
 	struct iop_adma_platform_data *plat_data =
-		iop_chan->device->pdev->dev.platform_data;
+		dev_get_platdata(&iop_chan->device->pdev->dev);
 	int num_descs_in_pool = plat_data->pool_size/IOP_ADMA_SLOT_SIZE;
 
 	/* Allocate descriptor slots */
@@ -1351,7 +1351,7 @@
 	struct iop_adma_device *device = platform_get_drvdata(dev);
 	struct dma_chan *chan, *_chan;
 	struct iop_adma_chan *iop_chan;
-	struct iop_adma_platform_data *plat_data = dev->dev.platform_data;
+	struct iop_adma_platform_data *plat_data = dev_get_platdata(&dev->dev);
 
 	dma_async_device_unregister(&device->common);
 
@@ -1376,7 +1376,7 @@
 	struct iop_adma_device *adev;
 	struct iop_adma_chan *iop_chan;
 	struct dma_device *dma_dev;
-	struct iop_adma_platform_data *plat_data = pdev->dev.platform_data;
+	struct iop_adma_platform_data *plat_data = dev_get_platdata(&pdev->dev);
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (!res)
diff --git a/drivers/dma/ipu/ipu_idmac.c b/drivers/dma/ipu/ipu_idmac.c
index d39c2cd..cb9c0bc 100644
--- a/drivers/dma/ipu/ipu_idmac.c
+++ b/drivers/dma/ipu/ipu_idmac.c
@@ -1593,10 +1593,7 @@
 static enum dma_status idmac_tx_status(struct dma_chan *chan,
 		       dma_cookie_t cookie, struct dma_tx_state *txstate)
 {
-	dma_set_tx_state(txstate, chan->completed_cookie, chan->cookie, 0);
-	if (cookie != chan->cookie)
-		return DMA_ERROR;
-	return DMA_SUCCESS;
+	return dma_cookie_status(chan, cookie, txstate);
 }
 
 static int __init ipu_idmac_init(struct ipu *ipu)
@@ -1767,7 +1764,6 @@
 	iounmap(ipu->reg_ic);
 	iounmap(ipu->reg_ipu);
 	tasklet_kill(&ipu->tasklet);
-	platform_set_drvdata(pdev, NULL);
 
 	return 0;
 }
diff --git a/drivers/dma/k3dma.c b/drivers/dma/k3dma.c
new file mode 100644
index 0000000..a2c330f
--- /dev/null
+++ b/drivers/dma/k3dma.c
@@ -0,0 +1,837 @@
+/*
+ * Copyright (c) 2013 Linaro Ltd.
+ * Copyright (c) 2013 Hisilicon Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/sched.h>
+#include <linux/device.h>
+#include <linux/dmaengine.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/clk.h>
+#include <linux/of_dma.h>
+
+#include "virt-dma.h"
+
+#define DRIVER_NAME		"k3-dma"
+#define DMA_ALIGN		3
+#define DMA_MAX_SIZE		0x1ffc
+
+#define INT_STAT		0x00
+#define INT_TC1			0x04
+#define INT_ERR1		0x0c
+#define INT_ERR2		0x10
+#define INT_TC1_MASK		0x18
+#define INT_ERR1_MASK		0x20
+#define INT_ERR2_MASK		0x24
+#define INT_TC1_RAW		0x600
+#define INT_ERR1_RAW		0x608
+#define INT_ERR2_RAW		0x610
+#define CH_PRI			0x688
+#define CH_STAT			0x690
+#define CX_CUR_CNT		0x704
+#define CX_LLI			0x800
+#define CX_CNT			0x810
+#define CX_SRC			0x814
+#define CX_DST			0x818
+#define CX_CFG			0x81c
+#define AXI_CFG			0x820
+#define AXI_CFG_DEFAULT		0x201201
+
+#define CX_LLI_CHAIN_EN		0x2
+#define CX_CFG_EN		0x1
+#define CX_CFG_MEM2PER		(0x1 << 2)
+#define CX_CFG_PER2MEM		(0x2 << 2)
+#define CX_CFG_SRCINCR		(0x1 << 31)
+#define CX_CFG_DSTINCR		(0x1 << 30)
+
+struct k3_desc_hw {
+	u32 lli;
+	u32 reserved[3];
+	u32 count;
+	u32 saddr;
+	u32 daddr;
+	u32 config;
+} __aligned(32);
+
+struct k3_dma_desc_sw {
+	struct virt_dma_desc	vd;
+	dma_addr_t		desc_hw_lli;
+	size_t			desc_num;
+	size_t			size;
+	struct k3_desc_hw	desc_hw[0];
+};
+
+struct k3_dma_phy;
+
+struct k3_dma_chan {
+	u32			ccfg;
+	struct virt_dma_chan	vc;
+	struct k3_dma_phy	*phy;
+	struct list_head	node;
+	enum dma_transfer_direction dir;
+	dma_addr_t		dev_addr;
+	enum dma_status		status;
+};
+
+struct k3_dma_phy {
+	u32			idx;
+	void __iomem		*base;
+	struct k3_dma_chan	*vchan;
+	struct k3_dma_desc_sw	*ds_run;
+	struct k3_dma_desc_sw	*ds_done;
+};
+
+struct k3_dma_dev {
+	struct dma_device	slave;
+	void __iomem		*base;
+	struct tasklet_struct	task;
+	spinlock_t		lock;
+	struct list_head	chan_pending;
+	struct k3_dma_phy	*phy;
+	struct k3_dma_chan	*chans;
+	struct clk		*clk;
+	u32			dma_channels;
+	u32			dma_requests;
+};
+
+#define to_k3_dma(dmadev) container_of(dmadev, struct k3_dma_dev, slave)
+
+static struct k3_dma_chan *to_k3_chan(struct dma_chan *chan)
+{
+	return container_of(chan, struct k3_dma_chan, vc.chan);
+}
+
+static void k3_dma_pause_dma(struct k3_dma_phy *phy, bool on)
+{
+	u32 val = 0;
+
+	if (on) {
+		val = readl_relaxed(phy->base + CX_CFG);
+		val |= CX_CFG_EN;
+		writel_relaxed(val, phy->base + CX_CFG);
+	} else {
+		val = readl_relaxed(phy->base + CX_CFG);
+		val &= ~CX_CFG_EN;
+		writel_relaxed(val, phy->base + CX_CFG);
+	}
+}
+
+static void k3_dma_terminate_chan(struct k3_dma_phy *phy, struct k3_dma_dev *d)
+{
+	u32 val = 0;
+
+	k3_dma_pause_dma(phy, false);
+
+	val = 0x1 << phy->idx;
+	writel_relaxed(val, d->base + INT_TC1_RAW);
+	writel_relaxed(val, d->base + INT_ERR1_RAW);
+	writel_relaxed(val, d->base + INT_ERR2_RAW);
+}
+
+static void k3_dma_set_desc(struct k3_dma_phy *phy, struct k3_desc_hw *hw)
+{
+	writel_relaxed(hw->lli, phy->base + CX_LLI);
+	writel_relaxed(hw->count, phy->base + CX_CNT);
+	writel_relaxed(hw->saddr, phy->base + CX_SRC);
+	writel_relaxed(hw->daddr, phy->base + CX_DST);
+	writel_relaxed(AXI_CFG_DEFAULT, phy->base + AXI_CFG);
+	writel_relaxed(hw->config, phy->base + CX_CFG);
+}
+
+static u32 k3_dma_get_curr_cnt(struct k3_dma_dev *d, struct k3_dma_phy *phy)
+{
+	u32 cnt = 0;
+
+	cnt = readl_relaxed(d->base + CX_CUR_CNT + phy->idx * 0x10);
+	cnt &= 0xffff;
+	return cnt;
+}
+
+static u32 k3_dma_get_curr_lli(struct k3_dma_phy *phy)
+{
+	return readl_relaxed(phy->base + CX_LLI);
+}
+
+static u32 k3_dma_get_chan_stat(struct k3_dma_dev *d)
+{
+	return readl_relaxed(d->base + CH_STAT);
+}
+
+static void k3_dma_enable_dma(struct k3_dma_dev *d, bool on)
+{
+	if (on) {
+		/* set same priority */
+		writel_relaxed(0x0, d->base + CH_PRI);
+
+		/* unmask irq */
+		writel_relaxed(0xffff, d->base + INT_TC1_MASK);
+		writel_relaxed(0xffff, d->base + INT_ERR1_MASK);
+		writel_relaxed(0xffff, d->base + INT_ERR2_MASK);
+	} else {
+		/* mask irq */
+		writel_relaxed(0x0, d->base + INT_TC1_MASK);
+		writel_relaxed(0x0, d->base + INT_ERR1_MASK);
+		writel_relaxed(0x0, d->base + INT_ERR2_MASK);
+	}
+}
+
+static irqreturn_t k3_dma_int_handler(int irq, void *dev_id)
+{
+	struct k3_dma_dev *d = (struct k3_dma_dev *)dev_id;
+	struct k3_dma_phy *p;
+	struct k3_dma_chan *c;
+	u32 stat = readl_relaxed(d->base + INT_STAT);
+	u32 tc1  = readl_relaxed(d->base + INT_TC1);
+	u32 err1 = readl_relaxed(d->base + INT_ERR1);
+	u32 err2 = readl_relaxed(d->base + INT_ERR2);
+	u32 i, irq_chan = 0;
+
+	while (stat) {
+		i = __ffs(stat);
+		stat &= (stat - 1);
+		if (likely(tc1 & BIT(i))) {
+			p = &d->phy[i];
+			c = p->vchan;
+			if (c) {
+				unsigned long flags;
+
+				spin_lock_irqsave(&c->vc.lock, flags);
+				vchan_cookie_complete(&p->ds_run->vd);
+				p->ds_done = p->ds_run;
+				spin_unlock_irqrestore(&c->vc.lock, flags);
+			}
+			irq_chan |= BIT(i);
+		}
+		if (unlikely((err1 & BIT(i)) || (err2 & BIT(i))))
+			dev_warn(d->slave.dev, "DMA ERR\n");
+	}
+
+	writel_relaxed(irq_chan, d->base + INT_TC1_RAW);
+	writel_relaxed(err1, d->base + INT_ERR1_RAW);
+	writel_relaxed(err2, d->base + INT_ERR2_RAW);
+
+	if (irq_chan) {
+		tasklet_schedule(&d->task);
+		return IRQ_HANDLED;
+	} else
+		return IRQ_NONE;
+}
+
+static int k3_dma_start_txd(struct k3_dma_chan *c)
+{
+	struct k3_dma_dev *d = to_k3_dma(c->vc.chan.device);
+	struct virt_dma_desc *vd = vchan_next_desc(&c->vc);
+
+	if (!c->phy)
+		return -EAGAIN;
+
+	if (BIT(c->phy->idx) & k3_dma_get_chan_stat(d))
+		return -EAGAIN;
+
+	if (vd) {
+		struct k3_dma_desc_sw *ds =
+			container_of(vd, struct k3_dma_desc_sw, vd);
+		/*
+		 * fetch and remove request from vc->desc_issued
+		 * so vc->desc_issued only contains desc pending
+		 */
+		list_del(&ds->vd.node);
+		c->phy->ds_run = ds;
+		c->phy->ds_done = NULL;
+		/* start dma */
+		k3_dma_set_desc(c->phy, &ds->desc_hw[0]);
+		return 0;
+	}
+	c->phy->ds_done = NULL;
+	c->phy->ds_run = NULL;
+	return -EAGAIN;
+}
+
+static void k3_dma_tasklet(unsigned long arg)
+{
+	struct k3_dma_dev *d = (struct k3_dma_dev *)arg;
+	struct k3_dma_phy *p;
+	struct k3_dma_chan *c, *cn;
+	unsigned pch, pch_alloc = 0;
+
+	/* check new dma request of running channel in vc->desc_issued */
+	list_for_each_entry_safe(c, cn, &d->slave.channels, vc.chan.device_node) {
+		spin_lock_irq(&c->vc.lock);
+		p = c->phy;
+		if (p && p->ds_done) {
+			if (k3_dma_start_txd(c)) {
+				/* No current txd associated with this channel */
+				dev_dbg(d->slave.dev, "pchan %u: free\n", p->idx);
+				/* Mark this channel free */
+				c->phy = NULL;
+				p->vchan = NULL;
+			}
+		}
+		spin_unlock_irq(&c->vc.lock);
+	}
+
+	/* check new channel request in d->chan_pending */
+	spin_lock_irq(&d->lock);
+	for (pch = 0; pch < d->dma_channels; pch++) {
+		p = &d->phy[pch];
+
+		if (p->vchan == NULL && !list_empty(&d->chan_pending)) {
+			c = list_first_entry(&d->chan_pending,
+				struct k3_dma_chan, node);
+			/* remove from d->chan_pending */
+			list_del_init(&c->node);
+			pch_alloc |= 1 << pch;
+			/* Mark this channel allocated */
+			p->vchan = c;
+			c->phy = p;
+			dev_dbg(d->slave.dev, "pchan %u: alloc vchan %p\n", pch, &c->vc);
+		}
+	}
+	spin_unlock_irq(&d->lock);
+
+	for (pch = 0; pch < d->dma_channels; pch++) {
+		if (pch_alloc & (1 << pch)) {
+			p = &d->phy[pch];
+			c = p->vchan;
+			if (c) {
+				spin_lock_irq(&c->vc.lock);
+				k3_dma_start_txd(c);
+				spin_unlock_irq(&c->vc.lock);
+			}
+		}
+	}
+}
+
+static int k3_dma_alloc_chan_resources(struct dma_chan *chan)
+{
+	return 0;
+}
+
+static void k3_dma_free_chan_resources(struct dma_chan *chan)
+{
+	struct k3_dma_chan *c = to_k3_chan(chan);
+	struct k3_dma_dev *d = to_k3_dma(chan->device);
+	unsigned long flags;
+
+	spin_lock_irqsave(&d->lock, flags);
+	list_del_init(&c->node);
+	spin_unlock_irqrestore(&d->lock, flags);
+
+	vchan_free_chan_resources(&c->vc);
+	c->ccfg = 0;
+}
+
+static enum dma_status k3_dma_tx_status(struct dma_chan *chan,
+	dma_cookie_t cookie, struct dma_tx_state *state)
+{
+	struct k3_dma_chan *c = to_k3_chan(chan);
+	struct k3_dma_dev *d = to_k3_dma(chan->device);
+	struct k3_dma_phy *p;
+	struct virt_dma_desc *vd;
+	unsigned long flags;
+	enum dma_status ret;
+	size_t bytes = 0;
+
+	ret = dma_cookie_status(&c->vc.chan, cookie, state);
+	if (ret == DMA_SUCCESS)
+		return ret;
+
+	spin_lock_irqsave(&c->vc.lock, flags);
+	p = c->phy;
+	ret = c->status;
+
+	/*
+	 * If the cookie is on our issue queue, then the residue is
+	 * its total size.
+	 */
+	vd = vchan_find_desc(&c->vc, cookie);
+	if (vd) {
+		bytes = container_of(vd, struct k3_dma_desc_sw, vd)->size;
+	} else if ((!p) || (!p->ds_run)) {
+		bytes = 0;
+	} else {
+		struct k3_dma_desc_sw *ds = p->ds_run;
+		u32 clli = 0, index = 0;
+
+		bytes = k3_dma_get_curr_cnt(d, p);
+		clli = k3_dma_get_curr_lli(p);
+		index = (clli - ds->desc_hw_lli) / sizeof(struct k3_desc_hw);
+		for (; index < ds->desc_num; index++) {
+			bytes += ds->desc_hw[index].count;
+			/* end of lli */
+			if (!ds->desc_hw[index].lli)
+				break;
+		}
+	}
+	spin_unlock_irqrestore(&c->vc.lock, flags);
+	dma_set_residue(state, bytes);
+	return ret;
+}
+
+static void k3_dma_issue_pending(struct dma_chan *chan)
+{
+	struct k3_dma_chan *c = to_k3_chan(chan);
+	struct k3_dma_dev *d = to_k3_dma(chan->device);
+	unsigned long flags;
+
+	spin_lock_irqsave(&c->vc.lock, flags);
+	/* add request to vc->desc_issued */
+	if (vchan_issue_pending(&c->vc)) {
+		spin_lock(&d->lock);
+		if (!c->phy) {
+			if (list_empty(&c->node)) {
+				/* if new channel, add chan_pending */
+				list_add_tail(&c->node, &d->chan_pending);
+				/* check in tasklet */
+				tasklet_schedule(&d->task);
+				dev_dbg(d->slave.dev, "vchan %p: issued\n", &c->vc);
+			}
+		}
+		spin_unlock(&d->lock);
+	} else
+		dev_dbg(d->slave.dev, "vchan %p: nothing to issue\n", &c->vc);
+	spin_unlock_irqrestore(&c->vc.lock, flags);
+}
+
+static void k3_dma_fill_desc(struct k3_dma_desc_sw *ds, dma_addr_t dst,
+			dma_addr_t src, size_t len, u32 num, u32 ccfg)
+{
+	if ((num + 1) < ds->desc_num)
+		ds->desc_hw[num].lli = ds->desc_hw_lli + (num + 1) *
+			sizeof(struct k3_desc_hw);
+	ds->desc_hw[num].lli |= CX_LLI_CHAIN_EN;
+	ds->desc_hw[num].count = len;
+	ds->desc_hw[num].saddr = src;
+	ds->desc_hw[num].daddr = dst;
+	ds->desc_hw[num].config = ccfg;
+}
+
+static struct dma_async_tx_descriptor *k3_dma_prep_memcpy(
+	struct dma_chan *chan,	dma_addr_t dst, dma_addr_t src,
+	size_t len, unsigned long flags)
+{
+	struct k3_dma_chan *c = to_k3_chan(chan);
+	struct k3_dma_desc_sw *ds;
+	size_t copy = 0;
+	int num = 0;
+
+	if (!len)
+		return NULL;
+
+	num = DIV_ROUND_UP(len, DMA_MAX_SIZE);
+	ds = kzalloc(sizeof(*ds) + num * sizeof(ds->desc_hw[0]), GFP_ATOMIC);
+	if (!ds) {
+		dev_dbg(chan->device->dev, "vchan %p: kzalloc fail\n", &c->vc);
+		return NULL;
+	}
+	ds->desc_hw_lli = __virt_to_phys((unsigned long)&ds->desc_hw[0]);
+	ds->size = len;
+	ds->desc_num = num;
+	num = 0;
+
+	if (!c->ccfg) {
+		/* default is memtomem, without calling device_control */
+		c->ccfg = CX_CFG_SRCINCR | CX_CFG_DSTINCR | CX_CFG_EN;
+		c->ccfg |= (0xf << 20) | (0xf << 24);	/* burst = 16 */
+		c->ccfg |= (0x3 << 12) | (0x3 << 16);	/* width = 64 bit */
+	}
+
+	do {
+		copy = min_t(size_t, len, DMA_MAX_SIZE);
+		k3_dma_fill_desc(ds, dst, src, copy, num++, c->ccfg);
+
+		if (c->dir == DMA_MEM_TO_DEV) {
+			src += copy;
+		} else if (c->dir == DMA_DEV_TO_MEM) {
+			dst += copy;
+		} else {
+			src += copy;
+			dst += copy;
+		}
+		len -= copy;
+	} while (len);
+
+	ds->desc_hw[num-1].lli = 0;	/* end of link */
+	return vchan_tx_prep(&c->vc, &ds->vd, flags);
+}
+
+static struct dma_async_tx_descriptor *k3_dma_prep_slave_sg(
+	struct dma_chan *chan, struct scatterlist *sgl, unsigned int sglen,
+	enum dma_transfer_direction dir, unsigned long flags, void *context)
+{
+	struct k3_dma_chan *c = to_k3_chan(chan);
+	struct k3_dma_desc_sw *ds;
+	size_t len, avail, total = 0;
+	struct scatterlist *sg;
+	dma_addr_t addr, src = 0, dst = 0;
+	int num = sglen, i;
+
+	if (sgl == 0)
+		return NULL;
+
+	for_each_sg(sgl, sg, sglen, i) {
+		avail = sg_dma_len(sg);
+		if (avail > DMA_MAX_SIZE)
+			num += DIV_ROUND_UP(avail, DMA_MAX_SIZE) - 1;
+	}
+
+	ds = kzalloc(sizeof(*ds) + num * sizeof(ds->desc_hw[0]), GFP_ATOMIC);
+	if (!ds) {
+		dev_dbg(chan->device->dev, "vchan %p: kzalloc fail\n", &c->vc);
+		return NULL;
+	}
+	ds->desc_hw_lli = __virt_to_phys((unsigned long)&ds->desc_hw[0]);
+	ds->desc_num = num;
+	num = 0;
+
+	for_each_sg(sgl, sg, sglen, i) {
+		addr = sg_dma_address(sg);
+		avail = sg_dma_len(sg);
+		total += avail;
+
+		do {
+			len = min_t(size_t, avail, DMA_MAX_SIZE);
+
+			if (dir == DMA_MEM_TO_DEV) {
+				src = addr;
+				dst = c->dev_addr;
+			} else if (dir == DMA_DEV_TO_MEM) {
+				src = c->dev_addr;
+				dst = addr;
+			}
+
+			k3_dma_fill_desc(ds, dst, src, len, num++, c->ccfg);
+
+			addr += len;
+			avail -= len;
+		} while (avail);
+	}
+
+	ds->desc_hw[num-1].lli = 0;	/* end of link */
+	ds->size = total;
+	return vchan_tx_prep(&c->vc, &ds->vd, flags);
+}
+
+static int k3_dma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
+	unsigned long arg)
+{
+	struct k3_dma_chan *c = to_k3_chan(chan);
+	struct k3_dma_dev *d = to_k3_dma(chan->device);
+	struct dma_slave_config *cfg = (void *)arg;
+	struct k3_dma_phy *p = c->phy;
+	unsigned long flags;
+	u32 maxburst = 0, val = 0;
+	enum dma_slave_buswidth width = DMA_SLAVE_BUSWIDTH_UNDEFINED;
+	LIST_HEAD(head);
+
+	switch (cmd) {
+	case DMA_SLAVE_CONFIG:
+		if (cfg == NULL)
+			return -EINVAL;
+		c->dir = cfg->direction;
+		if (c->dir == DMA_DEV_TO_MEM) {
+			c->ccfg = CX_CFG_DSTINCR;
+			c->dev_addr = cfg->src_addr;
+			maxburst = cfg->src_maxburst;
+			width = cfg->src_addr_width;
+		} else if (c->dir == DMA_MEM_TO_DEV) {
+			c->ccfg = CX_CFG_SRCINCR;
+			c->dev_addr = cfg->dst_addr;
+			maxburst = cfg->dst_maxburst;
+			width = cfg->dst_addr_width;
+		}
+		switch (width) {
+		case DMA_SLAVE_BUSWIDTH_1_BYTE:
+		case DMA_SLAVE_BUSWIDTH_2_BYTES:
+		case DMA_SLAVE_BUSWIDTH_4_BYTES:
+		case DMA_SLAVE_BUSWIDTH_8_BYTES:
+			val =  __ffs(width);
+			break;
+		default:
+			val = 3;
+			break;
+		}
+		c->ccfg |= (val << 12) | (val << 16);
+
+		if ((maxburst == 0) || (maxburst > 16))
+			val = 16;
+		else
+			val = maxburst - 1;
+		c->ccfg |= (val << 20) | (val << 24);
+		c->ccfg |= CX_CFG_MEM2PER | CX_CFG_EN;
+
+		/* specific request line */
+		c->ccfg |= c->vc.chan.chan_id << 4;
+		break;
+
+	case DMA_TERMINATE_ALL:
+		dev_dbg(d->slave.dev, "vchan %p: terminate all\n", &c->vc);
+
+		/* Prevent this channel being scheduled */
+		spin_lock(&d->lock);
+		list_del_init(&c->node);
+		spin_unlock(&d->lock);
+
+		/* Clear the tx descriptor lists */
+		spin_lock_irqsave(&c->vc.lock, flags);
+		vchan_get_all_descriptors(&c->vc, &head);
+		if (p) {
+			/* vchan is assigned to a pchan - stop the channel */
+			k3_dma_terminate_chan(p, d);
+			c->phy = NULL;
+			p->vchan = NULL;
+			p->ds_run = p->ds_done = NULL;
+		}
+		spin_unlock_irqrestore(&c->vc.lock, flags);
+		vchan_dma_desc_free_list(&c->vc, &head);
+		break;
+
+	case DMA_PAUSE:
+		dev_dbg(d->slave.dev, "vchan %p: pause\n", &c->vc);
+		if (c->status == DMA_IN_PROGRESS) {
+			c->status = DMA_PAUSED;
+			if (p) {
+				k3_dma_pause_dma(p, false);
+			} else {
+				spin_lock(&d->lock);
+				list_del_init(&c->node);
+				spin_unlock(&d->lock);
+			}
+		}
+		break;
+
+	case DMA_RESUME:
+		dev_dbg(d->slave.dev, "vchan %p: resume\n", &c->vc);
+		spin_lock_irqsave(&c->vc.lock, flags);
+		if (c->status == DMA_PAUSED) {
+			c->status = DMA_IN_PROGRESS;
+			if (p) {
+				k3_dma_pause_dma(p, true);
+			} else if (!list_empty(&c->vc.desc_issued)) {
+				spin_lock(&d->lock);
+				list_add_tail(&c->node, &d->chan_pending);
+				spin_unlock(&d->lock);
+			}
+		}
+		spin_unlock_irqrestore(&c->vc.lock, flags);
+		break;
+	default:
+		return -ENXIO;
+	}
+	return 0;
+}
+
+static void k3_dma_free_desc(struct virt_dma_desc *vd)
+{
+	struct k3_dma_desc_sw *ds =
+		container_of(vd, struct k3_dma_desc_sw, vd);
+
+	kfree(ds);
+}
+
+static struct of_device_id k3_pdma_dt_ids[] = {
+	{ .compatible = "hisilicon,k3-dma-1.0", },
+	{}
+};
+MODULE_DEVICE_TABLE(of, k3_pdma_dt_ids);
+
+static struct dma_chan *k3_of_dma_simple_xlate(struct of_phandle_args *dma_spec,
+						struct of_dma *ofdma)
+{
+	struct k3_dma_dev *d = ofdma->of_dma_data;
+	unsigned int request = dma_spec->args[0];
+
+	if (request > d->dma_requests)
+		return NULL;
+
+	return dma_get_slave_channel(&(d->chans[request].vc.chan));
+}
+
+static int k3_dma_probe(struct platform_device *op)
+{
+	struct k3_dma_dev *d;
+	const struct of_device_id *of_id;
+	struct resource *iores;
+	int i, ret, irq = 0;
+
+	iores = platform_get_resource(op, IORESOURCE_MEM, 0);
+	if (!iores)
+		return -EINVAL;
+
+	d = devm_kzalloc(&op->dev, sizeof(*d), GFP_KERNEL);
+	if (!d)
+		return -ENOMEM;
+
+	d->base = devm_ioremap_resource(&op->dev, iores);
+	if (IS_ERR(d->base))
+		return PTR_ERR(d->base);
+
+	of_id = of_match_device(k3_pdma_dt_ids, &op->dev);
+	if (of_id) {
+		of_property_read_u32((&op->dev)->of_node,
+				"dma-channels", &d->dma_channels);
+		of_property_read_u32((&op->dev)->of_node,
+				"dma-requests", &d->dma_requests);
+	}
+
+	d->clk = devm_clk_get(&op->dev, NULL);
+	if (IS_ERR(d->clk)) {
+		dev_err(&op->dev, "no dma clk\n");
+		return PTR_ERR(d->clk);
+	}
+
+	irq = platform_get_irq(op, 0);
+	ret = devm_request_irq(&op->dev, irq,
+			k3_dma_int_handler, IRQF_DISABLED, DRIVER_NAME, d);
+	if (ret)
+		return ret;
+
+	/* init phy channel */
+	d->phy = devm_kzalloc(&op->dev,
+		d->dma_channels * sizeof(struct k3_dma_phy), GFP_KERNEL);
+	if (d->phy == NULL)
+		return -ENOMEM;
+
+	for (i = 0; i < d->dma_channels; i++) {
+		struct k3_dma_phy *p = &d->phy[i];
+
+		p->idx = i;
+		p->base = d->base + i * 0x40;
+	}
+
+	INIT_LIST_HEAD(&d->slave.channels);
+	dma_cap_set(DMA_SLAVE, d->slave.cap_mask);
+	dma_cap_set(DMA_MEMCPY, d->slave.cap_mask);
+	d->slave.dev = &op->dev;
+	d->slave.device_alloc_chan_resources = k3_dma_alloc_chan_resources;
+	d->slave.device_free_chan_resources = k3_dma_free_chan_resources;
+	d->slave.device_tx_status = k3_dma_tx_status;
+	d->slave.device_prep_dma_memcpy = k3_dma_prep_memcpy;
+	d->slave.device_prep_slave_sg = k3_dma_prep_slave_sg;
+	d->slave.device_issue_pending = k3_dma_issue_pending;
+	d->slave.device_control = k3_dma_control;
+	d->slave.copy_align = DMA_ALIGN;
+	d->slave.chancnt = d->dma_requests;
+
+	/* init virtual channel */
+	d->chans = devm_kzalloc(&op->dev,
+		d->dma_requests * sizeof(struct k3_dma_chan), GFP_KERNEL);
+	if (d->chans == NULL)
+		return -ENOMEM;
+
+	for (i = 0; i < d->dma_requests; i++) {
+		struct k3_dma_chan *c = &d->chans[i];
+
+		c->status = DMA_IN_PROGRESS;
+		INIT_LIST_HEAD(&c->node);
+		c->vc.desc_free = k3_dma_free_desc;
+		vchan_init(&c->vc, &d->slave);
+	}
+
+	/* Enable clock before accessing registers */
+	ret = clk_prepare_enable(d->clk);
+	if (ret < 0) {
+		dev_err(&op->dev, "clk_prepare_enable failed: %d\n", ret);
+		return ret;
+	}
+
+	k3_dma_enable_dma(d, true);
+
+	ret = dma_async_device_register(&d->slave);
+	if (ret)
+		return ret;
+
+	ret = of_dma_controller_register((&op->dev)->of_node,
+					k3_of_dma_simple_xlate, d);
+	if (ret)
+		goto of_dma_register_fail;
+
+	spin_lock_init(&d->lock);
+	INIT_LIST_HEAD(&d->chan_pending);
+	tasklet_init(&d->task, k3_dma_tasklet, (unsigned long)d);
+	platform_set_drvdata(op, d);
+	dev_info(&op->dev, "initialized\n");
+
+	return 0;
+
+of_dma_register_fail:
+	dma_async_device_unregister(&d->slave);
+	return ret;
+}
+
+static int k3_dma_remove(struct platform_device *op)
+{
+	struct k3_dma_chan *c, *cn;
+	struct k3_dma_dev *d = platform_get_drvdata(op);
+
+	dma_async_device_unregister(&d->slave);
+	of_dma_controller_free((&op->dev)->of_node);
+
+	list_for_each_entry_safe(c, cn, &d->slave.channels, vc.chan.device_node) {
+		list_del(&c->vc.chan.device_node);
+		tasklet_kill(&c->vc.task);
+	}
+	tasklet_kill(&d->task);
+	clk_disable_unprepare(d->clk);
+	return 0;
+}
+
+static int k3_dma_suspend(struct device *dev)
+{
+	struct k3_dma_dev *d = dev_get_drvdata(dev);
+	u32 stat = 0;
+
+	stat = k3_dma_get_chan_stat(d);
+	if (stat) {
+		dev_warn(d->slave.dev,
+			"chan %d is running fail to suspend\n", stat);
+		return -1;
+	}
+	k3_dma_enable_dma(d, false);
+	clk_disable_unprepare(d->clk);
+	return 0;
+}
+
+static int k3_dma_resume(struct device *dev)
+{
+	struct k3_dma_dev *d = dev_get_drvdata(dev);
+	int ret = 0;
+
+	ret = clk_prepare_enable(d->clk);
+	if (ret < 0) {
+		dev_err(d->slave.dev, "clk_prepare_enable failed: %d\n", ret);
+		return ret;
+	}
+	k3_dma_enable_dma(d, true);
+	return 0;
+}
+
+SIMPLE_DEV_PM_OPS(k3_dma_pmops, k3_dma_suspend, k3_dma_resume);
+
+static struct platform_driver k3_pdma_driver = {
+	.driver		= {
+		.name	= DRIVER_NAME,
+		.owner  = THIS_MODULE,
+		.pm	= &k3_dma_pmops,
+		.of_match_table = k3_pdma_dt_ids,
+	},
+	.probe		= k3_dma_probe,
+	.remove		= k3_dma_remove,
+};
+
+module_platform_driver(k3_pdma_driver);
+
+MODULE_DESCRIPTION("Hisilicon k3 DMA Driver");
+MODULE_ALIAS("platform:k3dma");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/mmp_pdma.c b/drivers/dma/mmp_pdma.c
index c26699f..ff8d7827 100644
--- a/drivers/dma/mmp_pdma.c
+++ b/drivers/dma/mmp_pdma.c
@@ -18,7 +18,9 @@
 #include <linux/platform_data/mmp_dma.h>
 #include <linux/dmapool.h>
 #include <linux/of_device.h>
+#include <linux/of_dma.h>
 #include <linux/of.h>
+#include <linux/dma/mmp-pdma.h>
 
 #include "dmaengine.h"
 
@@ -47,6 +49,8 @@
 #define DCSR_CMPST	(1 << 10)       /* The Descriptor Compare Status */
 #define DCSR_EORINTR	(1 << 9)        /* The end of Receive */
 
+#define DRCMR(n)	((((n) < 64) ? 0x0100 : 0x1100) + \
+				 (((n) & 0x3f) << 2))
 #define DRCMR_MAPVLD	(1 << 7)	/* Map Valid (read / write) */
 #define DRCMR_CHLNUM	0x1f		/* mask for Channel Number (read / write) */
 
@@ -69,7 +73,7 @@
 #define DCMD_LENGTH	0x01fff		/* length mask (max = 8K - 1) */
 
 #define PDMA_ALIGNMENT		3
-#define PDMA_MAX_DESC_BYTES	0x1000
+#define PDMA_MAX_DESC_BYTES	DCMD_LENGTH
 
 struct mmp_pdma_desc_hw {
 	u32 ddadr;	/* Points to the next descriptor + flags */
@@ -94,6 +98,9 @@
 	struct mmp_pdma_phy *phy;
 	enum dma_transfer_direction dir;
 
+	struct mmp_pdma_desc_sw *cyclic_first;	/* first desc_sw if channel
+						 * is in cyclic mode */
+
 	/* channel's basic info */
 	struct tasklet_struct tasklet;
 	u32 dcmd;
@@ -105,6 +112,7 @@
 	struct list_head chain_pending;	/* Link descriptors queue for pending */
 	struct list_head chain_running;	/* Link descriptors queue for running */
 	bool idle;			/* channel statue machine */
+	bool byte_align;
 
 	struct dma_pool *desc_pool;	/* Descriptors pool */
 };
@@ -121,6 +129,7 @@
 	struct device			*dev;
 	struct dma_device		device;
 	struct mmp_pdma_phy		*phy;
+	spinlock_t phy_lock; /* protect alloc/free phy channels */
 };
 
 #define tx_to_mmp_pdma_desc(tx) container_of(tx, struct mmp_pdma_desc_sw, async_tx)
@@ -137,15 +146,21 @@
 
 static void enable_chan(struct mmp_pdma_phy *phy)
 {
-	u32 reg;
+	u32 reg, dalgn;
 
 	if (!phy->vchan)
 		return;
 
-	reg = phy->vchan->drcmr;
-	reg = (((reg) < 64) ? 0x0100 : 0x1100) + (((reg) & 0x3f) << 2);
+	reg = DRCMR(phy->vchan->drcmr);
 	writel(DRCMR_MAPVLD | phy->idx, phy->base + reg);
 
+	dalgn = readl(phy->base + DALGN);
+	if (phy->vchan->byte_align)
+		dalgn |= 1 << phy->idx;
+	else
+		dalgn &= ~(1 << phy->idx);
+	writel(dalgn, phy->base + DALGN);
+
 	reg = (phy->idx << 2) + DCSR;
 	writel(readl(phy->base + reg) | DCSR_RUN,
 					phy->base + reg);
@@ -218,7 +233,8 @@
 {
 	int prio, i;
 	struct mmp_pdma_device *pdev = to_mmp_pdma_dev(pchan->chan.device);
-	struct mmp_pdma_phy *phy;
+	struct mmp_pdma_phy *phy, *found = NULL;
+	unsigned long flags;
 
 	/*
 	 * dma channel priorities
@@ -227,6 +243,8 @@
 	 * ch 8 - 11, 24 - 27  <--> (2)
 	 * ch 12 - 15, 28 - 31  <--> (3)
 	 */
+
+	spin_lock_irqsave(&pdev->phy_lock, flags);
 	for (prio = 0; prio <= (((pdev->dma_channels - 1) & 0xf) >> 2); prio++) {
 		for (i = 0; i < pdev->dma_channels; i++) {
 			if (prio != ((i & 0xf) >> 2))
@@ -234,31 +252,34 @@
 			phy = &pdev->phy[i];
 			if (!phy->vchan) {
 				phy->vchan = pchan;
-				return phy;
+				found = phy;
+				goto out_unlock;
 			}
 		}
 	}
 
-	return NULL;
+out_unlock:
+	spin_unlock_irqrestore(&pdev->phy_lock, flags);
+	return found;
 }
 
-/* desc->tx_list ==> pending list */
-static void append_pending_queue(struct mmp_pdma_chan *chan,
-					struct mmp_pdma_desc_sw *desc)
+static void mmp_pdma_free_phy(struct mmp_pdma_chan *pchan)
 {
-	struct mmp_pdma_desc_sw *tail =
-				to_mmp_pdma_desc(chan->chain_pending.prev);
+	struct mmp_pdma_device *pdev = to_mmp_pdma_dev(pchan->chan.device);
+	unsigned long flags;
+	u32 reg;
 
-	if (list_empty(&chan->chain_pending))
-		goto out_splice;
+	if (!pchan->phy)
+		return;
 
-	/* one irq per queue, even appended */
-	tail->desc.ddadr = desc->async_tx.phys;
-	tail->desc.dcmd &= ~DCMD_ENDIRQEN;
+	/* clear the channel mapping in DRCMR */
+	reg = DRCMR(pchan->phy->vchan->drcmr);
+	writel(0, pchan->phy->base + reg);
 
-	/* softly link to pending list */
-out_splice:
-	list_splice_tail_init(&desc->tx_list, &chan->chain_pending);
+	spin_lock_irqsave(&pdev->phy_lock, flags);
+	pchan->phy->vchan = NULL;
+	pchan->phy = NULL;
+	spin_unlock_irqrestore(&pdev->phy_lock, flags);
 }
 
 /**
@@ -277,10 +298,7 @@
 
 	if (list_empty(&chan->chain_pending)) {
 		/* chance to re-fetch phy channel with higher prio */
-		if (chan->phy) {
-			chan->phy->vchan = NULL;
-			chan->phy = NULL;
-		}
+		mmp_pdma_free_phy(chan);
 		dev_dbg(chan->dev, "no pending list\n");
 		return;
 	}
@@ -326,14 +344,16 @@
 		cookie = dma_cookie_assign(&child->async_tx);
 	}
 
-	append_pending_queue(chan, desc);
+	/* softly link to pending list - desc->tx_list ==> pending list */
+	list_splice_tail_init(&desc->tx_list, &chan->chain_pending);
 
 	spin_unlock_irqrestore(&chan->desc_lock, flags);
 
 	return cookie;
 }
 
-struct mmp_pdma_desc_sw *mmp_pdma_alloc_descriptor(struct mmp_pdma_chan *chan)
+static struct mmp_pdma_desc_sw *
+mmp_pdma_alloc_descriptor(struct mmp_pdma_chan *chan)
 {
 	struct mmp_pdma_desc_sw *desc;
 	dma_addr_t pdesc;
@@ -377,10 +397,7 @@
 		dev_err(chan->dev, "unable to allocate descriptor pool\n");
 		return -ENOMEM;
 	}
-	if (chan->phy) {
-		chan->phy->vchan = NULL;
-		chan->phy = NULL;
-	}
+	mmp_pdma_free_phy(chan);
 	chan->idle = true;
 	chan->dev_addr = 0;
 	return 1;
@@ -411,10 +428,7 @@
 	chan->desc_pool = NULL;
 	chan->idle = true;
 	chan->dev_addr = 0;
-	if (chan->phy) {
-		chan->phy->vchan = NULL;
-		chan->phy = NULL;
-	}
+	mmp_pdma_free_phy(chan);
 	return;
 }
 
@@ -434,6 +448,7 @@
 		return NULL;
 
 	chan = to_mmp_pdma_chan(dchan);
+	chan->byte_align = false;
 
 	if (!chan->dir) {
 		chan->dir = DMA_MEM_TO_MEM;
@@ -450,6 +465,8 @@
 		}
 
 		copy = min_t(size_t, len, PDMA_MAX_DESC_BYTES);
+		if (dma_src & 0x7 || dma_dst & 0x7)
+			chan->byte_align = true;
 
 		new->desc.dcmd = chan->dcmd | (DCMD_LENGTH & copy);
 		new->desc.dsadr = dma_src;
@@ -486,6 +503,8 @@
 	new->desc.ddadr = DDADR_STOP;
 	new->desc.dcmd |= DCMD_ENDIRQEN;
 
+	chan->cyclic_first = NULL;
+
 	return &first->async_tx;
 
 fail:
@@ -509,12 +528,16 @@
 	if ((sgl == NULL) || (sg_len == 0))
 		return NULL;
 
+	chan->byte_align = false;
+
 	for_each_sg(sgl, sg, sg_len, i) {
 		addr = sg_dma_address(sg);
 		avail = sg_dma_len(sgl);
 
 		do {
 			len = min_t(size_t, avail, PDMA_MAX_DESC_BYTES);
+			if (addr & 0x7)
+				chan->byte_align = true;
 
 			/* allocate and populate the descriptor */
 			new = mmp_pdma_alloc_descriptor(chan);
@@ -557,6 +580,94 @@
 	new->desc.ddadr = DDADR_STOP;
 	new->desc.dcmd |= DCMD_ENDIRQEN;
 
+	chan->dir = dir;
+	chan->cyclic_first = NULL;
+
+	return &first->async_tx;
+
+fail:
+	if (first)
+		mmp_pdma_free_desc_list(chan, &first->tx_list);
+	return NULL;
+}
+
+static struct dma_async_tx_descriptor *mmp_pdma_prep_dma_cyclic(
+	struct dma_chan *dchan, dma_addr_t buf_addr, size_t len,
+	size_t period_len, enum dma_transfer_direction direction,
+	unsigned long flags, void *context)
+{
+	struct mmp_pdma_chan *chan;
+	struct mmp_pdma_desc_sw *first = NULL, *prev = NULL, *new;
+	dma_addr_t dma_src, dma_dst;
+
+	if (!dchan || !len || !period_len)
+		return NULL;
+
+	/* the buffer length must be a multiple of period_len */
+	if (len % period_len != 0)
+		return NULL;
+
+	if (period_len > PDMA_MAX_DESC_BYTES)
+		return NULL;
+
+	chan = to_mmp_pdma_chan(dchan);
+
+	switch (direction) {
+	case DMA_MEM_TO_DEV:
+		dma_src = buf_addr;
+		dma_dst = chan->dev_addr;
+		break;
+	case DMA_DEV_TO_MEM:
+		dma_dst = buf_addr;
+		dma_src = chan->dev_addr;
+		break;
+	default:
+		dev_err(chan->dev, "Unsupported direction for cyclic DMA\n");
+		return NULL;
+	}
+
+	chan->dir = direction;
+
+	do {
+		/* Allocate the link descriptor from DMA pool */
+		new = mmp_pdma_alloc_descriptor(chan);
+		if (!new) {
+			dev_err(chan->dev, "no memory for desc\n");
+			goto fail;
+		}
+
+		new->desc.dcmd = chan->dcmd | DCMD_ENDIRQEN |
+					(DCMD_LENGTH & period_len);
+		new->desc.dsadr = dma_src;
+		new->desc.dtadr = dma_dst;
+
+		if (!first)
+			first = new;
+		else
+			prev->desc.ddadr = new->async_tx.phys;
+
+		new->async_tx.cookie = 0;
+		async_tx_ack(&new->async_tx);
+
+		prev = new;
+		len -= period_len;
+
+		if (chan->dir == DMA_MEM_TO_DEV)
+			dma_src += period_len;
+		else
+			dma_dst += period_len;
+
+		/* Insert the link descriptor to the LD ring */
+		list_add_tail(&new->node, &first->tx_list);
+	} while (len);
+
+	first->async_tx.flags = flags; /* client is in control of this ack */
+	first->async_tx.cookie = -EBUSY;
+
+	/* make the cyclic link */
+	new->desc.ddadr = first->async_tx.phys;
+	chan->cyclic_first = first;
+
 	return &first->async_tx;
 
 fail:
@@ -581,10 +692,7 @@
 	switch (cmd) {
 	case DMA_TERMINATE_ALL:
 		disable_chan(chan->phy);
-		if (chan->phy) {
-			chan->phy->vchan = NULL;
-			chan->phy = NULL;
-		}
+		mmp_pdma_free_phy(chan);
 		spin_lock_irqsave(&chan->desc_lock, flags);
 		mmp_pdma_free_desc_list(chan, &chan->chain_pending);
 		mmp_pdma_free_desc_list(chan, &chan->chain_running);
@@ -619,8 +727,13 @@
 			chan->dcmd |= DCMD_BURST32;
 
 		chan->dir = cfg->direction;
-		chan->drcmr = cfg->slave_id;
 		chan->dev_addr = addr;
+		/* FIXME: drivers should be ported over to use the filter
+		 * function. Once that's done, the following two lines can
+		 * be removed.
+		 */
+		if (cfg->slave_id)
+			chan->drcmr = cfg->slave_id;
 		break;
 	default:
 		return -ENOSYS;
@@ -632,15 +745,7 @@
 static enum dma_status mmp_pdma_tx_status(struct dma_chan *dchan,
 			dma_cookie_t cookie, struct dma_tx_state *txstate)
 {
-	struct mmp_pdma_chan *chan = to_mmp_pdma_chan(dchan);
-	enum dma_status ret;
-	unsigned long flags;
-
-	spin_lock_irqsave(&chan->desc_lock, flags);
-	ret = dma_cookie_status(dchan, cookie, txstate);
-	spin_unlock_irqrestore(&chan->desc_lock, flags);
-
-	return ret;
+	return dma_cookie_status(dchan, cookie, txstate);
 }
 
 /**
@@ -669,29 +774,51 @@
 	LIST_HEAD(chain_cleanup);
 	unsigned long flags;
 
-	/* submit pending list; callback for each desc; free desc */
+	if (chan->cyclic_first) {
+		dma_async_tx_callback cb = NULL;
+		void *cb_data = NULL;
 
+		spin_lock_irqsave(&chan->desc_lock, flags);
+		desc = chan->cyclic_first;
+		cb = desc->async_tx.callback;
+		cb_data = desc->async_tx.callback_param;
+		spin_unlock_irqrestore(&chan->desc_lock, flags);
+
+		if (cb)
+			cb(cb_data);
+
+		return;
+	}
+
+	/* submit pending list; callback for each desc; free desc */
 	spin_lock_irqsave(&chan->desc_lock, flags);
 
-	/* update the cookie if we have some descriptors to cleanup */
-	if (!list_empty(&chan->chain_running)) {
-		dma_cookie_t cookie;
+	list_for_each_entry_safe(desc, _desc, &chan->chain_running, node) {
+		/*
+		 * move the descriptors to a temporary list so we can drop
+		 * the lock during the entire cleanup operation
+		 */
+		list_del(&desc->node);
+		list_add(&desc->node, &chain_cleanup);
 
-		desc = to_mmp_pdma_desc(chan->chain_running.prev);
-		cookie = desc->async_tx.cookie;
-		dma_cookie_complete(&desc->async_tx);
-
-		dev_dbg(chan->dev, "completed_cookie=%d\n", cookie);
+		/*
+		 * Look for the first list entry which has the ENDIRQEN flag
+		 * set. That is the descriptor we got an interrupt for, so
+		 * complete that transaction and its cookie.
+		 */
+		if (desc->desc.dcmd & DCMD_ENDIRQEN) {
+			dma_cookie_t cookie = desc->async_tx.cookie;
+			dma_cookie_complete(&desc->async_tx);
+			dev_dbg(chan->dev, "completed_cookie=%d\n", cookie);
+			break;
+		}
 	}
 
 	/*
-	 * move the descriptors to a temporary list so we can drop the lock
-	 * during the entire cleanup operation
+	 * The hardware is idle and ready for more when the
+	 * chain_running list is empty.
 	 */
-	list_splice_tail_init(&chan->chain_running, &chain_cleanup);
-
-	/* the hardware is now idle and ready for more */
-	chan->idle = true;
+	chan->idle = list_empty(&chan->chain_running);
 
 	/* Start any pending transactions automatically */
 	start_pending_queue(chan);
@@ -763,6 +890,39 @@
 };
 MODULE_DEVICE_TABLE(of, mmp_pdma_dt_ids);
 
+static struct dma_chan *mmp_pdma_dma_xlate(struct of_phandle_args *dma_spec,
+					   struct of_dma *ofdma)
+{
+	struct mmp_pdma_device *d = ofdma->of_dma_data;
+	struct dma_chan *chan, *candidate;
+
+retry:
+	candidate = NULL;
+
+	/* walk the list of channels registered with the current instance and
+	 * find one that is currently unused */
+	list_for_each_entry(chan, &d->device.channels, device_node)
+		if (chan->client_count == 0) {
+			candidate = chan;
+			break;
+		}
+
+	if (!candidate)
+		return NULL;
+
+	/* dma_get_slave_channel will return NULL if we lost a race between
+	 * the lookup and the reservation */
+	chan = dma_get_slave_channel(candidate);
+
+	if (chan) {
+		struct mmp_pdma_chan *c = to_mmp_pdma_chan(chan);
+		c->drcmr = dma_spec->args[0];
+		return chan;
+	}
+
+	goto retry;
+}
+
 static int mmp_pdma_probe(struct platform_device *op)
 {
 	struct mmp_pdma_device *pdev;
@@ -777,10 +937,9 @@
 		return -ENOMEM;
 	pdev->dev = &op->dev;
 
-	iores = platform_get_resource(op, IORESOURCE_MEM, 0);
-	if (!iores)
-		return -EINVAL;
+	spin_lock_init(&pdev->phy_lock);
 
+	iores = platform_get_resource(op, IORESOURCE_MEM, 0);
 	pdev->base = devm_ioremap_resource(pdev->dev, iores);
 	if (IS_ERR(pdev->base))
 		return PTR_ERR(pdev->base);
@@ -825,13 +984,15 @@
 
 	dma_cap_set(DMA_SLAVE, pdev->device.cap_mask);
 	dma_cap_set(DMA_MEMCPY, pdev->device.cap_mask);
-	dma_cap_set(DMA_SLAVE, pdev->device.cap_mask);
+	dma_cap_set(DMA_CYCLIC, pdev->device.cap_mask);
+	dma_cap_set(DMA_PRIVATE, pdev->device.cap_mask);
 	pdev->device.dev = &op->dev;
 	pdev->device.device_alloc_chan_resources = mmp_pdma_alloc_chan_resources;
 	pdev->device.device_free_chan_resources = mmp_pdma_free_chan_resources;
 	pdev->device.device_tx_status = mmp_pdma_tx_status;
 	pdev->device.device_prep_dma_memcpy = mmp_pdma_prep_memcpy;
 	pdev->device.device_prep_slave_sg = mmp_pdma_prep_slave_sg;
+	pdev->device.device_prep_dma_cyclic = mmp_pdma_prep_dma_cyclic;
 	pdev->device.device_issue_pending = mmp_pdma_issue_pending;
 	pdev->device.device_control = mmp_pdma_control;
 	pdev->device.copy_align = PDMA_ALIGNMENT;
@@ -847,7 +1008,17 @@
 		return ret;
 	}
 
-	dev_info(pdev->device.dev, "initialized\n");
+	if (op->dev.of_node) {
+		/* Device-tree DMA controller registration */
+		ret = of_dma_controller_register(op->dev.of_node,
+						 mmp_pdma_dma_xlate, pdev);
+		if (ret < 0) {
+			dev_err(&op->dev, "of_dma_controller_register failed\n");
+			return ret;
+		}
+	}
+
+	dev_info(pdev->device.dev, "initialized %d channels\n", dma_channels);
 	return 0;
 }
 
@@ -867,6 +1038,19 @@
 	.remove		= mmp_pdma_remove,
 };
 
+bool mmp_pdma_filter_fn(struct dma_chan *chan, void *param)
+{
+	struct mmp_pdma_chan *c = to_mmp_pdma_chan(chan);
+
+	if (chan->device->dev->driver != &mmp_pdma_driver.driver)
+		return false;
+
+	c->drcmr = *(unsigned int *) param;
+
+	return true;
+}
+EXPORT_SYMBOL_GPL(mmp_pdma_filter_fn);
+
 module_platform_driver(mmp_pdma_driver);
 
 MODULE_DESCRIPTION("MARVELL MMP Periphera DMA Driver");
diff --git a/drivers/dma/mmp_tdma.c b/drivers/dma/mmp_tdma.c
index 9b93665..38cb517 100644
--- a/drivers/dma/mmp_tdma.c
+++ b/drivers/dma/mmp_tdma.c
@@ -460,7 +460,8 @@
 {
 	struct mmp_tdma_chan *tdmac = to_mmp_tdma_chan(chan);
 
-	dma_set_residue(txstate, tdmac->buf_len - tdmac->pos);
+	dma_set_tx_state(txstate, chan->completed_cookie, chan->cookie,
+			 tdmac->buf_len - tdmac->pos);
 
 	return tdmac->status;
 }
@@ -549,9 +550,6 @@
 	}
 
 	iores = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!iores)
-		return -EINVAL;
-
 	tdev->base = devm_ioremap_resource(&pdev->dev, iores);
 	if (IS_ERR(tdev->base))
 		return PTR_ERR(tdev->base);
diff --git a/drivers/dma/mpc512x_dma.c b/drivers/dma/mpc512x_dma.c
index 2d95673..2fe4353 100644
--- a/drivers/dma/mpc512x_dma.c
+++ b/drivers/dma/mpc512x_dma.c
@@ -556,15 +556,7 @@
 mpc_dma_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
 	       struct dma_tx_state *txstate)
 {
-	struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(chan);
-	enum dma_status ret;
-	unsigned long flags;
-
-	spin_lock_irqsave(&mchan->lock, flags);
-	ret = dma_cookie_status(chan, cookie, txstate);
-	spin_unlock_irqrestore(&mchan->lock, flags);
-
-	return ret;
+	return dma_cookie_status(chan, cookie, txstate);
 }
 
 /* Prepare descriptor for memory to memory copy */
diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c
index 200f1a3..536dcb8 100644
--- a/drivers/dma/mv_xor.c
+++ b/drivers/dma/mv_xor.c
@@ -64,7 +64,7 @@
 				int src_idx)
 {
 	struct mv_xor_desc *hw_desc = desc->hw_desc;
-	return hw_desc->phy_src_addr[src_idx];
+	return hw_desc->phy_src_addr[mv_phy_src_idx(src_idx)];
 }
 
 
@@ -107,32 +107,32 @@
 				 int index, dma_addr_t addr)
 {
 	struct mv_xor_desc *hw_desc = desc->hw_desc;
-	hw_desc->phy_src_addr[index] = addr;
+	hw_desc->phy_src_addr[mv_phy_src_idx(index)] = addr;
 	if (desc->type == DMA_XOR)
 		hw_desc->desc_command |= (1 << index);
 }
 
 static u32 mv_chan_get_current_desc(struct mv_xor_chan *chan)
 {
-	return __raw_readl(XOR_CURR_DESC(chan));
+	return readl_relaxed(XOR_CURR_DESC(chan));
 }
 
 static void mv_chan_set_next_descriptor(struct mv_xor_chan *chan,
 					u32 next_desc_addr)
 {
-	__raw_writel(next_desc_addr, XOR_NEXT_DESC(chan));
+	writel_relaxed(next_desc_addr, XOR_NEXT_DESC(chan));
 }
 
 static void mv_chan_unmask_interrupts(struct mv_xor_chan *chan)
 {
-	u32 val = __raw_readl(XOR_INTR_MASK(chan));
+	u32 val = readl_relaxed(XOR_INTR_MASK(chan));
 	val |= XOR_INTR_MASK_VALUE << (chan->idx * 16);
-	__raw_writel(val, XOR_INTR_MASK(chan));
+	writel_relaxed(val, XOR_INTR_MASK(chan));
 }
 
 static u32 mv_chan_get_intr_cause(struct mv_xor_chan *chan)
 {
-	u32 intr_cause = __raw_readl(XOR_INTR_CAUSE(chan));
+	u32 intr_cause = readl_relaxed(XOR_INTR_CAUSE(chan));
 	intr_cause = (intr_cause >> (chan->idx * 16)) & 0xFFFF;
 	return intr_cause;
 }
@@ -149,13 +149,13 @@
 {
 	u32 val = ~(1 << (chan->idx * 16));
 	dev_dbg(mv_chan_to_devp(chan), "%s, val 0x%08x\n", __func__, val);
-	__raw_writel(val, XOR_INTR_CAUSE(chan));
+	writel_relaxed(val, XOR_INTR_CAUSE(chan));
 }
 
 static void mv_xor_device_clear_err_status(struct mv_xor_chan *chan)
 {
 	u32 val = 0xFFFF0000 >> (chan->idx * 16);
-	__raw_writel(val, XOR_INTR_CAUSE(chan));
+	writel_relaxed(val, XOR_INTR_CAUSE(chan));
 }
 
 static int mv_can_chain(struct mv_xor_desc_slot *desc)
@@ -173,7 +173,7 @@
 			       enum dma_transaction_type type)
 {
 	u32 op_mode;
-	u32 config = __raw_readl(XOR_CONFIG(chan));
+	u32 config = readl_relaxed(XOR_CONFIG(chan));
 
 	switch (type) {
 	case DMA_XOR:
@@ -192,7 +192,14 @@
 
 	config &= ~0x7;
 	config |= op_mode;
-	__raw_writel(config, XOR_CONFIG(chan));
+
+#if defined(__BIG_ENDIAN)
+	config |= XOR_DESCRIPTOR_SWAP;
+#else
+	config &= ~XOR_DESCRIPTOR_SWAP;
+#endif
+
+	writel_relaxed(config, XOR_CONFIG(chan));
 	chan->current_type = type;
 }
 
@@ -201,14 +208,14 @@
 	u32 activation;
 
 	dev_dbg(mv_chan_to_devp(chan), " activate chan.\n");
-	activation = __raw_readl(XOR_ACTIVATION(chan));
+	activation = readl_relaxed(XOR_ACTIVATION(chan));
 	activation |= 0x1;
-	__raw_writel(activation, XOR_ACTIVATION(chan));
+	writel_relaxed(activation, XOR_ACTIVATION(chan));
 }
 
 static char mv_chan_is_busy(struct mv_xor_chan *chan)
 {
-	u32 state = __raw_readl(XOR_ACTIVATION(chan));
+	u32 state = readl_relaxed(XOR_ACTIVATION(chan));
 
 	state = (state >> 4) & 0x3;
 
@@ -647,7 +654,7 @@
 
 	dev_dbg(mv_chan_to_devp(mv_chan),
 		"%s sw_desc %p async_tx %p\n",
-		__func__, sw_desc, sw_desc ? &sw_desc->async_tx : 0);
+		__func__, sw_desc, sw_desc ? &sw_desc->async_tx : NULL);
 
 	return sw_desc ? &sw_desc->async_tx : NULL;
 }
@@ -755,22 +762,22 @@
 {
 	u32 val;
 
-	val = __raw_readl(XOR_CONFIG(chan));
+	val = readl_relaxed(XOR_CONFIG(chan));
 	dev_err(mv_chan_to_devp(chan), "config       0x%08x\n", val);
 
-	val = __raw_readl(XOR_ACTIVATION(chan));
+	val = readl_relaxed(XOR_ACTIVATION(chan));
 	dev_err(mv_chan_to_devp(chan), "activation   0x%08x\n", val);
 
-	val = __raw_readl(XOR_INTR_CAUSE(chan));
+	val = readl_relaxed(XOR_INTR_CAUSE(chan));
 	dev_err(mv_chan_to_devp(chan), "intr cause   0x%08x\n", val);
 
-	val = __raw_readl(XOR_INTR_MASK(chan));
+	val = readl_relaxed(XOR_INTR_MASK(chan));
 	dev_err(mv_chan_to_devp(chan), "intr mask    0x%08x\n", val);
 
-	val = __raw_readl(XOR_ERROR_CAUSE(chan));
+	val = readl_relaxed(XOR_ERROR_CAUSE(chan));
 	dev_err(mv_chan_to_devp(chan), "error cause  0x%08x\n", val);
 
-	val = __raw_readl(XOR_ERROR_ADDR(chan));
+	val = readl_relaxed(XOR_ERROR_ADDR(chan));
 	dev_err(mv_chan_to_devp(chan), "error addr   0x%08x\n", val);
 }
 
@@ -1029,10 +1036,8 @@
 	struct dma_device *dma_dev;
 
 	mv_chan = devm_kzalloc(&pdev->dev, sizeof(*mv_chan), GFP_KERNEL);
-	if (!mv_chan) {
-		ret = -ENOMEM;
-		goto err_free_dma;
-	}
+	if (!mv_chan)
+		return ERR_PTR(-ENOMEM);
 
 	mv_chan->idx = idx;
 	mv_chan->irq = irq;
@@ -1166,7 +1171,7 @@
 {
 	const struct mbus_dram_target_info *dram;
 	struct mv_xor_device *xordev;
-	struct mv_xor_platform_data *pdata = pdev->dev.platform_data;
+	struct mv_xor_platform_data *pdata = dev_get_platdata(&pdev->dev);
 	struct resource *res;
 	int i, ret;
 
diff --git a/drivers/dma/mv_xor.h b/drivers/dma/mv_xor.h
index c619359..06b067f 100644
--- a/drivers/dma/mv_xor.h
+++ b/drivers/dma/mv_xor.h
@@ -29,8 +29,10 @@
 #define MV_XOR_THRESHOLD		1
 #define MV_XOR_MAX_CHANNELS             2
 
+/* Values for the XOR_CONFIG register */
 #define XOR_OPERATION_MODE_XOR		0
 #define XOR_OPERATION_MODE_MEMCPY	2
+#define XOR_DESCRIPTOR_SWAP		BIT(14)
 
 #define XOR_CURR_DESC(chan)	(chan->mmr_base + 0x210 + (chan->idx * 4))
 #define XOR_NEXT_DESC(chan)	(chan->mmr_base + 0x200 + (chan->idx * 4))
@@ -143,7 +145,16 @@
 #endif
 };
 
-/* This structure describes XOR descriptor size 64bytes	*/
+/*
+ * This structure describes XOR descriptor size 64bytes. The
+ * mv_phy_src_idx() macro must be used when indexing the values of the
+ * phy_src_addr[] array. This is due to the fact that the 'descriptor
+ * swap' feature, used on big endian systems, swaps descriptors data
+ * within blocks of 8 bytes. So two consecutive values of the
+ * phy_src_addr[] array are actually swapped in big-endian, which
+ * explains the different mv_phy_src_idx() implementation.
+ */
+#if defined(__LITTLE_ENDIAN)
 struct mv_xor_desc {
 	u32 status;		/* descriptor execution status */
 	u32 crc32_result;	/* result of CRC-32 calculation */
@@ -155,6 +166,21 @@
 	u32 reserved0;
 	u32 reserved1;
 };
+#define mv_phy_src_idx(src_idx) (src_idx)
+#else
+struct mv_xor_desc {
+	u32 crc32_result;	/* result of CRC-32 calculation */
+	u32 status;		/* descriptor execution status */
+	u32 phy_next_desc;	/* next descriptor address pointer */
+	u32 desc_command;	/* type of operation to be carried out */
+	u32 phy_dest_addr;	/* destination block address */
+	u32 byte_count;		/* size of src/dst blocks in bytes */
+	u32 phy_src_addr[8];	/* source block addresses */
+	u32 reserved1;
+	u32 reserved0;
+};
+#define mv_phy_src_idx(src_idx) (src_idx ^ 1)
+#endif
 
 #define to_mv_sw_desc(addr_hw_desc)		\
 	container_of(addr_hw_desc, struct mv_xor_desc_slot, hw_desc)
diff --git a/drivers/dma/mxs-dma.c b/drivers/dma/mxs-dma.c
index 7195930..ccd13df 100644
--- a/drivers/dma/mxs-dma.c
+++ b/drivers/dma/mxs-dma.c
@@ -23,7 +23,6 @@
 #include <linux/dmaengine.h>
 #include <linux/delay.h>
 #include <linux/module.h>
-#include <linux/fsl/mxs-dma.h>
 #include <linux/stmp_device.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
@@ -197,24 +196,6 @@
 	return container_of(chan, struct mxs_dma_chan, chan);
 }
 
-int mxs_dma_is_apbh(struct dma_chan *chan)
-{
-	struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
-	struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
-
-	return dma_is_apbh(mxs_dma);
-}
-EXPORT_SYMBOL_GPL(mxs_dma_is_apbh);
-
-int mxs_dma_is_apbx(struct dma_chan *chan)
-{
-	struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
-	struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
-
-	return !dma_is_apbh(mxs_dma);
-}
-EXPORT_SYMBOL_GPL(mxs_dma_is_apbx);
-
 static void mxs_dma_reset_chan(struct mxs_dma_chan *mxs_chan)
 {
 	struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
@@ -349,13 +330,9 @@
 static int mxs_dma_alloc_chan_resources(struct dma_chan *chan)
 {
 	struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
-	struct mxs_dma_data *data = chan->private;
 	struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
 	int ret;
 
-	if (data)
-		mxs_chan->chan_irq = data->chan_irq;
-
 	mxs_chan->ccw = dma_alloc_coherent(mxs_dma->dma_device.dev,
 				CCW_BLOCK_SIZE, &mxs_chan->ccw_phys,
 				GFP_KERNEL);
@@ -622,10 +599,8 @@
 			dma_cookie_t cookie, struct dma_tx_state *txstate)
 {
 	struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
-	dma_cookie_t last_used;
 
-	last_used = chan->cookie;
-	dma_set_tx_state(txstate, chan->completed_cookie, last_used, 0);
+	dma_set_tx_state(txstate, chan->completed_cookie, chan->cookie, 0);
 
 	return mxs_chan->status;
 }
diff --git a/drivers/dma/of-dma.c b/drivers/dma/of-dma.c
index 75334bd..0b88dd3 100644
--- a/drivers/dma/of-dma.c
+++ b/drivers/dma/of-dma.c
@@ -160,7 +160,8 @@
 
 	count = of_property_count_strings(np, "dma-names");
 	if (count < 0) {
-		pr_err("%s: dma-names property missing or empty\n", __func__);
+		pr_err("%s: dma-names property of node '%s' missing or empty\n",
+			__func__, np->full_name);
 		return NULL;
 	}
 
diff --git a/drivers/dma/pch_dma.c b/drivers/dma/pch_dma.c
index 0bbdea50..61fdc54 100644
--- a/drivers/dma/pch_dma.c
+++ b/drivers/dma/pch_dma.c
@@ -564,14 +564,7 @@
 static enum dma_status pd_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
 				    struct dma_tx_state *txstate)
 {
-	struct pch_dma_chan *pd_chan = to_pd_chan(chan);
-	enum dma_status ret;
-
-	spin_lock_irq(&pd_chan->lock);
-	ret = dma_cookie_status(chan, cookie, txstate);
-	spin_unlock_irq(&pd_chan->lock);
-
-	return ret;
+	return dma_cookie_status(chan, cookie, txstate);
 }
 
 static void pd_issue_pending(struct dma_chan *chan)
@@ -1036,3 +1029,4 @@
 		   "DMA controller driver");
 MODULE_AUTHOR("Yong Wang <yong.y.wang@intel.com>");
 MODULE_LICENSE("GPL v2");
+MODULE_DEVICE_TABLE(pci, pch_dma_id_table);
diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index fa645d8..a562d24 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -545,6 +545,8 @@
 
 	/* List of to be xfered descriptors */
 	struct list_head work_list;
+	/* List of completed descriptors */
+	struct list_head completed_list;
 
 	/* Pointer to the DMAC that manages this channel,
 	 * NULL if the channel is available to be acquired.
@@ -2198,66 +2200,6 @@
 	return container_of(tx, struct dma_pl330_desc, txd);
 }
 
-static inline void free_desc_list(struct list_head *list)
-{
-	struct dma_pl330_dmac *pdmac;
-	struct dma_pl330_desc *desc;
-	struct dma_pl330_chan *pch = NULL;
-	unsigned long flags;
-
-	/* Finish off the work list */
-	list_for_each_entry(desc, list, node) {
-		dma_async_tx_callback callback;
-		void *param;
-
-		/* All desc in a list belong to same channel */
-		pch = desc->pchan;
-		callback = desc->txd.callback;
-		param = desc->txd.callback_param;
-
-		if (callback)
-			callback(param);
-
-		desc->pchan = NULL;
-	}
-
-	/* pch will be unset if list was empty */
-	if (!pch)
-		return;
-
-	pdmac = pch->dmac;
-
-	spin_lock_irqsave(&pdmac->pool_lock, flags);
-	list_splice_tail_init(list, &pdmac->desc_pool);
-	spin_unlock_irqrestore(&pdmac->pool_lock, flags);
-}
-
-static inline void handle_cyclic_desc_list(struct list_head *list)
-{
-	struct dma_pl330_desc *desc;
-	struct dma_pl330_chan *pch = NULL;
-	unsigned long flags;
-
-	list_for_each_entry(desc, list, node) {
-		dma_async_tx_callback callback;
-
-		/* Change status to reload it */
-		desc->status = PREP;
-		pch = desc->pchan;
-		callback = desc->txd.callback;
-		if (callback)
-			callback(desc->txd.callback_param);
-	}
-
-	/* pch will be unset if list was empty */
-	if (!pch)
-		return;
-
-	spin_lock_irqsave(&pch->lock, flags);
-	list_splice_tail_init(list, &pch->work_list);
-	spin_unlock_irqrestore(&pch->lock, flags);
-}
-
 static inline void fill_queue(struct dma_pl330_chan *pch)
 {
 	struct dma_pl330_desc *desc;
@@ -2291,7 +2233,6 @@
 	struct dma_pl330_chan *pch = (struct dma_pl330_chan *)data;
 	struct dma_pl330_desc *desc, *_dt;
 	unsigned long flags;
-	LIST_HEAD(list);
 
 	spin_lock_irqsave(&pch->lock, flags);
 
@@ -2300,7 +2241,7 @@
 		if (desc->status == DONE) {
 			if (!pch->cyclic)
 				dma_cookie_complete(&desc->txd);
-			list_move_tail(&desc->node, &list);
+			list_move_tail(&desc->node, &pch->completed_list);
 		}
 
 	/* Try to submit a req imm. next to the last completed cookie */
@@ -2309,12 +2250,31 @@
 	/* Make sure the PL330 Channel thread is active */
 	pl330_chan_ctrl(pch->pl330_chid, PL330_OP_START);
 
-	spin_unlock_irqrestore(&pch->lock, flags);
+	while (!list_empty(&pch->completed_list)) {
+		dma_async_tx_callback callback;
+		void *callback_param;
 
-	if (pch->cyclic)
-		handle_cyclic_desc_list(&list);
-	else
-		free_desc_list(&list);
+		desc = list_first_entry(&pch->completed_list,
+					struct dma_pl330_desc, node);
+
+		callback = desc->txd.callback;
+		callback_param = desc->txd.callback_param;
+
+		if (pch->cyclic) {
+			desc->status = PREP;
+			list_move_tail(&desc->node, &pch->work_list);
+		} else {
+			desc->status = FREE;
+			list_move_tail(&desc->node, &pch->dmac->desc_pool);
+		}
+
+		if (callback) {
+			spin_unlock_irqrestore(&pch->lock, flags);
+			callback(callback_param);
+			spin_lock_irqsave(&pch->lock, flags);
+		}
+	}
+	spin_unlock_irqrestore(&pch->lock, flags);
 }
 
 static void dma_pl330_rqcb(void *token, enum pl330_op_err err)
@@ -2409,7 +2369,7 @@
 static int pl330_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, unsigned long arg)
 {
 	struct dma_pl330_chan *pch = to_pchan(chan);
-	struct dma_pl330_desc *desc, *_dt;
+	struct dma_pl330_desc *desc;
 	unsigned long flags;
 	struct dma_pl330_dmac *pdmac = pch->dmac;
 	struct dma_slave_config *slave_config;
@@ -2423,12 +2383,18 @@
 		pl330_chan_ctrl(pch->pl330_chid, PL330_OP_FLUSH);
 
 		/* Mark all desc done */
-		list_for_each_entry_safe(desc, _dt, &pch->work_list , node) {
-			desc->status = DONE;
-			list_move_tail(&desc->node, &list);
+		list_for_each_entry(desc, &pch->work_list , node) {
+			desc->status = FREE;
+			dma_cookie_complete(&desc->txd);
 		}
 
-		list_splice_tail_init(&list, &pdmac->desc_pool);
+		list_for_each_entry(desc, &pch->completed_list , node) {
+			desc->status = FREE;
+			dma_cookie_complete(&desc->txd);
+		}
+
+		list_splice_tail_init(&pch->work_list, &pdmac->desc_pool);
+		list_splice_tail_init(&pch->completed_list, &pdmac->desc_pool);
 		spin_unlock_irqrestore(&pch->lock, flags);
 		break;
 	case DMA_SLAVE_CONFIG:
@@ -2814,6 +2780,28 @@
 	return &desc->txd;
 }
 
+static void __pl330_giveback_desc(struct dma_pl330_dmac *pdmac,
+				  struct dma_pl330_desc *first)
+{
+	unsigned long flags;
+	struct dma_pl330_desc *desc;
+
+	if (!first)
+		return;
+
+	spin_lock_irqsave(&pdmac->pool_lock, flags);
+
+	while (!list_empty(&first->node)) {
+		desc = list_entry(first->node.next,
+				struct dma_pl330_desc, node);
+		list_move_tail(&desc->node, &pdmac->desc_pool);
+	}
+
+	list_move_tail(&first->node, &pdmac->desc_pool);
+
+	spin_unlock_irqrestore(&pdmac->pool_lock, flags);
+}
+
 static struct dma_async_tx_descriptor *
 pl330_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 		unsigned int sg_len, enum dma_transfer_direction direction,
@@ -2822,7 +2810,6 @@
 	struct dma_pl330_desc *first, *desc = NULL;
 	struct dma_pl330_chan *pch = to_pchan(chan);
 	struct scatterlist *sg;
-	unsigned long flags;
 	int i;
 	dma_addr_t addr;
 
@@ -2842,20 +2829,7 @@
 			dev_err(pch->dmac->pif.dev,
 				"%s:%d Unable to fetch desc\n",
 				__func__, __LINE__);
-			if (!first)
-				return NULL;
-
-			spin_lock_irqsave(&pdmac->pool_lock, flags);
-
-			while (!list_empty(&first->node)) {
-				desc = list_entry(first->node.next,
-						struct dma_pl330_desc, node);
-				list_move_tail(&desc->node, &pdmac->desc_pool);
-			}
-
-			list_move_tail(&first->node, &pdmac->desc_pool);
-
-			spin_unlock_irqrestore(&pdmac->pool_lock, flags);
+			__pl330_giveback_desc(pdmac, first);
 
 			return NULL;
 		}
@@ -2896,6 +2870,25 @@
 		return IRQ_NONE;
 }
 
+#define PL330_DMA_BUSWIDTHS \
+	BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED) | \
+	BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \
+	BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
+	BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) | \
+	BIT(DMA_SLAVE_BUSWIDTH_8_BYTES)
+
+static int pl330_dma_device_slave_caps(struct dma_chan *dchan,
+	struct dma_slave_caps *caps)
+{
+	caps->src_addr_widths = PL330_DMA_BUSWIDTHS;
+	caps->dstn_addr_widths = PL330_DMA_BUSWIDTHS;
+	caps->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+	caps->cmd_pause = false;
+	caps->cmd_terminate = true;
+
+	return 0;
+}
+
 static int
 pl330_probe(struct amba_device *adev, const struct amba_id *id)
 {
@@ -2908,7 +2901,7 @@
 	int i, ret, irq;
 	int num_chan;
 
-	pdat = adev->dev.platform_data;
+	pdat = dev_get_platdata(&adev->dev);
 
 	/* Allocate a new DMAC and its Channels */
 	pdmac = devm_kzalloc(&adev->dev, sizeof(*pdmac), GFP_KERNEL);
@@ -2971,6 +2964,7 @@
 			pch->chan.private = adev->dev.of_node;
 
 		INIT_LIST_HEAD(&pch->work_list);
+		INIT_LIST_HEAD(&pch->completed_list);
 		spin_lock_init(&pch->lock);
 		pch->pl330_chid = NULL;
 		pch->chan.device = pd;
@@ -3000,6 +2994,7 @@
 	pd->device_prep_slave_sg = pl330_prep_slave_sg;
 	pd->device_control = pl330_control;
 	pd->device_issue_pending = pl330_issue_pending;
+	pd->device_slave_caps = pl330_dma_device_slave_caps;
 
 	ret = dma_async_device_register(pd);
 	if (ret) {
@@ -3015,6 +3010,14 @@
 			"unable to register DMA to the generic DT DMA helpers\n");
 		}
 	}
+	/*
+	 * This is the limit for transfers with a buswidth of 1, larger
+	 * buswidths will have larger limits.
+	 */
+	ret = dma_set_max_seg_size(&adev->dev, 1900800);
+	if (ret)
+		dev_err(&adev->dev, "unable to set the seg size\n");
+
 
 	dev_info(&adev->dev,
 		"Loaded driver for PL330 DMAC-%d\n", adev->periphid);
diff --git a/drivers/dma/sh/Kconfig b/drivers/dma/sh/Kconfig
index 5c1dee2..dadd9e01 100644
--- a/drivers/dma/sh/Kconfig
+++ b/drivers/dma/sh/Kconfig
@@ -22,3 +22,13 @@
 	depends on SH_DMAE_BASE
 	help
 	  Enable support for the Renesas SUDMAC controllers.
+
+config RCAR_HPB_DMAE
+	tristate "Renesas R-Car HPB DMAC support"
+	depends on SH_DMAE_BASE
+	help
+	  Enable support for the Renesas R-Car series DMA controllers.
+
+config SHDMA_R8A73A4
+	def_bool y
+	depends on ARCH_R8A73A4 && SH_DMAE != n
diff --git a/drivers/dma/sh/Makefile b/drivers/dma/sh/Makefile
index c962138..e856af2 100644
--- a/drivers/dma/sh/Makefile
+++ b/drivers/dma/sh/Makefile
@@ -1,3 +1,9 @@
 obj-$(CONFIG_SH_DMAE_BASE) += shdma-base.o shdma-of.o
 obj-$(CONFIG_SH_DMAE) += shdma.o
+shdma-y := shdmac.o
+ifeq ($(CONFIG_OF),y)
+shdma-$(CONFIG_SHDMA_R8A73A4) += shdma-r8a73a4.o
+endif
+shdma-objs := $(shdma-y)
 obj-$(CONFIG_SUDMAC) += sudmac.o
+obj-$(CONFIG_RCAR_HPB_DMAE) += rcar-hpbdma.o
diff --git a/drivers/dma/sh/rcar-hpbdma.c b/drivers/dma/sh/rcar-hpbdma.c
new file mode 100644
index 0000000..45a5202
--- /dev/null
+++ b/drivers/dma/sh/rcar-hpbdma.c
@@ -0,0 +1,655 @@
+/*
+ * Copyright (C) 2011-2013 Renesas Electronics Corporation
+ * Copyright (C) 2013 Cogent Embedded, Inc.
+ *
+ * This file is based on the drivers/dma/sh/shdma.c
+ *
+ * Renesas SuperH DMA Engine support
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * - DMA of SuperH does not have Hardware DMA chain mode.
+ * - max DMA size is 16MB.
+ *
+ */
+
+#include <linux/dmaengine.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/platform_data/dma-rcar-hpbdma.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/shdma-base.h>
+#include <linux/slab.h>
+
+/* DMA channel registers */
+#define HPB_DMAE_DSAR0	0x00
+#define HPB_DMAE_DDAR0	0x04
+#define HPB_DMAE_DTCR0	0x08
+#define HPB_DMAE_DSAR1	0x0C
+#define HPB_DMAE_DDAR1	0x10
+#define HPB_DMAE_DTCR1	0x14
+#define HPB_DMAE_DSASR	0x18
+#define HPB_DMAE_DDASR	0x1C
+#define HPB_DMAE_DTCSR	0x20
+#define HPB_DMAE_DPTR	0x24
+#define HPB_DMAE_DCR	0x28
+#define HPB_DMAE_DCMDR	0x2C
+#define HPB_DMAE_DSTPR	0x30
+#define HPB_DMAE_DSTSR	0x34
+#define HPB_DMAE_DDBGR	0x38
+#define HPB_DMAE_DDBGR2	0x3C
+#define HPB_DMAE_CHAN(n)	(0x40 * (n))
+
+/* DMA command register (DCMDR) bits */
+#define HPB_DMAE_DCMDR_BDOUT	BIT(7)
+#define HPB_DMAE_DCMDR_DQSPD	BIT(6)
+#define HPB_DMAE_DCMDR_DQSPC	BIT(5)
+#define HPB_DMAE_DCMDR_DMSPD	BIT(4)
+#define HPB_DMAE_DCMDR_DMSPC	BIT(3)
+#define HPB_DMAE_DCMDR_DQEND	BIT(2)
+#define HPB_DMAE_DCMDR_DNXT	BIT(1)
+#define HPB_DMAE_DCMDR_DMEN	BIT(0)
+
+/* DMA forced stop register (DSTPR) bits */
+#define HPB_DMAE_DSTPR_DMSTP	BIT(0)
+
+/* DMA status register (DSTSR) bits */
+#define HPB_DMAE_DSTSR_DMSTS	BIT(0)
+
+/* DMA common registers */
+#define HPB_DMAE_DTIMR		0x00
+#define HPB_DMAE_DINTSR0		0x0C
+#define HPB_DMAE_DINTSR1		0x10
+#define HPB_DMAE_DINTCR0		0x14
+#define HPB_DMAE_DINTCR1		0x18
+#define HPB_DMAE_DINTMR0		0x1C
+#define HPB_DMAE_DINTMR1		0x20
+#define HPB_DMAE_DACTSR0		0x24
+#define HPB_DMAE_DACTSR1		0x28
+#define HPB_DMAE_HSRSTR(n)	(0x40 + (n) * 4)
+#define HPB_DMAE_HPB_DMASPR(n)	(0x140 + (n) * 4)
+#define HPB_DMAE_HPB_DMLVLR0	0x160
+#define HPB_DMAE_HPB_DMLVLR1	0x164
+#define HPB_DMAE_HPB_DMSHPT0	0x168
+#define HPB_DMAE_HPB_DMSHPT1	0x16C
+
+#define HPB_DMA_SLAVE_NUMBER 256
+#define HPB_DMA_TCR_MAX 0x01000000	/* 16 MiB */
+
+struct hpb_dmae_chan {
+	struct shdma_chan shdma_chan;
+	int xfer_mode;			/* DMA transfer mode */
+#define XFER_SINGLE	1
+#define XFER_DOUBLE	2
+	unsigned plane_idx;		/* current DMA information set */
+	bool first_desc;		/* first/next transfer */
+	int xmit_shift;			/* log_2(bytes_per_xfer) */
+	void __iomem *base;
+	const struct hpb_dmae_slave_config *cfg;
+	char dev_id[16];		/* unique name per DMAC of channel */
+};
+
+struct hpb_dmae_device {
+	struct shdma_dev shdma_dev;
+	spinlock_t reg_lock;		/* comm_reg operation lock */
+	struct hpb_dmae_pdata *pdata;
+	void __iomem *chan_reg;
+	void __iomem *comm_reg;
+	void __iomem *reset_reg;
+	void __iomem *mode_reg;
+};
+
+struct hpb_dmae_regs {
+	u32 sar; /* SAR / source address */
+	u32 dar; /* DAR / destination address */
+	u32 tcr; /* TCR / transfer count */
+};
+
+struct hpb_desc {
+	struct shdma_desc shdma_desc;
+	struct hpb_dmae_regs hw;
+	unsigned plane_idx;
+};
+
+#define to_chan(schan) container_of(schan, struct hpb_dmae_chan, shdma_chan)
+#define to_desc(sdesc) container_of(sdesc, struct hpb_desc, shdma_desc)
+#define to_dev(sc) container_of(sc->shdma_chan.dma_chan.device, \
+				struct hpb_dmae_device, shdma_dev.dma_dev)
+
+static void ch_reg_write(struct hpb_dmae_chan *hpb_dc, u32 data, u32 reg)
+{
+	iowrite32(data, hpb_dc->base + reg);
+}
+
+static u32 ch_reg_read(struct hpb_dmae_chan *hpb_dc, u32 reg)
+{
+	return ioread32(hpb_dc->base + reg);
+}
+
+static void dcmdr_write(struct hpb_dmae_device *hpbdev, u32 data)
+{
+	iowrite32(data, hpbdev->chan_reg + HPB_DMAE_DCMDR);
+}
+
+static void hsrstr_write(struct hpb_dmae_device *hpbdev, u32 ch)
+{
+	iowrite32(0x1, hpbdev->comm_reg + HPB_DMAE_HSRSTR(ch));
+}
+
+static u32 dintsr_read(struct hpb_dmae_device *hpbdev, u32 ch)
+{
+	u32 v;
+
+	if (ch < 32)
+		v = ioread32(hpbdev->comm_reg + HPB_DMAE_DINTSR0) >> ch;
+	else
+		v = ioread32(hpbdev->comm_reg + HPB_DMAE_DINTSR1) >> (ch - 32);
+	return v & 0x1;
+}
+
+static void dintcr_write(struct hpb_dmae_device *hpbdev, u32 ch)
+{
+	if (ch < 32)
+		iowrite32((0x1 << ch), hpbdev->comm_reg + HPB_DMAE_DINTCR0);
+	else
+		iowrite32((0x1 << (ch - 32)),
+			  hpbdev->comm_reg + HPB_DMAE_DINTCR1);
+}
+
+static void asyncmdr_write(struct hpb_dmae_device *hpbdev, u32 data)
+{
+	iowrite32(data, hpbdev->mode_reg);
+}
+
+static u32 asyncmdr_read(struct hpb_dmae_device *hpbdev)
+{
+	return ioread32(hpbdev->mode_reg);
+}
+
+static void hpb_dmae_enable_int(struct hpb_dmae_device *hpbdev, u32 ch)
+{
+	u32 intreg;
+
+	spin_lock_irq(&hpbdev->reg_lock);
+	if (ch < 32) {
+		intreg = ioread32(hpbdev->comm_reg + HPB_DMAE_DINTMR0);
+		iowrite32(BIT(ch) | intreg,
+			  hpbdev->comm_reg + HPB_DMAE_DINTMR0);
+	} else {
+		intreg = ioread32(hpbdev->comm_reg + HPB_DMAE_DINTMR1);
+		iowrite32(BIT(ch - 32) | intreg,
+			  hpbdev->comm_reg + HPB_DMAE_DINTMR1);
+	}
+	spin_unlock_irq(&hpbdev->reg_lock);
+}
+
+static void hpb_dmae_async_reset(struct hpb_dmae_device *hpbdev, u32 data)
+{
+	u32 rstr;
+	int timeout = 10000;	/* 100 ms */
+
+	spin_lock(&hpbdev->reg_lock);
+	rstr = ioread32(hpbdev->reset_reg);
+	rstr |= data;
+	iowrite32(rstr, hpbdev->reset_reg);
+	do {
+		rstr = ioread32(hpbdev->reset_reg);
+		if ((rstr & data) == data)
+			break;
+		udelay(10);
+	} while (timeout--);
+
+	if (timeout < 0)
+		dev_err(hpbdev->shdma_dev.dma_dev.dev,
+			"%s timeout\n", __func__);
+
+	rstr &= ~data;
+	iowrite32(rstr, hpbdev->reset_reg);
+	spin_unlock(&hpbdev->reg_lock);
+}
+
+static void hpb_dmae_set_async_mode(struct hpb_dmae_device *hpbdev,
+				    u32 mask, u32 data)
+{
+	u32 mode;
+
+	spin_lock_irq(&hpbdev->reg_lock);
+	mode = asyncmdr_read(hpbdev);
+	mode &= ~mask;
+	mode |= data;
+	asyncmdr_write(hpbdev, mode);
+	spin_unlock_irq(&hpbdev->reg_lock);
+}
+
+static void hpb_dmae_ctl_stop(struct hpb_dmae_device *hpbdev)
+{
+	dcmdr_write(hpbdev, HPB_DMAE_DCMDR_DQSPD);
+}
+
+static void hpb_dmae_reset(struct hpb_dmae_device *hpbdev)
+{
+	u32 ch;
+
+	for (ch = 0; ch < hpbdev->pdata->num_hw_channels; ch++)
+		hsrstr_write(hpbdev, ch);
+}
+
+static unsigned int calc_xmit_shift(struct hpb_dmae_chan *hpb_chan)
+{
+	struct hpb_dmae_device *hpbdev = to_dev(hpb_chan);
+	struct hpb_dmae_pdata *pdata = hpbdev->pdata;
+	int width = ch_reg_read(hpb_chan, HPB_DMAE_DCR);
+	int i;
+
+	switch (width & (HPB_DMAE_DCR_SPDS_MASK | HPB_DMAE_DCR_DPDS_MASK)) {
+	case HPB_DMAE_DCR_SPDS_8BIT | HPB_DMAE_DCR_DPDS_8BIT:
+	default:
+		i = XMIT_SZ_8BIT;
+		break;
+	case HPB_DMAE_DCR_SPDS_16BIT | HPB_DMAE_DCR_DPDS_16BIT:
+		i = XMIT_SZ_16BIT;
+		break;
+	case HPB_DMAE_DCR_SPDS_32BIT | HPB_DMAE_DCR_DPDS_32BIT:
+		i = XMIT_SZ_32BIT;
+		break;
+	}
+	return pdata->ts_shift[i];
+}
+
+static void hpb_dmae_set_reg(struct hpb_dmae_chan *hpb_chan,
+			     struct hpb_dmae_regs *hw, unsigned plane)
+{
+	ch_reg_write(hpb_chan, hw->sar,
+		     plane ? HPB_DMAE_DSAR1 : HPB_DMAE_DSAR0);
+	ch_reg_write(hpb_chan, hw->dar,
+		     plane ? HPB_DMAE_DDAR1 : HPB_DMAE_DDAR0);
+	ch_reg_write(hpb_chan, hw->tcr >> hpb_chan->xmit_shift,
+		     plane ? HPB_DMAE_DTCR1 : HPB_DMAE_DTCR0);
+}
+
+static void hpb_dmae_start(struct hpb_dmae_chan *hpb_chan, bool next)
+{
+	ch_reg_write(hpb_chan, (next ? HPB_DMAE_DCMDR_DNXT : 0) |
+		     HPB_DMAE_DCMDR_DMEN, HPB_DMAE_DCMDR);
+}
+
+static void hpb_dmae_halt(struct shdma_chan *schan)
+{
+	struct hpb_dmae_chan *chan = to_chan(schan);
+
+	ch_reg_write(chan, HPB_DMAE_DCMDR_DQEND, HPB_DMAE_DCMDR);
+	ch_reg_write(chan, HPB_DMAE_DSTPR_DMSTP, HPB_DMAE_DSTPR);
+}
+
+static const struct hpb_dmae_slave_config *
+hpb_dmae_find_slave(struct hpb_dmae_chan *hpb_chan, int slave_id)
+{
+	struct hpb_dmae_device *hpbdev = to_dev(hpb_chan);
+	struct hpb_dmae_pdata *pdata = hpbdev->pdata;
+	int i;
+
+	if (slave_id >= HPB_DMA_SLAVE_NUMBER)
+		return NULL;
+
+	for (i = 0; i < pdata->num_slaves; i++)
+		if (pdata->slaves[i].id == slave_id)
+			return pdata->slaves + i;
+
+	return NULL;
+}
+
+static void hpb_dmae_start_xfer(struct shdma_chan *schan,
+				struct shdma_desc *sdesc)
+{
+	struct hpb_dmae_chan *chan = to_chan(schan);
+	struct hpb_dmae_device *hpbdev = to_dev(chan);
+	struct hpb_desc *desc = to_desc(sdesc);
+
+	if (chan->cfg->flags & HPB_DMAE_SET_ASYNC_RESET)
+		hpb_dmae_async_reset(hpbdev, chan->cfg->rstr);
+
+	desc->plane_idx = chan->plane_idx;
+	hpb_dmae_set_reg(chan, &desc->hw, chan->plane_idx);
+	hpb_dmae_start(chan, !chan->first_desc);
+
+	if (chan->xfer_mode == XFER_DOUBLE) {
+		chan->plane_idx ^= 1;
+		chan->first_desc = false;
+	}
+}
+
+static bool hpb_dmae_desc_completed(struct shdma_chan *schan,
+				    struct shdma_desc *sdesc)
+{
+	/*
+	 * This is correct since we always have at most single
+	 * outstanding DMA transfer per channel, and by the time
+	 * we get completion interrupt the transfer is completed.
+	 * This will change if we ever use alternating DMA
+	 * information sets and submit two descriptors at once.
+	 */
+	return true;
+}
+
+static bool hpb_dmae_chan_irq(struct shdma_chan *schan, int irq)
+{
+	struct hpb_dmae_chan *chan = to_chan(schan);
+	struct hpb_dmae_device *hpbdev = to_dev(chan);
+	int ch = chan->cfg->dma_ch;
+
+	/* Check Complete DMA Transfer */
+	if (dintsr_read(hpbdev, ch)) {
+		/* Clear Interrupt status */
+		dintcr_write(hpbdev, ch);
+		return true;
+	}
+	return false;
+}
+
+static int hpb_dmae_desc_setup(struct shdma_chan *schan,
+			       struct shdma_desc *sdesc,
+			       dma_addr_t src, dma_addr_t dst, size_t *len)
+{
+	struct hpb_desc *desc = to_desc(sdesc);
+
+	if (*len > (size_t)HPB_DMA_TCR_MAX)
+		*len = (size_t)HPB_DMA_TCR_MAX;
+
+	desc->hw.sar = src;
+	desc->hw.dar = dst;
+	desc->hw.tcr = *len;
+
+	return 0;
+}
+
+static size_t hpb_dmae_get_partial(struct shdma_chan *schan,
+				   struct shdma_desc *sdesc)
+{
+	struct hpb_desc *desc = to_desc(sdesc);
+	struct hpb_dmae_chan *chan = to_chan(schan);
+	u32 tcr = ch_reg_read(chan, desc->plane_idx ?
+			      HPB_DMAE_DTCR1 : HPB_DMAE_DTCR0);
+
+	return (desc->hw.tcr - tcr) << chan->xmit_shift;
+}
+
+static bool hpb_dmae_channel_busy(struct shdma_chan *schan)
+{
+	struct hpb_dmae_chan *chan = to_chan(schan);
+	u32 dstsr = ch_reg_read(chan, HPB_DMAE_DSTSR);
+
+	return (dstsr & HPB_DMAE_DSTSR_DMSTS) == HPB_DMAE_DSTSR_DMSTS;
+}
+
+static int
+hpb_dmae_alloc_chan_resources(struct hpb_dmae_chan *hpb_chan,
+			      const struct hpb_dmae_slave_config *cfg)
+{
+	struct hpb_dmae_device *hpbdev = to_dev(hpb_chan);
+	struct hpb_dmae_pdata *pdata = hpbdev->pdata;
+	const struct hpb_dmae_channel *channel = pdata->channels;
+	int slave_id = cfg->id;
+	int i, err;
+
+	for (i = 0; i < pdata->num_channels; i++, channel++) {
+		if (channel->s_id == slave_id) {
+			struct device *dev = hpb_chan->shdma_chan.dev;
+
+			hpb_chan->base = hpbdev->chan_reg +
+				HPB_DMAE_CHAN(cfg->dma_ch);
+
+			dev_dbg(dev, "Detected Slave device\n");
+			dev_dbg(dev, " -- slave_id       : 0x%x\n", slave_id);
+			dev_dbg(dev, " -- cfg->dma_ch    : %d\n", cfg->dma_ch);
+			dev_dbg(dev, " -- channel->ch_irq: %d\n",
+				channel->ch_irq);
+			break;
+		}
+	}
+
+	err = shdma_request_irq(&hpb_chan->shdma_chan, channel->ch_irq,
+				IRQF_SHARED, hpb_chan->dev_id);
+	if (err) {
+		dev_err(hpb_chan->shdma_chan.dev,
+			"DMA channel request_irq %d failed with error %d\n",
+			channel->ch_irq, err);
+		return err;
+	}
+
+	hpb_chan->plane_idx = 0;
+	hpb_chan->first_desc = true;
+
+	if ((cfg->dcr & (HPB_DMAE_DCR_CT | HPB_DMAE_DCR_DIP)) == 0) {
+		hpb_chan->xfer_mode = XFER_SINGLE;
+	} else if ((cfg->dcr & (HPB_DMAE_DCR_CT | HPB_DMAE_DCR_DIP)) ==
+		   (HPB_DMAE_DCR_CT | HPB_DMAE_DCR_DIP)) {
+		hpb_chan->xfer_mode = XFER_DOUBLE;
+	} else {
+		dev_err(hpb_chan->shdma_chan.dev, "DCR setting error");
+		shdma_free_irq(&hpb_chan->shdma_chan);
+		return -EINVAL;
+	}
+
+	if (cfg->flags & HPB_DMAE_SET_ASYNC_MODE)
+		hpb_dmae_set_async_mode(hpbdev, cfg->mdm, cfg->mdr);
+	ch_reg_write(hpb_chan, cfg->dcr, HPB_DMAE_DCR);
+	ch_reg_write(hpb_chan, cfg->port, HPB_DMAE_DPTR);
+	hpb_chan->xmit_shift = calc_xmit_shift(hpb_chan);
+	hpb_dmae_enable_int(hpbdev, cfg->dma_ch);
+
+	return 0;
+}
+
+static int hpb_dmae_set_slave(struct shdma_chan *schan, int slave_id, bool try)
+{
+	struct hpb_dmae_chan *chan = to_chan(schan);
+	const struct hpb_dmae_slave_config *sc =
+		hpb_dmae_find_slave(chan, slave_id);
+
+	if (!sc)
+		return -ENODEV;
+	if (try)
+		return 0;
+	chan->cfg = sc;
+	return hpb_dmae_alloc_chan_resources(chan, sc);
+}
+
+static void hpb_dmae_setup_xfer(struct shdma_chan *schan, int slave_id)
+{
+}
+
+static dma_addr_t hpb_dmae_slave_addr(struct shdma_chan *schan)
+{
+	struct hpb_dmae_chan *chan = to_chan(schan);
+
+	return chan->cfg->addr;
+}
+
+static struct shdma_desc *hpb_dmae_embedded_desc(void *buf, int i)
+{
+	return &((struct hpb_desc *)buf)[i].shdma_desc;
+}
+
+static const struct shdma_ops hpb_dmae_ops = {
+	.desc_completed = hpb_dmae_desc_completed,
+	.halt_channel = hpb_dmae_halt,
+	.channel_busy = hpb_dmae_channel_busy,
+	.slave_addr = hpb_dmae_slave_addr,
+	.desc_setup = hpb_dmae_desc_setup,
+	.set_slave = hpb_dmae_set_slave,
+	.setup_xfer = hpb_dmae_setup_xfer,
+	.start_xfer = hpb_dmae_start_xfer,
+	.embedded_desc = hpb_dmae_embedded_desc,
+	.chan_irq = hpb_dmae_chan_irq,
+	.get_partial = hpb_dmae_get_partial,
+};
+
+static int hpb_dmae_chan_probe(struct hpb_dmae_device *hpbdev, int id)
+{
+	struct shdma_dev *sdev = &hpbdev->shdma_dev;
+	struct platform_device *pdev =
+		to_platform_device(hpbdev->shdma_dev.dma_dev.dev);
+	struct hpb_dmae_chan *new_hpb_chan;
+	struct shdma_chan *schan;
+
+	/* Alloc channel */
+	new_hpb_chan = devm_kzalloc(&pdev->dev,
+				    sizeof(struct hpb_dmae_chan), GFP_KERNEL);
+	if (!new_hpb_chan) {
+		dev_err(hpbdev->shdma_dev.dma_dev.dev,
+			"No free memory for allocating DMA channels!\n");
+		return -ENOMEM;
+	}
+
+	schan = &new_hpb_chan->shdma_chan;
+	shdma_chan_probe(sdev, schan, id);
+
+	if (pdev->id >= 0)
+		snprintf(new_hpb_chan->dev_id, sizeof(new_hpb_chan->dev_id),
+			 "hpb-dmae%d.%d", pdev->id, id);
+	else
+		snprintf(new_hpb_chan->dev_id, sizeof(new_hpb_chan->dev_id),
+			 "hpb-dma.%d", id);
+
+	return 0;
+}
+
+static int hpb_dmae_probe(struct platform_device *pdev)
+{
+	struct hpb_dmae_pdata *pdata = pdev->dev.platform_data;
+	struct hpb_dmae_device *hpbdev;
+	struct dma_device *dma_dev;
+	struct resource *chan, *comm, *rest, *mode, *irq_res;
+	int err, i;
+
+	/* Get platform data */
+	if (!pdata || !pdata->num_channels)
+		return -ENODEV;
+
+	chan = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	comm = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	rest = platform_get_resource(pdev, IORESOURCE_MEM, 2);
+	mode = platform_get_resource(pdev, IORESOURCE_MEM, 3);
+
+	irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+	if (!irq_res)
+		return -ENODEV;
+
+	hpbdev = devm_kzalloc(&pdev->dev, sizeof(struct hpb_dmae_device),
+			      GFP_KERNEL);
+	if (!hpbdev) {
+		dev_err(&pdev->dev, "Not enough memory\n");
+		return -ENOMEM;
+	}
+
+	hpbdev->chan_reg = devm_ioremap_resource(&pdev->dev, chan);
+	if (IS_ERR(hpbdev->chan_reg))
+		return PTR_ERR(hpbdev->chan_reg);
+
+	hpbdev->comm_reg = devm_ioremap_resource(&pdev->dev, comm);
+	if (IS_ERR(hpbdev->comm_reg))
+		return PTR_ERR(hpbdev->comm_reg);
+
+	hpbdev->reset_reg = devm_ioremap_resource(&pdev->dev, rest);
+	if (IS_ERR(hpbdev->reset_reg))
+		return PTR_ERR(hpbdev->reset_reg);
+
+	hpbdev->mode_reg = devm_ioremap_resource(&pdev->dev, mode);
+	if (IS_ERR(hpbdev->mode_reg))
+		return PTR_ERR(hpbdev->mode_reg);
+
+	dma_dev = &hpbdev->shdma_dev.dma_dev;
+
+	spin_lock_init(&hpbdev->reg_lock);
+
+	/* Platform data */
+	hpbdev->pdata = pdata;
+
+	pm_runtime_enable(&pdev->dev);
+	err = pm_runtime_get_sync(&pdev->dev);
+	if (err < 0)
+		dev_err(&pdev->dev, "%s(): GET = %d\n", __func__, err);
+
+	/* Reset DMA controller */
+	hpb_dmae_reset(hpbdev);
+
+	pm_runtime_put(&pdev->dev);
+
+	dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask);
+	dma_cap_set(DMA_SLAVE, dma_dev->cap_mask);
+
+	hpbdev->shdma_dev.ops = &hpb_dmae_ops;
+	hpbdev->shdma_dev.desc_size = sizeof(struct hpb_desc);
+	err = shdma_init(&pdev->dev, &hpbdev->shdma_dev, pdata->num_channels);
+	if (err < 0)
+		goto error;
+
+	/* Create DMA channels */
+	for (i = 0; i < pdata->num_channels; i++)
+		hpb_dmae_chan_probe(hpbdev, i);
+
+	platform_set_drvdata(pdev, hpbdev);
+	err = dma_async_device_register(dma_dev);
+	if (!err)
+		return 0;
+
+	shdma_cleanup(&hpbdev->shdma_dev);
+error:
+	pm_runtime_disable(&pdev->dev);
+	return err;
+}
+
+static void hpb_dmae_chan_remove(struct hpb_dmae_device *hpbdev)
+{
+	struct dma_device *dma_dev = &hpbdev->shdma_dev.dma_dev;
+	struct shdma_chan *schan;
+	int i;
+
+	shdma_for_each_chan(schan, &hpbdev->shdma_dev, i) {
+		BUG_ON(!schan);
+
+		shdma_free_irq(schan);
+		shdma_chan_remove(schan);
+	}
+	dma_dev->chancnt = 0;
+}
+
+static int hpb_dmae_remove(struct platform_device *pdev)
+{
+	struct hpb_dmae_device *hpbdev = platform_get_drvdata(pdev);
+
+	dma_async_device_unregister(&hpbdev->shdma_dev.dma_dev);
+
+	pm_runtime_disable(&pdev->dev);
+
+	hpb_dmae_chan_remove(hpbdev);
+
+	return 0;
+}
+
+static void hpb_dmae_shutdown(struct platform_device *pdev)
+{
+	struct hpb_dmae_device *hpbdev = platform_get_drvdata(pdev);
+	hpb_dmae_ctl_stop(hpbdev);
+}
+
+static struct platform_driver hpb_dmae_driver = {
+	.probe		= hpb_dmae_probe,
+	.remove		= hpb_dmae_remove,
+	.shutdown	= hpb_dmae_shutdown,
+	.driver = {
+		.owner	= THIS_MODULE,
+		.name	= "hpb-dma-engine",
+	},
+};
+module_platform_driver(hpb_dmae_driver);
+
+MODULE_AUTHOR("Max Filippov <max.filippov@cogentembedded.com>");
+MODULE_DESCRIPTION("Renesas HPB DMA Engine driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/sh/shdma-arm.h b/drivers/dma/sh/shdma-arm.h
new file mode 100644
index 0000000..a2b8258
--- /dev/null
+++ b/drivers/dma/sh/shdma-arm.h
@@ -0,0 +1,51 @@
+/*
+ * Renesas SuperH DMA Engine support
+ *
+ * Copyright (C) 2013 Renesas Electronics, Inc.
+ *
+ * This is free software; you can redistribute it and/or modify it under the
+ * terms of version 2 the GNU General Public License as published by the Free
+ * Software Foundation.
+ */
+
+#ifndef SHDMA_ARM_H
+#define SHDMA_ARM_H
+
+#include "shdma.h"
+
+/* Transmit sizes and respective CHCR register values */
+enum {
+	XMIT_SZ_8BIT		= 0,
+	XMIT_SZ_16BIT		= 1,
+	XMIT_SZ_32BIT		= 2,
+	XMIT_SZ_64BIT		= 7,
+	XMIT_SZ_128BIT		= 3,
+	XMIT_SZ_256BIT		= 4,
+	XMIT_SZ_512BIT		= 5,
+};
+
+/* log2(size / 8) - used to calculate number of transfers */
+#define SH_DMAE_TS_SHIFT {		\
+	[XMIT_SZ_8BIT]		= 0,	\
+	[XMIT_SZ_16BIT]		= 1,	\
+	[XMIT_SZ_32BIT]		= 2,	\
+	[XMIT_SZ_64BIT]		= 3,	\
+	[XMIT_SZ_128BIT]	= 4,	\
+	[XMIT_SZ_256BIT]	= 5,	\
+	[XMIT_SZ_512BIT]	= 6,	\
+}
+
+#define TS_LOW_BIT	0x3 /* --xx */
+#define TS_HI_BIT	0xc /* xx-- */
+
+#define TS_LOW_SHIFT	(3)
+#define TS_HI_SHIFT	(20 - 2)	/* 2 bits for shifted low TS */
+
+#define TS_INDEX2VAL(i) \
+	((((i) & TS_LOW_BIT) << TS_LOW_SHIFT) |\
+	 (((i) & TS_HI_BIT)  << TS_HI_SHIFT))
+
+#define CHCR_TX(xmit_sz) (DM_FIX | SM_INC | 0x800 | TS_INDEX2VAL((xmit_sz)))
+#define CHCR_RX(xmit_sz) (DM_INC | SM_FIX | 0x800 | TS_INDEX2VAL((xmit_sz)))
+
+#endif
diff --git a/drivers/dma/sh/shdma-base.c b/drivers/dma/sh/shdma-base.c
index 28ca361..d94ab59 100644
--- a/drivers/dma/sh/shdma-base.c
+++ b/drivers/dma/sh/shdma-base.c
@@ -171,7 +171,8 @@
 	return NULL;
 }
 
-static int shdma_setup_slave(struct shdma_chan *schan, int slave_id)
+static int shdma_setup_slave(struct shdma_chan *schan, int slave_id,
+			     dma_addr_t slave_addr)
 {
 	struct shdma_dev *sdev = to_shdma_dev(schan->dma_chan.device);
 	const struct shdma_ops *ops = sdev->ops;
@@ -179,7 +180,7 @@
 
 	if (schan->dev->of_node) {
 		match = schan->hw_req;
-		ret = ops->set_slave(schan, match, true);
+		ret = ops->set_slave(schan, match, slave_addr, true);
 		if (ret < 0)
 			return ret;
 
@@ -194,7 +195,7 @@
 	if (test_and_set_bit(slave_id, shdma_slave_used))
 		return -EBUSY;
 
-	ret = ops->set_slave(schan, match, false);
+	ret = ops->set_slave(schan, match, slave_addr, false);
 	if (ret < 0) {
 		clear_bit(slave_id, shdma_slave_used);
 		return ret;
@@ -236,7 +237,7 @@
 	if (!schan->dev->of_node && match >= slave_num)
 		return false;
 
-	ret = ops->set_slave(schan, match, true);
+	ret = ops->set_slave(schan, match, 0, true);
 	if (ret < 0)
 		return false;
 
@@ -259,7 +260,7 @@
 	 */
 	if (slave) {
 		/* Legacy mode: .private is set in filter */
-		ret = shdma_setup_slave(schan, slave->slave_id);
+		ret = shdma_setup_slave(schan, slave->slave_id, 0);
 		if (ret < 0)
 			goto esetslave;
 	} else {
@@ -680,7 +681,9 @@
 		 * channel, while using it...
 		 */
 		config = (struct dma_slave_config *)arg;
-		ret = shdma_setup_slave(schan, config->slave_id);
+		ret = shdma_setup_slave(schan, config->slave_id,
+					config->direction == DMA_DEV_TO_MEM ?
+					config->src_addr : config->dst_addr);
 		if (ret < 0)
 			return ret;
 		break;
@@ -831,8 +834,8 @@
 int shdma_request_irq(struct shdma_chan *schan, int irq,
 			   unsigned long flags, const char *name)
 {
-	int ret = request_threaded_irq(irq, chan_irq, chan_irqt,
-				       flags, name, schan);
+	int ret = devm_request_threaded_irq(schan->dev, irq, chan_irq,
+					    chan_irqt, flags, name, schan);
 
 	schan->irq = ret < 0 ? ret : irq;
 
@@ -840,13 +843,6 @@
 }
 EXPORT_SYMBOL(shdma_request_irq);
 
-void shdma_free_irq(struct shdma_chan *schan)
-{
-	if (schan->irq >= 0)
-		free_irq(schan->irq, schan);
-}
-EXPORT_SYMBOL(shdma_free_irq);
-
 void shdma_chan_probe(struct shdma_dev *sdev,
 			   struct shdma_chan *schan, int id)
 {
diff --git a/drivers/dma/sh/shdma-of.c b/drivers/dma/sh/shdma-of.c
index 11bcb05..06473a0 100644
--- a/drivers/dma/sh/shdma-of.c
+++ b/drivers/dma/sh/shdma-of.c
@@ -42,12 +42,9 @@
 
 static int shdma_of_probe(struct platform_device *pdev)
 {
-	const struct of_dev_auxdata *lookup = pdev->dev.platform_data;
+	const struct of_dev_auxdata *lookup = dev_get_platdata(&pdev->dev);
 	int ret;
 
-	if (!lookup)
-		return -EINVAL;
-
 	ret = of_dma_controller_register(pdev->dev.of_node,
 					 shdma_of_xlate, pdev);
 	if (ret < 0)
diff --git a/drivers/dma/sh/shdma-r8a73a4.c b/drivers/dma/sh/shdma-r8a73a4.c
new file mode 100644
index 0000000..4fb9997
--- /dev/null
+++ b/drivers/dma/sh/shdma-r8a73a4.c
@@ -0,0 +1,77 @@
+/*
+ * Renesas SuperH DMA Engine support for r8a73a4 (APE6) SoCs
+ *
+ * Copyright (C) 2013 Renesas Electronics, Inc.
+ *
+ * This is free software; you can redistribute it and/or modify it under the
+ * terms of version 2 the GNU General Public License as published by the Free
+ * Software Foundation.
+ */
+#include <linux/sh_dma.h>
+
+#include "shdma-arm.h"
+
+const unsigned int dma_ts_shift[] = SH_DMAE_TS_SHIFT;
+
+static const struct sh_dmae_slave_config dma_slaves[] = {
+	{
+		.chcr		= CHCR_TX(XMIT_SZ_32BIT),
+		.mid_rid	= 0xd1,		/* MMC0 Tx */
+	}, {
+		.chcr		= CHCR_RX(XMIT_SZ_32BIT),
+		.mid_rid	= 0xd2,		/* MMC0 Rx */
+	}, {
+		.chcr		= CHCR_TX(XMIT_SZ_32BIT),
+		.mid_rid	= 0xe1,		/* MMC1 Tx */
+	}, {
+		.chcr		= CHCR_RX(XMIT_SZ_32BIT),
+		.mid_rid	= 0xe2,		/* MMC1 Rx */
+	},
+};
+
+#define DMAE_CHANNEL(a, b)				\
+	{						\
+		.offset         = (a) - 0x20,		\
+		.dmars          = (a) - 0x20 + 0x40,	\
+		.chclr_bit	= (b),			\
+		.chclr_offset	= 0x80 - 0x20,		\
+	}
+
+static const struct sh_dmae_channel dma_channels[] = {
+	DMAE_CHANNEL(0x8000, 0),
+	DMAE_CHANNEL(0x8080, 1),
+	DMAE_CHANNEL(0x8100, 2),
+	DMAE_CHANNEL(0x8180, 3),
+	DMAE_CHANNEL(0x8200, 4),
+	DMAE_CHANNEL(0x8280, 5),
+	DMAE_CHANNEL(0x8300, 6),
+	DMAE_CHANNEL(0x8380, 7),
+	DMAE_CHANNEL(0x8400, 8),
+	DMAE_CHANNEL(0x8480, 9),
+	DMAE_CHANNEL(0x8500, 10),
+	DMAE_CHANNEL(0x8580, 11),
+	DMAE_CHANNEL(0x8600, 12),
+	DMAE_CHANNEL(0x8680, 13),
+	DMAE_CHANNEL(0x8700, 14),
+	DMAE_CHANNEL(0x8780, 15),
+	DMAE_CHANNEL(0x8800, 16),
+	DMAE_CHANNEL(0x8880, 17),
+	DMAE_CHANNEL(0x8900, 18),
+	DMAE_CHANNEL(0x8980, 19),
+};
+
+const struct sh_dmae_pdata r8a73a4_dma_pdata = {
+	.slave		= dma_slaves,
+	.slave_num	= ARRAY_SIZE(dma_slaves),
+	.channel	= dma_channels,
+	.channel_num	= ARRAY_SIZE(dma_channels),
+	.ts_low_shift	= TS_LOW_SHIFT,
+	.ts_low_mask	= TS_LOW_BIT << TS_LOW_SHIFT,
+	.ts_high_shift	= TS_HI_SHIFT,
+	.ts_high_mask	= TS_HI_BIT << TS_HI_SHIFT,
+	.ts_shift	= dma_ts_shift,
+	.ts_shift_num	= ARRAY_SIZE(dma_ts_shift),
+	.dmaor_init     = DMAOR_DME,
+	.chclr_present	= 1,
+	.chclr_bitwise	= 1,
+};
diff --git a/drivers/dma/sh/shdma.h b/drivers/dma/sh/shdma.h
index 9314e93..758a57b 100644
--- a/drivers/dma/sh/shdma.h
+++ b/drivers/dma/sh/shdma.h
@@ -28,18 +28,19 @@
 	struct shdma_chan shdma_chan;
 	const struct sh_dmae_slave_config *config; /* Slave DMA configuration */
 	int xmit_shift;			/* log_2(bytes_per_xfer) */
-	u32 __iomem *base;
+	void __iomem *base;
 	char dev_id[16];		/* unique name per DMAC of channel */
 	int pm_error;
+	dma_addr_t slave_addr;
 };
 
 struct sh_dmae_device {
 	struct shdma_dev shdma_dev;
 	struct sh_dmae_chan *chan[SH_DMAE_MAX_CHANNELS];
-	struct sh_dmae_pdata *pdata;
+	const struct sh_dmae_pdata *pdata;
 	struct list_head node;
-	u32 __iomem *chan_reg;
-	u16 __iomem *dmars;
+	void __iomem *chan_reg;
+	void __iomem *dmars;
 	unsigned int chcr_offset;
 	u32 chcr_ie_bit;
 };
@@ -61,4 +62,11 @@
 #define to_sh_dev(chan) container_of(chan->shdma_chan.dma_chan.device,\
 				     struct sh_dmae_device, shdma_dev.dma_dev)
 
+#ifdef CONFIG_SHDMA_R8A73A4
+extern const struct sh_dmae_pdata r8a73a4_dma_pdata;
+#define r8a73a4_shdma_devid (&r8a73a4_dma_pdata)
+#else
+#define r8a73a4_shdma_devid NULL
+#endif
+
 #endif	/* __DMA_SHDMA_H */
diff --git a/drivers/dma/sh/shdma.c b/drivers/dma/sh/shdmac.c
similarity index 88%
rename from drivers/dma/sh/shdma.c
rename to drivers/dma/sh/shdmac.c
index 5039fbc..1069e88 100644
--- a/drivers/dma/sh/shdma.c
+++ b/drivers/dma/sh/shdmac.c
@@ -20,6 +20,8 @@
 
 #include <linux/init.h>
 #include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
 #include <linux/slab.h>
 #include <linux/interrupt.h>
 #include <linux/dmaengine.h>
@@ -35,6 +37,15 @@
 #include "../dmaengine.h"
 #include "shdma.h"
 
+/* DMA register */
+#define SAR	0x00
+#define DAR	0x04
+#define TCR	0x08
+#define CHCR	0x0C
+#define DMAOR	0x40
+
+#define TEND	0x18 /* USB-DMAC */
+
 #define SH_DMAE_DRV_NAME "sh-dma-engine"
 
 /* Default MEMCPY transfer size = 2^2 = 4 bytes */
@@ -49,27 +60,37 @@
 static DEFINE_SPINLOCK(sh_dmae_lock);
 static LIST_HEAD(sh_dmae_devices);
 
-static void chclr_write(struct sh_dmae_chan *sh_dc, u32 data)
+/*
+ * Different DMAC implementations provide different ways to clear DMA channels:
+ * (1) none - no CHCLR registers are available
+ * (2) one CHCLR register per channel - 0 has to be written to it to clear
+ *     channel buffers
+ * (3) one CHCLR per several channels - 1 has to be written to the bit,
+ *     corresponding to the specific channel to reset it
+ */
+static void channel_clear(struct sh_dmae_chan *sh_dc)
 {
 	struct sh_dmae_device *shdev = to_sh_dev(sh_dc);
+	const struct sh_dmae_channel *chan_pdata = shdev->pdata->channel +
+		sh_dc->shdma_chan.id;
+	u32 val = shdev->pdata->chclr_bitwise ? 1 << chan_pdata->chclr_bit : 0;
 
-	__raw_writel(data, shdev->chan_reg +
-		     shdev->pdata->channel[sh_dc->shdma_chan.id].chclr_offset);
+	__raw_writel(val, shdev->chan_reg + chan_pdata->chclr_offset);
 }
 
 static void sh_dmae_writel(struct sh_dmae_chan *sh_dc, u32 data, u32 reg)
 {
-	__raw_writel(data, sh_dc->base + reg / sizeof(u32));
+	__raw_writel(data, sh_dc->base + reg);
 }
 
 static u32 sh_dmae_readl(struct sh_dmae_chan *sh_dc, u32 reg)
 {
-	return __raw_readl(sh_dc->base + reg / sizeof(u32));
+	return __raw_readl(sh_dc->base + reg);
 }
 
 static u16 dmaor_read(struct sh_dmae_device *shdev)
 {
-	u32 __iomem *addr = shdev->chan_reg + DMAOR / sizeof(u32);
+	void __iomem *addr = shdev->chan_reg + DMAOR;
 
 	if (shdev->pdata->dmaor_is_32bit)
 		return __raw_readl(addr);
@@ -79,7 +100,7 @@
 
 static void dmaor_write(struct sh_dmae_device *shdev, u16 data)
 {
-	u32 __iomem *addr = shdev->chan_reg + DMAOR / sizeof(u32);
+	void __iomem *addr = shdev->chan_reg + DMAOR;
 
 	if (shdev->pdata->dmaor_is_32bit)
 		__raw_writel(data, addr);
@@ -91,14 +112,14 @@
 {
 	struct sh_dmae_device *shdev = to_sh_dev(sh_dc);
 
-	__raw_writel(data, sh_dc->base + shdev->chcr_offset / sizeof(u32));
+	__raw_writel(data, sh_dc->base + shdev->chcr_offset);
 }
 
 static u32 chcr_read(struct sh_dmae_chan *sh_dc)
 {
 	struct sh_dmae_device *shdev = to_sh_dev(sh_dc);
 
-	return __raw_readl(sh_dc->base + shdev->chcr_offset / sizeof(u32));
+	return __raw_readl(sh_dc->base + shdev->chcr_offset);
 }
 
 /*
@@ -133,7 +154,7 @@
 		for (i = 0; i < shdev->pdata->channel_num; i++) {
 			struct sh_dmae_chan *sh_chan = shdev->chan[i];
 			if (sh_chan)
-				chclr_write(sh_chan, 0);
+				channel_clear(sh_chan);
 		}
 	}
 
@@ -167,7 +188,7 @@
 static unsigned int calc_xmit_shift(struct sh_dmae_chan *sh_chan, u32 chcr)
 {
 	struct sh_dmae_device *shdev = to_sh_dev(sh_chan);
-	struct sh_dmae_pdata *pdata = shdev->pdata;
+	const struct sh_dmae_pdata *pdata = shdev->pdata;
 	int cnt = ((chcr & pdata->ts_low_mask) >> pdata->ts_low_shift) |
 		((chcr & pdata->ts_high_mask) >> pdata->ts_high_shift);
 
@@ -180,7 +201,7 @@
 static u32 log2size_to_chcr(struct sh_dmae_chan *sh_chan, int l2size)
 {
 	struct sh_dmae_device *shdev = to_sh_dev(sh_chan);
-	struct sh_dmae_pdata *pdata = shdev->pdata;
+	const struct sh_dmae_pdata *pdata = shdev->pdata;
 	int i;
 
 	for (i = 0; i < pdata->ts_shift_num; i++)
@@ -240,9 +261,9 @@
 static int dmae_set_dmars(struct sh_dmae_chan *sh_chan, u16 val)
 {
 	struct sh_dmae_device *shdev = to_sh_dev(sh_chan);
-	struct sh_dmae_pdata *pdata = shdev->pdata;
+	const struct sh_dmae_pdata *pdata = shdev->pdata;
 	const struct sh_dmae_channel *chan_pdata = &pdata->channel[sh_chan->shdma_chan.id];
-	u16 __iomem *addr = shdev->dmars;
+	void __iomem *addr = shdev->dmars;
 	unsigned int shift = chan_pdata->dmars_bit;
 
 	if (dmae_is_busy(sh_chan))
@@ -253,8 +274,8 @@
 
 	/* in the case of a missing DMARS resource use first memory window */
 	if (!addr)
-		addr = (u16 __iomem *)shdev->chan_reg;
-	addr += chan_pdata->dmars / sizeof(u16);
+		addr = shdev->chan_reg;
+	addr += chan_pdata->dmars;
 
 	__raw_writew((__raw_readw(addr) & (0xff00 >> shift)) | (val << shift),
 		     addr);
@@ -309,7 +330,7 @@
 	struct sh_dmae_chan *sh_chan, int match)
 {
 	struct sh_dmae_device *shdev = to_sh_dev(sh_chan);
-	struct sh_dmae_pdata *pdata = shdev->pdata;
+	const struct sh_dmae_pdata *pdata = shdev->pdata;
 	const struct sh_dmae_slave_config *cfg;
 	int i;
 
@@ -323,7 +344,7 @@
 	} else {
 		for (i = 0, cfg = pdata->slave; i < pdata->slave_num; i++, cfg++)
 			if (cfg->mid_rid == match) {
-				sh_chan->shdma_chan.slave_id = cfg->slave_id;
+				sh_chan->shdma_chan.slave_id = i;
 				return cfg;
 			}
 	}
@@ -332,7 +353,7 @@
 }
 
 static int sh_dmae_set_slave(struct shdma_chan *schan,
-			     int slave_id, bool try)
+			     int slave_id, dma_addr_t slave_addr, bool try)
 {
 	struct sh_dmae_chan *sh_chan = container_of(schan, struct sh_dmae_chan,
 						    shdma_chan);
@@ -340,8 +361,10 @@
 	if (!cfg)
 		return -ENXIO;
 
-	if (!try)
+	if (!try) {
 		sh_chan->config = cfg;
+		sh_chan->slave_addr = slave_addr ? : cfg->addr;
+	}
 
 	return 0;
 }
@@ -505,7 +528,8 @@
 	struct shdma_chan *schan;
 	int err;
 
-	sh_chan = kzalloc(sizeof(struct sh_dmae_chan), GFP_KERNEL);
+	sh_chan = devm_kzalloc(sdev->dma_dev.dev, sizeof(struct sh_dmae_chan),
+			       GFP_KERNEL);
 	if (!sh_chan) {
 		dev_err(sdev->dma_dev.dev,
 			"No free memory for allocating dma channels!\n");
@@ -517,7 +541,7 @@
 
 	shdma_chan_probe(sdev, schan, id);
 
-	sh_chan->base = shdev->chan_reg + chan_pdata->offset / sizeof(u32);
+	sh_chan->base = shdev->chan_reg + chan_pdata->offset;
 
 	/* set up channel irq */
 	if (pdev->id >= 0)
@@ -541,7 +565,6 @@
 err_no_irq:
 	/* remove from dmaengine device node */
 	shdma_chan_remove(schan);
-	kfree(sh_chan);
 	return err;
 }
 
@@ -552,14 +575,9 @@
 	int i;
 
 	shdma_for_each_chan(schan, &shdev->shdma_dev, i) {
-		struct sh_dmae_chan *sh_chan = container_of(schan,
-					struct sh_dmae_chan, shdma_chan);
 		BUG_ON(!schan);
 
-		shdma_free_irq(&sh_chan->shdma_chan);
-
 		shdma_chan_remove(schan);
-		kfree(sh_chan);
 	}
 	dma_dev->chancnt = 0;
 }
@@ -636,7 +654,7 @@
 	 * This is an exclusive slave DMA operation, may only be called after a
 	 * successful slave configuration.
 	 */
-	return sh_chan->config->addr;
+	return sh_chan->slave_addr;
 }
 
 static struct shdma_desc *sh_dmae_embedded_desc(void *buf, int i)
@@ -658,9 +676,15 @@
 	.get_partial = sh_dmae_get_partial,
 };
 
+static const struct of_device_id sh_dmae_of_match[] = {
+	{.compatible = "renesas,shdma-r8a73a4", .data = r8a73a4_shdma_devid,},
+	{}
+};
+MODULE_DEVICE_TABLE(of, sh_dmae_of_match);
+
 static int sh_dmae_probe(struct platform_device *pdev)
 {
-	struct sh_dmae_pdata *pdata = pdev->dev.platform_data;
+	const struct sh_dmae_pdata *pdata;
 	unsigned long irqflags = IRQF_DISABLED,
 		chan_flag[SH_DMAE_MAX_CHANNELS] = {};
 	int errirq, chan_irq[SH_DMAE_MAX_CHANNELS];
@@ -669,6 +693,11 @@
 	struct dma_device *dma_dev;
 	struct resource *chan, *dmars, *errirq_res, *chanirq_res;
 
+	if (pdev->dev.of_node)
+		pdata = of_match_device(sh_dmae_of_match, &pdev->dev)->data;
+	else
+		pdata = dev_get_platdata(&pdev->dev);
+
 	/* get platform data */
 	if (!pdata || !pdata->channel_num)
 		return -ENODEV;
@@ -696,33 +725,22 @@
 	if (!chan || !errirq_res)
 		return -ENODEV;
 
-	if (!request_mem_region(chan->start, resource_size(chan), pdev->name)) {
-		dev_err(&pdev->dev, "DMAC register region already claimed\n");
-		return -EBUSY;
-	}
-
-	if (dmars && !request_mem_region(dmars->start, resource_size(dmars), pdev->name)) {
-		dev_err(&pdev->dev, "DMAC DMARS region already claimed\n");
-		err = -EBUSY;
-		goto ermrdmars;
-	}
-
-	err = -ENOMEM;
-	shdev = kzalloc(sizeof(struct sh_dmae_device), GFP_KERNEL);
+	shdev = devm_kzalloc(&pdev->dev, sizeof(struct sh_dmae_device),
+			     GFP_KERNEL);
 	if (!shdev) {
 		dev_err(&pdev->dev, "Not enough memory\n");
-		goto ealloc;
+		return -ENOMEM;
 	}
 
 	dma_dev = &shdev->shdma_dev.dma_dev;
 
-	shdev->chan_reg = ioremap(chan->start, resource_size(chan));
-	if (!shdev->chan_reg)
-		goto emapchan;
+	shdev->chan_reg = devm_ioremap_resource(&pdev->dev, chan);
+	if (IS_ERR(shdev->chan_reg))
+		return PTR_ERR(shdev->chan_reg);
 	if (dmars) {
-		shdev->dmars = ioremap(dmars->start, resource_size(dmars));
-		if (!shdev->dmars)
-			goto emapdmars;
+		shdev->dmars = devm_ioremap_resource(&pdev->dev, dmars);
+		if (IS_ERR(shdev->dmars))
+			return PTR_ERR(shdev->dmars);
 	}
 
 	if (!pdata->slave_only)
@@ -783,8 +801,8 @@
 
 	errirq = errirq_res->start;
 
-	err = request_irq(errirq, sh_dmae_err, irqflags,
-			  "DMAC Address Error", shdev);
+	err = devm_request_irq(&pdev->dev, errirq, sh_dmae_err, irqflags,
+			       "DMAC Address Error", shdev);
 	if (err) {
 		dev_err(&pdev->dev,
 			"DMA failed requesting irq #%d, error %d\n",
@@ -862,7 +880,6 @@
 	sh_dmae_chan_remove(shdev);
 
 #if defined(CONFIG_CPU_SH4) || defined(CONFIG_ARCH_SHMOBILE)
-	free_irq(errirq, shdev);
 eirq_err:
 #endif
 rst_err:
@@ -873,21 +890,9 @@
 	pm_runtime_put(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
 
-	platform_set_drvdata(pdev, NULL);
 	shdma_cleanup(&shdev->shdma_dev);
 eshdma:
-	if (dmars)
-		iounmap(shdev->dmars);
-emapdmars:
-	iounmap(shdev->chan_reg);
 	synchronize_rcu();
-emapchan:
-	kfree(shdev);
-ealloc:
-	if (dmars)
-		release_mem_region(dmars->start, resource_size(dmars));
-ermrdmars:
-	release_mem_region(chan->start, resource_size(chan));
 
 	return err;
 }
@@ -896,14 +901,9 @@
 {
 	struct sh_dmae_device *shdev = platform_get_drvdata(pdev);
 	struct dma_device *dma_dev = &shdev->shdma_dev.dma_dev;
-	struct resource *res;
-	int errirq = platform_get_irq(pdev, 0);
 
 	dma_async_device_unregister(dma_dev);
 
-	if (errirq > 0)
-		free_irq(errirq, shdev);
-
 	spin_lock_irq(&sh_dmae_lock);
 	list_del_rcu(&shdev->node);
 	spin_unlock_irq(&sh_dmae_lock);
@@ -913,31 +913,11 @@
 	sh_dmae_chan_remove(shdev);
 	shdma_cleanup(&shdev->shdma_dev);
 
-	if (shdev->dmars)
-		iounmap(shdev->dmars);
-	iounmap(shdev->chan_reg);
-
-	platform_set_drvdata(pdev, NULL);
-
 	synchronize_rcu();
-	kfree(shdev);
-
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (res)
-		release_mem_region(res->start, resource_size(res));
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-	if (res)
-		release_mem_region(res->start, resource_size(res));
 
 	return 0;
 }
 
-static const struct of_device_id sh_dmae_of_match[] = {
-	{ .compatible = "renesas,shdma", },
-	{ }
-};
-MODULE_DEVICE_TABLE(of, sh_dmae_of_match);
-
 static struct platform_driver sh_dmae_driver = {
 	.driver 	= {
 		.owner	= THIS_MODULE,
diff --git a/drivers/dma/sh/sudmac.c b/drivers/dma/sh/sudmac.c
index e7c94bb..c7e9cdf 100644
--- a/drivers/dma/sh/sudmac.c
+++ b/drivers/dma/sh/sudmac.c
@@ -150,7 +150,8 @@
 	return NULL;
 }
 
-static int sudmac_set_slave(struct shdma_chan *schan, int slave_id, bool try)
+static int sudmac_set_slave(struct shdma_chan *schan, int slave_id,
+			    dma_addr_t slave_addr, bool try)
 {
 	struct sudmac_chan *sc = to_chan(schan);
 	const struct sudmac_slave_config *cfg = sudmac_find_slave(sc, slave_id);
@@ -298,11 +299,8 @@
 	int i;
 
 	shdma_for_each_chan(schan, &su_dev->shdma_dev, i) {
-		struct sudmac_chan *sc = to_chan(schan);
-
 		BUG_ON(!schan);
 
-		shdma_free_irq(&sc->shdma_chan);
 		shdma_chan_remove(schan);
 	}
 	dma_dev->chancnt = 0;
@@ -335,7 +333,7 @@
 
 static int sudmac_probe(struct platform_device *pdev)
 {
-	struct sudmac_pdata *pdata = pdev->dev.platform_data;
+	struct sudmac_pdata *pdata = dev_get_platdata(&pdev->dev);
 	int err, i;
 	struct sudmac_device *su_dev;
 	struct dma_device *dma_dev;
@@ -345,9 +343,8 @@
 	if (!pdata)
 		return -ENODEV;
 
-	chan = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
-	if (!chan || !irq_res)
+	if (!irq_res)
 		return -ENODEV;
 
 	err = -ENOMEM;
@@ -360,9 +357,10 @@
 
 	dma_dev = &su_dev->shdma_dev.dma_dev;
 
-	su_dev->chan_reg = devm_request_and_ioremap(&pdev->dev, chan);
-	if (!su_dev->chan_reg)
-		return err;
+	chan = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	su_dev->chan_reg = devm_ioremap_resource(&pdev->dev, chan);
+	if (IS_ERR(su_dev->chan_reg))
+		return PTR_ERR(su_dev->chan_reg);
 
 	dma_cap_set(DMA_SLAVE, dma_dev->cap_mask);
 
@@ -373,7 +371,7 @@
 		return err;
 
 	/* platform data */
-	su_dev->pdata = pdev->dev.platform_data;
+	su_dev->pdata = dev_get_platdata(&pdev->dev);
 
 	platform_set_drvdata(pdev, su_dev);
 
@@ -393,7 +391,6 @@
 chan_probe_err:
 	sudmac_chan_remove(su_dev);
 
-	platform_set_drvdata(pdev, NULL);
 	shdma_cleanup(&su_dev->shdma_dev);
 
 	return err;
@@ -407,7 +404,6 @@
 	dma_async_device_unregister(dma_dev);
 	sudmac_chan_remove(su_dev);
 	shdma_cleanup(&su_dev->shdma_dev);
-	platform_set_drvdata(pdev, NULL);
 
 	return 0;
 }
diff --git a/drivers/dma/sirf-dma.c b/drivers/dma/sirf-dma.c
index 716b23e..6aec3ad 100644
--- a/drivers/dma/sirf-dma.c
+++ b/drivers/dma/sirf-dma.c
@@ -9,6 +9,7 @@
 #include <linux/module.h>
 #include <linux/dmaengine.h>
 #include <linux/dma-mapping.h>
+#include <linux/pm_runtime.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/slab.h>
@@ -73,6 +74,11 @@
 	int				mode;
 };
 
+struct sirfsoc_dma_regs {
+	u32				ctrl[SIRFSOC_DMA_CHANNELS];
+	u32				interrupt_en;
+};
+
 struct sirfsoc_dma {
 	struct dma_device		dma;
 	struct tasklet_struct		tasklet;
@@ -81,10 +87,13 @@
 	int				irq;
 	struct clk			*clk;
 	bool				is_marco;
+	struct sirfsoc_dma_regs		regs_save;
 };
 
 #define DRV_NAME	"sirfsoc_dma"
 
+static int sirfsoc_dma_runtime_suspend(struct device *dev);
+
 /* Convert struct dma_chan to struct sirfsoc_dma_chan */
 static inline
 struct sirfsoc_dma_chan *dma_chan_to_sirfsoc_dma_chan(struct dma_chan *c)
@@ -393,6 +402,8 @@
 	LIST_HEAD(descs);
 	int i;
 
+	pm_runtime_get_sync(sdma->dma.dev);
+
 	/* Alloc descriptors for this channel */
 	for (i = 0; i < SIRFSOC_DMA_DESCRIPTORS; i++) {
 		sdesc = kzalloc(sizeof(*sdesc), GFP_KERNEL);
@@ -425,6 +436,7 @@
 static void sirfsoc_dma_free_chan_resources(struct dma_chan *chan)
 {
 	struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
+	struct sirfsoc_dma *sdma = dma_chan_to_sirfsoc_dma(chan);
 	struct sirfsoc_dma_desc *sdesc, *tmp;
 	unsigned long flags;
 	LIST_HEAD(descs);
@@ -445,6 +457,8 @@
 	/* Free descriptors */
 	list_for_each_entry_safe(sdesc, tmp, &descs, node)
 		kfree(sdesc);
+
+	pm_runtime_put(sdma->dma.dev);
 }
 
 /* Send pending descriptor to hardware */
@@ -595,7 +609,7 @@
 	spin_unlock_irqrestore(&schan->lock, iflags);
 
 	if (!sdesc)
-		return 0;
+		return NULL;
 
 	/* Place descriptor in prepared list */
 	spin_lock_irqsave(&schan->lock, iflags);
@@ -723,14 +737,14 @@
 
 	tasklet_init(&sdma->tasklet, sirfsoc_dma_tasklet, (unsigned long)sdma);
 
-	clk_prepare_enable(sdma->clk);
-
 	/* Register DMA engine */
 	dev_set_drvdata(dev, sdma);
+
 	ret = dma_async_device_register(dma);
 	if (ret)
 		goto free_irq;
 
+	pm_runtime_enable(&op->dev);
 	dev_info(dev, "initialized SIRFSOC DMAC driver\n");
 
 	return 0;
@@ -747,13 +761,124 @@
 	struct device *dev = &op->dev;
 	struct sirfsoc_dma *sdma = dev_get_drvdata(dev);
 
-	clk_disable_unprepare(sdma->clk);
 	dma_async_device_unregister(&sdma->dma);
 	free_irq(sdma->irq, sdma);
 	irq_dispose_mapping(sdma->irq);
+	pm_runtime_disable(&op->dev);
+	if (!pm_runtime_status_suspended(&op->dev))
+		sirfsoc_dma_runtime_suspend(&op->dev);
+
 	return 0;
 }
 
+static int sirfsoc_dma_runtime_suspend(struct device *dev)
+{
+	struct sirfsoc_dma *sdma = dev_get_drvdata(dev);
+
+	clk_disable_unprepare(sdma->clk);
+	return 0;
+}
+
+static int sirfsoc_dma_runtime_resume(struct device *dev)
+{
+	struct sirfsoc_dma *sdma = dev_get_drvdata(dev);
+	int ret;
+
+	ret = clk_prepare_enable(sdma->clk);
+	if (ret < 0) {
+		dev_err(dev, "clk_enable failed: %d\n", ret);
+		return ret;
+	}
+	return 0;
+}
+
+static int sirfsoc_dma_pm_suspend(struct device *dev)
+{
+	struct sirfsoc_dma *sdma = dev_get_drvdata(dev);
+	struct sirfsoc_dma_regs *save = &sdma->regs_save;
+	struct sirfsoc_dma_desc *sdesc;
+	struct sirfsoc_dma_chan *schan;
+	int ch;
+	int ret;
+
+	/*
+	 * if we were runtime-suspended before, resume to enable clock
+	 * before accessing register
+	 */
+	if (pm_runtime_status_suspended(dev)) {
+		ret = sirfsoc_dma_runtime_resume(dev);
+		if (ret < 0)
+			return ret;
+	}
+
+	/*
+	 * DMA controller will lose all registers while suspending
+	 * so we need to save registers for active channels
+	 */
+	for (ch = 0; ch < SIRFSOC_DMA_CHANNELS; ch++) {
+		schan = &sdma->channels[ch];
+		if (list_empty(&schan->active))
+			continue;
+		sdesc = list_first_entry(&schan->active,
+			struct sirfsoc_dma_desc,
+			node);
+		save->ctrl[ch] = readl_relaxed(sdma->base +
+			ch * 0x10 + SIRFSOC_DMA_CH_CTRL);
+	}
+	save->interrupt_en = readl_relaxed(sdma->base + SIRFSOC_DMA_INT_EN);
+
+	/* Disable clock */
+	sirfsoc_dma_runtime_suspend(dev);
+
+	return 0;
+}
+
+static int sirfsoc_dma_pm_resume(struct device *dev)
+{
+	struct sirfsoc_dma *sdma = dev_get_drvdata(dev);
+	struct sirfsoc_dma_regs *save = &sdma->regs_save;
+	struct sirfsoc_dma_desc *sdesc;
+	struct sirfsoc_dma_chan *schan;
+	int ch;
+	int ret;
+
+	/* Enable clock before accessing register */
+	ret = sirfsoc_dma_runtime_resume(dev);
+	if (ret < 0)
+		return ret;
+
+	writel_relaxed(save->interrupt_en, sdma->base + SIRFSOC_DMA_INT_EN);
+	for (ch = 0; ch < SIRFSOC_DMA_CHANNELS; ch++) {
+		schan = &sdma->channels[ch];
+		if (list_empty(&schan->active))
+			continue;
+		sdesc = list_first_entry(&schan->active,
+			struct sirfsoc_dma_desc,
+			node);
+		writel_relaxed(sdesc->width,
+			sdma->base + SIRFSOC_DMA_WIDTH_0 + ch * 4);
+		writel_relaxed(sdesc->xlen,
+			sdma->base + ch * 0x10 + SIRFSOC_DMA_CH_XLEN);
+		writel_relaxed(sdesc->ylen,
+			sdma->base + ch * 0x10 + SIRFSOC_DMA_CH_YLEN);
+		writel_relaxed(save->ctrl[ch],
+			sdma->base + ch * 0x10 + SIRFSOC_DMA_CH_CTRL);
+		writel_relaxed(sdesc->addr >> 2,
+			sdma->base + ch * 0x10 + SIRFSOC_DMA_CH_ADDR);
+	}
+
+	/* if we were runtime-suspended before, suspend again */
+	if (pm_runtime_status_suspended(dev))
+		sirfsoc_dma_runtime_suspend(dev);
+
+	return 0;
+}
+
+static const struct dev_pm_ops sirfsoc_dma_pm_ops = {
+	SET_RUNTIME_PM_OPS(sirfsoc_dma_runtime_suspend, sirfsoc_dma_runtime_resume, NULL)
+	SET_SYSTEM_SLEEP_PM_OPS(sirfsoc_dma_pm_suspend, sirfsoc_dma_pm_resume)
+};
+
 static struct of_device_id sirfsoc_dma_match[] = {
 	{ .compatible = "sirf,prima2-dmac", },
 	{ .compatible = "sirf,marco-dmac", },
@@ -766,6 +891,7 @@
 	.driver = {
 		.name = DRV_NAME,
 		.owner = THIS_MODULE,
+		.pm = &sirfsoc_dma_pm_ops,
 		.of_match_table	= sirfsoc_dma_match,
 	},
 };
diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c
index 5ab5880..82d2b97 100644
--- a/drivers/dma/ste_dma40.c
+++ b/drivers/dma/ste_dma40.c
@@ -2591,6 +2591,9 @@
 	int i;
 
 	sg = kcalloc(periods + 1, sizeof(struct scatterlist), GFP_NOWAIT);
+	if (!sg)
+		return NULL;
+
 	for (i = 0; i < periods; i++) {
 		sg_dma_address(&sg[i]) = dma_addr;
 		sg_dma_len(&sg[i]) = period_len;
@@ -3139,7 +3142,7 @@
 
 static struct d40_base * __init d40_hw_detect_init(struct platform_device *pdev)
 {
-	struct stedma40_platform_data *plat_data = pdev->dev.platform_data;
+	struct stedma40_platform_data *plat_data = dev_get_platdata(&pdev->dev);
 	struct clk *clk = NULL;
 	void __iomem *virtbase = NULL;
 	struct resource *res = NULL;
@@ -3226,8 +3229,8 @@
 	num_log_chans = num_phy_chans * D40_MAX_LOG_CHAN_PER_PHY;
 
 	dev_info(&pdev->dev,
-		 "hardware rev: %d @ 0x%x with %d physical and %d logical channels\n",
-		 rev, res->start, num_phy_chans, num_log_chans);
+		 "hardware rev: %d @ %pa with %d physical and %d logical channels\n",
+		 rev, &res->start, num_phy_chans, num_log_chans);
 
 	base = kzalloc(ALIGN(sizeof(struct d40_base), 4) +
 		       (num_phy_chans + num_log_chans + num_memcpy_chans) *
@@ -3485,7 +3488,7 @@
 {
 	struct stedma40_platform_data *pdata;
 	int num_phy = 0, num_memcpy = 0, num_disabled = 0;
-	const const __be32 *list;
+	const __be32 *list;
 
 	pdata = devm_kzalloc(&pdev->dev,
 			     sizeof(struct stedma40_platform_data),
@@ -3516,7 +3519,7 @@
 	list = of_get_property(np, "disabled-channels", &num_disabled);
 	num_disabled /= sizeof(*list);
 
-	if (num_disabled > STEDMA40_MAX_PHYS || num_disabled < 0) {
+	if (num_disabled >= STEDMA40_MAX_PHYS || num_disabled < 0) {
 		d40_err(&pdev->dev,
 			"Invalid number of disabled channels specified (%d)\n",
 			num_disabled);
@@ -3535,7 +3538,7 @@
 
 static int __init d40_probe(struct platform_device *pdev)
 {
-	struct stedma40_platform_data *plat_data = pdev->dev.platform_data;
+	struct stedma40_platform_data *plat_data = dev_get_platdata(&pdev->dev);
 	struct device_node *np = pdev->dev.of_node;
 	int ret = -ENOENT;
 	struct d40_base *base = NULL;
@@ -3579,9 +3582,7 @@
 	if (request_mem_region(res->start, resource_size(res),
 			       D40_NAME " I/O lcpa") == NULL) {
 		ret = -EBUSY;
-		d40_err(&pdev->dev,
-			"Failed to request LCPA region 0x%x-0x%x\n",
-			res->start, res->end);
+		d40_err(&pdev->dev, "Failed to request LCPA region %pR\n", res);
 		goto failure;
 	}
 
@@ -3589,8 +3590,8 @@
 	val = readl(base->virtbase + D40_DREG_LCPA);
 	if (res->start != val && val != 0) {
 		dev_warn(&pdev->dev,
-			 "[%s] Mismatch LCPA dma 0x%x, def 0x%x\n",
-			 __func__, val, res->start);
+			 "[%s] Mismatch LCPA dma 0x%x, def %pa\n",
+			 __func__, val, &res->start);
 	} else
 		writel(res->start, base->virtbase + D40_DREG_LCPA);
 
diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c
index f137914..5d4986e 100644
--- a/drivers/dma/tegra20-apb-dma.c
+++ b/drivers/dma/tegra20-apb-dma.c
@@ -767,13 +767,11 @@
 	unsigned long flags;
 	unsigned int residual;
 
-	spin_lock_irqsave(&tdc->lock, flags);
-
 	ret = dma_cookie_status(dc, cookie, txstate);
-	if (ret == DMA_SUCCESS) {
-		spin_unlock_irqrestore(&tdc->lock, flags);
+	if (ret == DMA_SUCCESS)
 		return ret;
-	}
+
+	spin_lock_irqsave(&tdc->lock, flags);
 
 	/* Check on wait_ack desc status */
 	list_for_each_entry(dma_desc, &tdc->free_dma_desc, node) {
diff --git a/drivers/dma/timb_dma.c b/drivers/dma/timb_dma.c
index 0ef43c1..28af214 100644
--- a/drivers/dma/timb_dma.c
+++ b/drivers/dma/timb_dma.c
@@ -669,7 +669,7 @@
 
 static int td_probe(struct platform_device *pdev)
 {
-	struct timb_dma_platform_data *pdata = pdev->dev.platform_data;
+	struct timb_dma_platform_data *pdata = dev_get_platdata(&pdev->dev);
 	struct timb_dma *td;
 	struct resource *iomem;
 	int irq;
diff --git a/drivers/dma/txx9dmac.c b/drivers/dma/txx9dmac.c
index a59fb48..71e8e77 100644
--- a/drivers/dma/txx9dmac.c
+++ b/drivers/dma/txx9dmac.c
@@ -962,15 +962,14 @@
 	enum dma_status ret;
 
 	ret = dma_cookie_status(chan, cookie, txstate);
-	if (ret != DMA_SUCCESS) {
-		spin_lock_bh(&dc->lock);
-		txx9dmac_scan_descriptors(dc);
-		spin_unlock_bh(&dc->lock);
+	if (ret == DMA_SUCCESS)
+		return DMA_SUCCESS;
 
-		ret = dma_cookie_status(chan, cookie, txstate);
-	}
+	spin_lock_bh(&dc->lock);
+	txx9dmac_scan_descriptors(dc);
+	spin_unlock_bh(&dc->lock);
 
-	return ret;
+	return dma_cookie_status(chan, cookie, txstate);
 }
 
 static void txx9dmac_chain_dynamic(struct txx9dmac_chan *dc,
@@ -1118,9 +1117,10 @@
 
 static int __init txx9dmac_chan_probe(struct platform_device *pdev)
 {
-	struct txx9dmac_chan_platform_data *cpdata = pdev->dev.platform_data;
+	struct txx9dmac_chan_platform_data *cpdata =
+			dev_get_platdata(&pdev->dev);
 	struct platform_device *dmac_dev = cpdata->dmac_dev;
-	struct txx9dmac_platform_data *pdata = dmac_dev->dev.platform_data;
+	struct txx9dmac_platform_data *pdata = dev_get_platdata(&dmac_dev->dev);
 	struct txx9dmac_chan *dc;
 	int err;
 	int ch = pdev->id % TXX9_DMA_MAX_NR_CHANNELS;
@@ -1203,7 +1203,7 @@
 
 static int __init txx9dmac_probe(struct platform_device *pdev)
 {
-	struct txx9dmac_platform_data *pdata = pdev->dev.platform_data;
+	struct txx9dmac_platform_data *pdata = dev_get_platdata(&pdev->dev);
 	struct resource *io;
 	struct txx9dmac_dev *ddev;
 	u32 mcr;
@@ -1282,7 +1282,7 @@
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	struct txx9dmac_dev *ddev = platform_get_drvdata(pdev);
-	struct txx9dmac_platform_data *pdata = pdev->dev.platform_data;
+	struct txx9dmac_platform_data *pdata = dev_get_platdata(&pdev->dev);
 	u32 mcr;
 
 	mcr = TXX9_DMA_MCR_MSTEN | MCR_LE;
diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c
index ac1b43a..d7d5c8a 100644
--- a/drivers/firewire/core-cdev.c
+++ b/drivers/firewire/core-cdev.c
@@ -486,7 +486,7 @@
 static int add_client_resource(struct client *client,
 			       struct client_resource *resource, gfp_t gfp_mask)
 {
-	bool preload = gfp_mask & __GFP_WAIT;
+	bool preload = !!(gfp_mask & __GFP_WAIT);
 	unsigned long flags;
 	int ret;
 
diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c
index 28a94c7..e5af0e3 100644
--- a/drivers/firewire/core-transaction.c
+++ b/drivers/firewire/core-transaction.c
@@ -1262,8 +1262,7 @@
 {
 	int ret;
 
-	fw_workqueue = alloc_workqueue("firewire",
-				       WQ_NON_REENTRANT | WQ_MEM_RECLAIM, 0);
+	fw_workqueue = alloc_workqueue("firewire", WQ_MEM_RECLAIM, 0);
 	if (!fw_workqueue)
 		return -ENOMEM;
 
diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c
index afb701e..6aa8a86 100644
--- a/drivers/firewire/ohci.c
+++ b/drivers/firewire/ohci.c
@@ -235,13 +235,15 @@
 	dma_addr_t next_config_rom_bus;
 	__be32     next_header;
 
-	__le32    *self_id_cpu;
+	__le32    *self_id;
 	dma_addr_t self_id_bus;
 	struct work_struct bus_reset_work;
 
 	u32 self_id_buffer[512];
 };
 
+static struct workqueue_struct *selfid_workqueue;
+
 static inline struct fw_ohci *fw_ohci(struct fw_card *card)
 {
 	return container_of(card, struct fw_ohci, card);
@@ -271,6 +273,7 @@
 
 static char ohci_driver_name[] = KBUILD_MODNAME;
 
+#define PCI_VENDOR_ID_PINNACLE_SYSTEMS	0x11bd
 #define PCI_DEVICE_ID_AGERE_FW643	0x5901
 #define PCI_DEVICE_ID_CREATIVE_SB1394	0x4001
 #define PCI_DEVICE_ID_JMICRON_JMB38X_FW	0x2380
@@ -278,17 +281,16 @@
 #define PCI_DEVICE_ID_TI_TSB12LV26	0x8020
 #define PCI_DEVICE_ID_TI_TSB82AA2	0x8025
 #define PCI_DEVICE_ID_VIA_VT630X	0x3044
-#define PCI_VENDOR_ID_PINNACLE_SYSTEMS	0x11bd
 #define PCI_REV_ID_VIA_VT6306		0x46
 
-#define QUIRK_CYCLE_TIMER		1
-#define QUIRK_RESET_PACKET		2
-#define QUIRK_BE_HEADERS		4
-#define QUIRK_NO_1394A			8
-#define QUIRK_NO_MSI			16
-#define QUIRK_TI_SLLZ059		32
-#define QUIRK_IR_WAKE			64
-#define QUIRK_PHY_LCTRL_TIMEOUT		128
+#define QUIRK_CYCLE_TIMER		0x1
+#define QUIRK_RESET_PACKET		0x2
+#define QUIRK_BE_HEADERS		0x4
+#define QUIRK_NO_1394A			0x8
+#define QUIRK_NO_MSI			0x10
+#define QUIRK_TI_SLLZ059		0x20
+#define QUIRK_IR_WAKE			0x40
+#define QUIRK_PHY_LCTRL_TIMEOUT		0x80
 
 /* In case of multiple matches in ohci_quirks[], only the first one is used. */
 static const struct {
@@ -1929,12 +1931,12 @@
 		return;
 	}
 
-	generation = (cond_le32_to_cpu(ohci->self_id_cpu[0]) >> 16) & 0xff;
+	generation = (cond_le32_to_cpu(ohci->self_id[0]) >> 16) & 0xff;
 	rmb();
 
 	for (i = 1, j = 0; j < self_id_count; i += 2, j++) {
-		u32 id  = cond_le32_to_cpu(ohci->self_id_cpu[i]);
-		u32 id2 = cond_le32_to_cpu(ohci->self_id_cpu[i + 1]);
+		u32 id  = cond_le32_to_cpu(ohci->self_id[i]);
+		u32 id2 = cond_le32_to_cpu(ohci->self_id[i + 1]);
 
 		if (id != ~id2) {
 			/*
@@ -2087,7 +2089,7 @@
 	log_irqs(ohci, event);
 
 	if (event & OHCI1394_selfIDComplete)
-		queue_work(fw_workqueue, &ohci->bus_reset_work);
+		queue_work(selfid_workqueue, &ohci->bus_reset_work);
 
 	if (event & OHCI1394_RQPkt)
 		tasklet_schedule(&ohci->ar_request_ctx.tasklet);
@@ -3692,7 +3694,7 @@
 		goto fail_contexts;
 	}
 
-	ohci->self_id_cpu = ohci->misc_buffer     + PAGE_SIZE/2;
+	ohci->self_id     = ohci->misc_buffer     + PAGE_SIZE/2;
 	ohci->self_id_bus = ohci->misc_buffer_bus + PAGE_SIZE/2;
 
 	bus_options = reg_read(ohci, OHCI1394_BusOptions);
@@ -3870,7 +3872,23 @@
 #endif
 };
 
-module_pci_driver(fw_ohci_pci_driver);
+static int __init fw_ohci_init(void)
+{
+	selfid_workqueue = alloc_workqueue(KBUILD_MODNAME, WQ_MEM_RECLAIM, 0);
+	if (!selfid_workqueue)
+		return -ENOMEM;
+
+	return pci_register_driver(&fw_ohci_pci_driver);
+}
+
+static void __exit fw_ohci_cleanup(void)
+{
+	pci_unregister_driver(&fw_ohci_pci_driver);
+	destroy_workqueue(selfid_workqueue);
+}
+
+module_init(fw_ohci_init);
+module_exit(fw_ohci_cleanup);
 
 MODULE_AUTHOR("Kristian Hoegsberg <krh@bitplanet.net>");
 MODULE_DESCRIPTION("Driver for PCI OHCI IEEE1394 controllers");
diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
index ba9876f..0dfaf20 100644
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -195,8 +195,8 @@
 		return;
 
 	for (;; index++) {
-		ret = of_parse_phandle_with_args(np, "gpio-ranges",
-				"#gpio-range-cells", index, &pinspec);
+		ret = of_parse_phandle_with_fixed_args(np, "gpio-ranges", 3,
+				index, &pinspec);
 		if (ret)
 			break;
 
diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index 1fea003..3792a1a 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -30,6 +30,11 @@
 	  The maximum number of VICs available in the system, for
 	  power management.
 
+config IMGPDC_IRQ
+	bool
+	select GENERIC_IRQ_CHIP
+	select IRQ_DOMAIN
+
 config ORION_IRQCHIP
 	bool
 	select IRQ_DOMAIN
diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index e65c41a..c60b901 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -2,6 +2,7 @@
 
 obj-$(CONFIG_ARCH_BCM2835)		+= irq-bcm2835.o
 obj-$(CONFIG_ARCH_EXYNOS)		+= exynos-combiner.o
+obj-$(CONFIG_ARCH_MMP)			+= irq-mmp.o
 obj-$(CONFIG_ARCH_MVEBU)		+= irq-armada-370-xp.o
 obj-$(CONFIG_ARCH_MXS)			+= irq-mxs.o
 obj-$(CONFIG_ARCH_S3C24XX)		+= irq-s3c24xx.o
@@ -14,6 +15,7 @@
 obj-$(CONFIG_ARM_GIC)			+= irq-gic.o
 obj-$(CONFIG_ARM_NVIC)			+= irq-nvic.o
 obj-$(CONFIG_ARM_VIC)			+= irq-vic.o
+obj-$(CONFIG_IMGPDC_IRQ)		+= irq-imgpdc.o
 obj-$(CONFIG_SIRF_IRQ)			+= irq-sirfsoc.o
 obj-$(CONFIG_RENESAS_INTC_IRQPIN)	+= irq-renesas-intc-irqpin.o
 obj-$(CONFIG_RENESAS_IRQC)		+= irq-renesas-irqc.o
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index ee7c503..d0e9480 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -453,6 +453,12 @@
 	writel_relaxed(1, base + GIC_CPU_CTRL);
 }
 
+void gic_cpu_if_down(void)
+{
+	void __iomem *cpu_base = gic_data_cpu_base(&gic_data[0]);
+	writel_relaxed(0, cpu_base + GIC_CPU_CTRL);
+}
+
 #ifdef CONFIG_CPU_PM
 /*
  * Saves the GIC distributor registers during suspend or idle.  Must be called
diff --git a/drivers/irqchip/irq-imgpdc.c b/drivers/irqchip/irq-imgpdc.c
new file mode 100644
index 0000000..8071c2e
--- /dev/null
+++ b/drivers/irqchip/irq-imgpdc.c
@@ -0,0 +1,499 @@
+/*
+ * IMG PowerDown Controller (PDC)
+ *
+ * Copyright 2010-2013 Imagination Technologies Ltd.
+ *
+ * Exposes the syswake and PDC peripheral wake interrupts to the system.
+ *
+ */
+
+#include <linux/bitops.h>
+#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/spinlock.h>
+
+/* PDC interrupt register numbers */
+
+#define PDC_IRQ_STATUS			0x310
+#define PDC_IRQ_ENABLE			0x314
+#define PDC_IRQ_CLEAR			0x318
+#define PDC_IRQ_ROUTE			0x31c
+#define PDC_SYS_WAKE_BASE		0x330
+#define PDC_SYS_WAKE_STRIDE		0x8
+#define PDC_SYS_WAKE_CONFIG_BASE	0x334
+#define PDC_SYS_WAKE_CONFIG_STRIDE	0x8
+
+/* PDC interrupt register field masks */
+
+#define PDC_IRQ_SYS3			0x08
+#define PDC_IRQ_SYS2			0x04
+#define PDC_IRQ_SYS1			0x02
+#define PDC_IRQ_SYS0			0x01
+#define PDC_IRQ_ROUTE_WU_EN_SYS3	0x08000000
+#define PDC_IRQ_ROUTE_WU_EN_SYS2	0x04000000
+#define PDC_IRQ_ROUTE_WU_EN_SYS1	0x02000000
+#define PDC_IRQ_ROUTE_WU_EN_SYS0	0x01000000
+#define PDC_IRQ_ROUTE_WU_EN_WD		0x00040000
+#define PDC_IRQ_ROUTE_WU_EN_IR		0x00020000
+#define PDC_IRQ_ROUTE_WU_EN_RTC		0x00010000
+#define PDC_IRQ_ROUTE_EXT_EN_SYS3	0x00000800
+#define PDC_IRQ_ROUTE_EXT_EN_SYS2	0x00000400
+#define PDC_IRQ_ROUTE_EXT_EN_SYS1	0x00000200
+#define PDC_IRQ_ROUTE_EXT_EN_SYS0	0x00000100
+#define PDC_IRQ_ROUTE_EXT_EN_WD		0x00000004
+#define PDC_IRQ_ROUTE_EXT_EN_IR		0x00000002
+#define PDC_IRQ_ROUTE_EXT_EN_RTC	0x00000001
+#define PDC_SYS_WAKE_RESET		0x00000010
+#define PDC_SYS_WAKE_INT_MODE		0x0000000e
+#define PDC_SYS_WAKE_INT_MODE_SHIFT	1
+#define PDC_SYS_WAKE_PIN_VAL		0x00000001
+
+/* PDC interrupt constants */
+
+#define PDC_SYS_WAKE_INT_LOW		0x0
+#define PDC_SYS_WAKE_INT_HIGH		0x1
+#define PDC_SYS_WAKE_INT_DOWN		0x2
+#define PDC_SYS_WAKE_INT_UP		0x3
+#define PDC_SYS_WAKE_INT_CHANGE		0x6
+#define PDC_SYS_WAKE_INT_NONE		0x4
+
+/**
+ * struct pdc_intc_priv - private pdc interrupt data.
+ * @nr_perips:		Number of peripheral interrupt signals.
+ * @nr_syswakes:	Number of syswake signals.
+ * @perip_irqs:		List of peripheral IRQ numbers handled.
+ * @syswake_irq:	Shared PDC syswake IRQ number.
+ * @domain:		IRQ domain for PDC peripheral and syswake IRQs.
+ * @pdc_base:		Base of PDC registers.
+ * @irq_route:		Cached version of PDC_IRQ_ROUTE register.
+ * @lock:		Lock to protect the PDC syswake registers and the cached
+ *			values of those registers in this struct.
+ */
+struct pdc_intc_priv {
+	unsigned int		nr_perips;
+	unsigned int		nr_syswakes;
+	unsigned int		*perip_irqs;
+	unsigned int		syswake_irq;
+	struct irq_domain	*domain;
+	void __iomem		*pdc_base;
+
+	u32			irq_route;
+	raw_spinlock_t		lock;
+};
+
+static void pdc_write(struct pdc_intc_priv *priv, unsigned int reg_offs,
+		      unsigned int data)
+{
+	iowrite32(data, priv->pdc_base + reg_offs);
+}
+
+static unsigned int pdc_read(struct pdc_intc_priv *priv,
+			     unsigned int reg_offs)
+{
+	return ioread32(priv->pdc_base + reg_offs);
+}
+
+/* Generic IRQ callbacks */
+
+#define SYS0_HWIRQ	8
+
+static unsigned int hwirq_is_syswake(irq_hw_number_t hw)
+{
+	return hw >= SYS0_HWIRQ;
+}
+
+static unsigned int hwirq_to_syswake(irq_hw_number_t hw)
+{
+	return hw - SYS0_HWIRQ;
+}
+
+static irq_hw_number_t syswake_to_hwirq(unsigned int syswake)
+{
+	return SYS0_HWIRQ + syswake;
+}
+
+static struct pdc_intc_priv *irqd_to_priv(struct irq_data *data)
+{
+	return (struct pdc_intc_priv *)data->domain->host_data;
+}
+
+/*
+ * perip_irq_mask() and perip_irq_unmask() use IRQ_ROUTE which also contains
+ * wake bits, therefore we cannot use the generic irqchip mask callbacks as they
+ * cache the mask.
+ */
+
+static void perip_irq_mask(struct irq_data *data)
+{
+	struct pdc_intc_priv *priv = irqd_to_priv(data);
+
+	raw_spin_lock(&priv->lock);
+	priv->irq_route &= ~data->mask;
+	pdc_write(priv, PDC_IRQ_ROUTE, priv->irq_route);
+	raw_spin_unlock(&priv->lock);
+}
+
+static void perip_irq_unmask(struct irq_data *data)
+{
+	struct pdc_intc_priv *priv = irqd_to_priv(data);
+
+	raw_spin_lock(&priv->lock);
+	priv->irq_route |= data->mask;
+	pdc_write(priv, PDC_IRQ_ROUTE, priv->irq_route);
+	raw_spin_unlock(&priv->lock);
+}
+
+static int syswake_irq_set_type(struct irq_data *data, unsigned int flow_type)
+{
+	struct pdc_intc_priv *priv = irqd_to_priv(data);
+	unsigned int syswake = hwirq_to_syswake(data->hwirq);
+	unsigned int irq_mode;
+	unsigned int soc_sys_wake_regoff, soc_sys_wake;
+
+	/* translate to syswake IRQ mode */
+	switch (flow_type) {
+	case IRQ_TYPE_EDGE_BOTH:
+		irq_mode = PDC_SYS_WAKE_INT_CHANGE;
+		break;
+	case IRQ_TYPE_EDGE_RISING:
+		irq_mode = PDC_SYS_WAKE_INT_UP;
+		break;
+	case IRQ_TYPE_EDGE_FALLING:
+		irq_mode = PDC_SYS_WAKE_INT_DOWN;
+		break;
+	case IRQ_TYPE_LEVEL_HIGH:
+		irq_mode = PDC_SYS_WAKE_INT_HIGH;
+		break;
+	case IRQ_TYPE_LEVEL_LOW:
+		irq_mode = PDC_SYS_WAKE_INT_LOW;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	raw_spin_lock(&priv->lock);
+
+	/* set the IRQ mode */
+	soc_sys_wake_regoff = PDC_SYS_WAKE_BASE + syswake*PDC_SYS_WAKE_STRIDE;
+	soc_sys_wake = pdc_read(priv, soc_sys_wake_regoff);
+	soc_sys_wake &= ~PDC_SYS_WAKE_INT_MODE;
+	soc_sys_wake |= irq_mode << PDC_SYS_WAKE_INT_MODE_SHIFT;
+	pdc_write(priv, soc_sys_wake_regoff, soc_sys_wake);
+
+	/* and update the handler */
+	irq_setup_alt_chip(data, flow_type);
+
+	raw_spin_unlock(&priv->lock);
+
+	return 0;
+}
+
+/* applies to both peripheral and syswake interrupts */
+static int pdc_irq_set_wake(struct irq_data *data, unsigned int on)
+{
+	struct pdc_intc_priv *priv = irqd_to_priv(data);
+	irq_hw_number_t hw = data->hwirq;
+	unsigned int mask = (1 << 16) << hw;
+	unsigned int dst_irq;
+
+	raw_spin_lock(&priv->lock);
+	if (on)
+		priv->irq_route |= mask;
+	else
+		priv->irq_route &= ~mask;
+	pdc_write(priv, PDC_IRQ_ROUTE, priv->irq_route);
+	raw_spin_unlock(&priv->lock);
+
+	/* control the destination IRQ wakeup too for standby mode */
+	if (hwirq_is_syswake(hw))
+		dst_irq = priv->syswake_irq;
+	else
+		dst_irq = priv->perip_irqs[hw];
+	irq_set_irq_wake(dst_irq, on);
+
+	return 0;
+}
+
+static void pdc_intc_perip_isr(unsigned int irq, struct irq_desc *desc)
+{
+	struct pdc_intc_priv *priv;
+	unsigned int i, irq_no;
+
+	priv = (struct pdc_intc_priv *)irq_desc_get_handler_data(desc);
+
+	/* find the peripheral number */
+	for (i = 0; i < priv->nr_perips; ++i)
+		if (irq == priv->perip_irqs[i])
+			goto found;
+
+	/* should never get here */
+	return;
+found:
+
+	/* pass on the interrupt */
+	irq_no = irq_linear_revmap(priv->domain, i);
+	generic_handle_irq(irq_no);
+}
+
+static void pdc_intc_syswake_isr(unsigned int irq, struct irq_desc *desc)
+{
+	struct pdc_intc_priv *priv;
+	unsigned int syswake, irq_no;
+	unsigned int status;
+
+	priv = (struct pdc_intc_priv *)irq_desc_get_handler_data(desc);
+
+	status = pdc_read(priv, PDC_IRQ_STATUS) &
+		 pdc_read(priv, PDC_IRQ_ENABLE);
+	status &= (1 << priv->nr_syswakes) - 1;
+
+	for (syswake = 0; status; status >>= 1, ++syswake) {
+		/* Has this sys_wake triggered? */
+		if (!(status & 1))
+			continue;
+
+		irq_no = irq_linear_revmap(priv->domain,
+					   syswake_to_hwirq(syswake));
+		generic_handle_irq(irq_no);
+	}
+}
+
+static void pdc_intc_setup(struct pdc_intc_priv *priv)
+{
+	int i;
+	unsigned int soc_sys_wake_regoff;
+	unsigned int soc_sys_wake;
+
+	/*
+	 * Mask all syswake interrupts before routing, or we could receive an
+	 * interrupt before we're ready to handle it.
+	 */
+	pdc_write(priv, PDC_IRQ_ENABLE, 0);
+
+	/*
+	 * Enable routing of all syswakes
+	 * Disable all wake sources
+	 */
+	priv->irq_route = ((PDC_IRQ_ROUTE_EXT_EN_SYS0 << priv->nr_syswakes) -
+				PDC_IRQ_ROUTE_EXT_EN_SYS0);
+	pdc_write(priv, PDC_IRQ_ROUTE, priv->irq_route);
+
+	/* Initialise syswake IRQ */
+	for (i = 0; i < priv->nr_syswakes; ++i) {
+		/* set the IRQ mode to none */
+		soc_sys_wake_regoff = PDC_SYS_WAKE_BASE + i*PDC_SYS_WAKE_STRIDE;
+		soc_sys_wake = PDC_SYS_WAKE_INT_NONE
+				<< PDC_SYS_WAKE_INT_MODE_SHIFT;
+		pdc_write(priv, soc_sys_wake_regoff, soc_sys_wake);
+	}
+}
+
+static int pdc_intc_probe(struct platform_device *pdev)
+{
+	struct pdc_intc_priv *priv;
+	struct device_node *node = pdev->dev.of_node;
+	struct resource *res_regs;
+	struct irq_chip_generic *gc;
+	unsigned int i;
+	int irq, ret;
+	u32 val;
+
+	if (!node)
+		return -ENOENT;
+
+	/* Get registers */
+	res_regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (res_regs == NULL) {
+		dev_err(&pdev->dev, "cannot find registers resource\n");
+		return -ENOENT;
+	}
+
+	/* Allocate driver data */
+	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv) {
+		dev_err(&pdev->dev, "cannot allocate device data\n");
+		return -ENOMEM;
+	}
+	raw_spin_lock_init(&priv->lock);
+	platform_set_drvdata(pdev, priv);
+
+	/* Ioremap the registers */
+	priv->pdc_base = devm_ioremap(&pdev->dev, res_regs->start,
+				      res_regs->end - res_regs->start);
+	if (!priv->pdc_base)
+		return -EIO;
+
+	/* Get number of peripherals */
+	ret = of_property_read_u32(node, "num-perips", &val);
+	if (ret) {
+		dev_err(&pdev->dev, "No num-perips node property found\n");
+		return -EINVAL;
+	}
+	if (val > SYS0_HWIRQ) {
+		dev_err(&pdev->dev, "num-perips (%u) out of range\n", val);
+		return -EINVAL;
+	}
+	priv->nr_perips = val;
+
+	/* Get number of syswakes */
+	ret = of_property_read_u32(node, "num-syswakes", &val);
+	if (ret) {
+		dev_err(&pdev->dev, "No num-syswakes node property found\n");
+		return -EINVAL;
+	}
+	if (val > SYS0_HWIRQ) {
+		dev_err(&pdev->dev, "num-syswakes (%u) out of range\n", val);
+		return -EINVAL;
+	}
+	priv->nr_syswakes = val;
+
+	/* Get peripheral IRQ numbers */
+	priv->perip_irqs = devm_kzalloc(&pdev->dev, 4 * priv->nr_perips,
+					GFP_KERNEL);
+	if (!priv->perip_irqs) {
+		dev_err(&pdev->dev, "cannot allocate perip IRQ list\n");
+		return -ENOMEM;
+	}
+	for (i = 0; i < priv->nr_perips; ++i) {
+		irq = platform_get_irq(pdev, 1 + i);
+		if (irq < 0) {
+			dev_err(&pdev->dev, "cannot find perip IRQ #%u\n", i);
+			return irq;
+		}
+		priv->perip_irqs[i] = irq;
+	}
+	/* check if too many were provided */
+	if (platform_get_irq(pdev, 1 + i) >= 0) {
+		dev_err(&pdev->dev, "surplus perip IRQs detected\n");
+		return -EINVAL;
+	}
+
+	/* Get syswake IRQ number */
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		dev_err(&pdev->dev, "cannot find syswake IRQ\n");
+		return irq;
+	}
+	priv->syswake_irq = irq;
+
+	/* Set up an IRQ domain */
+	priv->domain = irq_domain_add_linear(node, 16, &irq_generic_chip_ops,
+					     priv);
+	if (unlikely(!priv->domain)) {
+		dev_err(&pdev->dev, "cannot add IRQ domain\n");
+		return -ENOMEM;
+	}
+
+	/*
+	 * Set up 2 generic irq chips with 2 chip types.
+	 * The first one for peripheral irqs (only 1 chip type used)
+	 * The second one for syswake irqs (edge and level chip types)
+	 */
+	ret = irq_alloc_domain_generic_chips(priv->domain, 8, 2, "pdc",
+					     handle_level_irq, 0, 0,
+					     IRQ_GC_INIT_NESTED_LOCK);
+	if (ret)
+		goto err_generic;
+
+	/* peripheral interrupt chip */
+
+	gc = irq_get_domain_generic_chip(priv->domain, 0);
+	gc->unused	= ~(BIT(priv->nr_perips) - 1);
+	gc->reg_base	= priv->pdc_base;
+	/*
+	 * IRQ_ROUTE contains wake bits, so we can't use the generic versions as
+	 * they cache the mask
+	 */
+	gc->chip_types[0].regs.mask		= PDC_IRQ_ROUTE;
+	gc->chip_types[0].chip.irq_mask		= perip_irq_mask;
+	gc->chip_types[0].chip.irq_unmask	= perip_irq_unmask;
+	gc->chip_types[0].chip.irq_set_wake	= pdc_irq_set_wake;
+
+	/* syswake interrupt chip */
+
+	gc = irq_get_domain_generic_chip(priv->domain, 8);
+	gc->unused	= ~(BIT(priv->nr_syswakes) - 1);
+	gc->reg_base	= priv->pdc_base;
+
+	/* edge interrupts */
+	gc->chip_types[0].type			= IRQ_TYPE_EDGE_BOTH;
+	gc->chip_types[0].handler		= handle_edge_irq;
+	gc->chip_types[0].regs.ack		= PDC_IRQ_CLEAR;
+	gc->chip_types[0].regs.mask		= PDC_IRQ_ENABLE;
+	gc->chip_types[0].chip.irq_ack		= irq_gc_ack_set_bit;
+	gc->chip_types[0].chip.irq_mask		= irq_gc_mask_clr_bit;
+	gc->chip_types[0].chip.irq_unmask	= irq_gc_mask_set_bit;
+	gc->chip_types[0].chip.irq_set_type	= syswake_irq_set_type;
+	gc->chip_types[0].chip.irq_set_wake	= pdc_irq_set_wake;
+	/* for standby we pass on to the shared syswake IRQ */
+	gc->chip_types[0].chip.flags		= IRQCHIP_MASK_ON_SUSPEND;
+
+	/* level interrupts */
+	gc->chip_types[1].type			= IRQ_TYPE_LEVEL_MASK;
+	gc->chip_types[1].handler		= handle_level_irq;
+	gc->chip_types[1].regs.ack		= PDC_IRQ_CLEAR;
+	gc->chip_types[1].regs.mask		= PDC_IRQ_ENABLE;
+	gc->chip_types[1].chip.irq_ack		= irq_gc_ack_set_bit;
+	gc->chip_types[1].chip.irq_mask		= irq_gc_mask_clr_bit;
+	gc->chip_types[1].chip.irq_unmask	= irq_gc_mask_set_bit;
+	gc->chip_types[1].chip.irq_set_type	= syswake_irq_set_type;
+	gc->chip_types[1].chip.irq_set_wake	= pdc_irq_set_wake;
+	/* for standby we pass on to the shared syswake IRQ */
+	gc->chip_types[1].chip.flags		= IRQCHIP_MASK_ON_SUSPEND;
+
+	/* Set up the hardware to enable interrupt routing */
+	pdc_intc_setup(priv);
+
+	/* Setup chained handlers for the peripheral IRQs */
+	for (i = 0; i < priv->nr_perips; ++i) {
+		irq = priv->perip_irqs[i];
+		irq_set_handler_data(irq, priv);
+		irq_set_chained_handler(irq, pdc_intc_perip_isr);
+	}
+
+	/* Setup chained handler for the syswake IRQ */
+	irq_set_handler_data(priv->syswake_irq, priv);
+	irq_set_chained_handler(priv->syswake_irq, pdc_intc_syswake_isr);
+
+	dev_info(&pdev->dev,
+		 "PDC IRQ controller initialised (%u perip IRQs, %u syswake IRQs)\n",
+		 priv->nr_perips,
+		 priv->nr_syswakes);
+
+	return 0;
+err_generic:
+	irq_domain_remove(priv->domain);
+	return ret;
+}
+
+static int pdc_intc_remove(struct platform_device *pdev)
+{
+	struct pdc_intc_priv *priv = platform_get_drvdata(pdev);
+
+	irq_domain_remove(priv->domain);
+	return 0;
+}
+
+static const struct of_device_id pdc_intc_match[] = {
+	{ .compatible = "img,pdc-intc" },
+	{}
+};
+
+static struct platform_driver pdc_intc_driver = {
+	.driver = {
+		.name		= "pdc-intc",
+		.of_match_table	= pdc_intc_match,
+	},
+	.probe = pdc_intc_probe,
+	.remove = pdc_intc_remove,
+};
+
+static int __init pdc_intc_init(void)
+{
+	return platform_driver_register(&pdc_intc_driver);
+}
+core_initcall(pdc_intc_init);
diff --git a/arch/arm/mach-mmp/irq.c b/drivers/irqchip/irq-mmp.c
similarity index 62%
rename from arch/arm/mach-mmp/irq.c
rename to drivers/irqchip/irq-mmp.c
index 3c71246..2cb7cd0 100644
--- a/arch/arm/mach-mmp/irq.c
+++ b/drivers/irqchip/irq-mmp.c
@@ -21,19 +21,20 @@
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
 
-#include <mach/irqs.h>
+#include <asm/exception.h>
+#include <asm/mach/irq.h>
 
-#ifdef CONFIG_CPU_MMP2
-#include <mach/pm-mmp2.h>
-#endif
-#ifdef CONFIG_CPU_PXA910
-#include <mach/pm-pxa910.h>
-#endif
-
-#include "common.h"
+#include "irqchip.h"
 
 #define MAX_ICU_NR		16
 
+#define PJ1_INT_SEL		0x10c
+#define PJ4_INT_SEL		0x104
+
+/* bit fields in PJ1_INT_SEL and PJ4_INT_SEL */
+#define SEL_INT_PENDING		(1 << 6)
+#define SEL_INT_NUM_MASK	0x3f
+
 struct icu_chip_data {
 	int			nr_irqs;
 	unsigned int		virq_base;
@@ -54,7 +55,7 @@
 	unsigned int	conf_mask;
 };
 
-void __iomem *mmp_icu_base;
+static void __iomem *mmp_icu_base;
 static struct icu_chip_data icu_data[MAX_ICU_NR];
 static int max_icu_nr;
 
@@ -122,7 +123,7 @@
 	}
 }
 
-static struct irq_chip icu_irq_chip = {
+struct irq_chip icu_irq_chip = {
 	.name		= "icu_irq",
 	.irq_mask	= icu_mask_irq,
 	.irq_mask_ack	= icu_mask_ack_irq,
@@ -193,6 +194,32 @@
 	.conf_mask	= 0x7f,
 };
 
+static asmlinkage void __exception_irq_entry
+mmp_handle_irq(struct pt_regs *regs)
+{
+	int irq, hwirq;
+
+	hwirq = readl_relaxed(mmp_icu_base + PJ1_INT_SEL);
+	if (!(hwirq & SEL_INT_PENDING))
+		return;
+	hwirq &= SEL_INT_NUM_MASK;
+	irq = irq_find_mapping(icu_data[0].domain, hwirq);
+	handle_IRQ(irq, regs);
+}
+
+static asmlinkage void __exception_irq_entry
+mmp2_handle_irq(struct pt_regs *regs)
+{
+	int irq, hwirq;
+
+	hwirq = readl_relaxed(mmp_icu_base + PJ4_INT_SEL);
+	if (!(hwirq & SEL_INT_PENDING))
+		return;
+	hwirq &= SEL_INT_NUM_MASK;
+	irq = irq_find_mapping(icu_data[0].domain, hwirq);
+	handle_IRQ(irq, regs);
+}
+
 /* MMP (ARMv5) */
 void __init icu_init_irq(void)
 {
@@ -214,15 +241,13 @@
 		set_irq_flags(irq, IRQF_VALID);
 	}
 	irq_set_default_host(icu_data[0].domain);
-#ifdef CONFIG_CPU_PXA910
-	icu_irq_chip.irq_set_wake = pxa910_set_wake;
-#endif
+	set_handle_irq(mmp_handle_irq);
 }
 
 /* MMP2 (ARMv7) */
 void __init mmp2_init_icu(void)
 {
-	int irq;
+	int irq, end;
 
 	max_icu_nr = 8;
 	mmp_icu_base = ioremap(0xd4282000, 0x1000);
@@ -236,11 +261,12 @@
 						   &icu_data[0]);
 	icu_data[1].reg_status = mmp_icu_base + 0x150;
 	icu_data[1].reg_mask = mmp_icu_base + 0x168;
-	icu_data[1].clr_mfp_irq_base = IRQ_MMP2_PMIC_BASE;
-	icu_data[1].clr_mfp_hwirq = IRQ_MMP2_PMIC - IRQ_MMP2_PMIC_BASE;
+	icu_data[1].clr_mfp_irq_base = icu_data[0].virq_base +
+				icu_data[0].nr_irqs;
+	icu_data[1].clr_mfp_hwirq = 1;		/* offset to IRQ_MMP2_PMIC_BASE */
 	icu_data[1].nr_irqs = 2;
 	icu_data[1].cascade_irq = 4;
-	icu_data[1].virq_base = IRQ_MMP2_PMIC_BASE;
+	icu_data[1].virq_base = icu_data[0].virq_base + icu_data[0].nr_irqs;
 	icu_data[1].domain = irq_domain_add_legacy(NULL, icu_data[1].nr_irqs,
 						   icu_data[1].virq_base, 0,
 						   &irq_domain_simple_ops,
@@ -249,7 +275,7 @@
 	icu_data[2].reg_mask = mmp_icu_base + 0x16c;
 	icu_data[2].nr_irqs = 2;
 	icu_data[2].cascade_irq = 5;
-	icu_data[2].virq_base = IRQ_MMP2_RTC_BASE;
+	icu_data[2].virq_base = icu_data[1].virq_base + icu_data[1].nr_irqs;
 	icu_data[2].domain = irq_domain_add_legacy(NULL, icu_data[2].nr_irqs,
 						   icu_data[2].virq_base, 0,
 						   &irq_domain_simple_ops,
@@ -258,7 +284,7 @@
 	icu_data[3].reg_mask = mmp_icu_base + 0x17c;
 	icu_data[3].nr_irqs = 3;
 	icu_data[3].cascade_irq = 9;
-	icu_data[3].virq_base = IRQ_MMP2_KEYPAD_BASE;
+	icu_data[3].virq_base = icu_data[2].virq_base + icu_data[2].nr_irqs;
 	icu_data[3].domain = irq_domain_add_legacy(NULL, icu_data[3].nr_irqs,
 						   icu_data[3].virq_base, 0,
 						   &irq_domain_simple_ops,
@@ -267,7 +293,7 @@
 	icu_data[4].reg_mask = mmp_icu_base + 0x170;
 	icu_data[4].nr_irqs = 5;
 	icu_data[4].cascade_irq = 17;
-	icu_data[4].virq_base = IRQ_MMP2_TWSI_BASE;
+	icu_data[4].virq_base = icu_data[3].virq_base + icu_data[3].nr_irqs;
 	icu_data[4].domain = irq_domain_add_legacy(NULL, icu_data[4].nr_irqs,
 						   icu_data[4].virq_base, 0,
 						   &irq_domain_simple_ops,
@@ -276,7 +302,7 @@
 	icu_data[5].reg_mask = mmp_icu_base + 0x174;
 	icu_data[5].nr_irqs = 15;
 	icu_data[5].cascade_irq = 35;
-	icu_data[5].virq_base = IRQ_MMP2_MISC_BASE;
+	icu_data[5].virq_base = icu_data[4].virq_base + icu_data[4].nr_irqs;
 	icu_data[5].domain = irq_domain_add_legacy(NULL, icu_data[5].nr_irqs,
 						   icu_data[5].virq_base, 0,
 						   &irq_domain_simple_ops,
@@ -285,7 +311,7 @@
 	icu_data[6].reg_mask = mmp_icu_base + 0x178;
 	icu_data[6].nr_irqs = 2;
 	icu_data[6].cascade_irq = 51;
-	icu_data[6].virq_base = IRQ_MMP2_MIPI_HSI1_BASE;
+	icu_data[6].virq_base = icu_data[5].virq_base + icu_data[5].nr_irqs;
 	icu_data[6].domain = irq_domain_add_legacy(NULL, icu_data[6].nr_irqs,
 						   icu_data[6].virq_base, 0,
 						   &irq_domain_simple_ops,
@@ -294,170 +320,176 @@
 	icu_data[7].reg_mask = mmp_icu_base + 0x184;
 	icu_data[7].nr_irqs = 2;
 	icu_data[7].cascade_irq = 55;
-	icu_data[7].virq_base = IRQ_MMP2_MIPI_HSI0_BASE;
+	icu_data[7].virq_base = icu_data[6].virq_base + icu_data[6].nr_irqs;
 	icu_data[7].domain = irq_domain_add_legacy(NULL, icu_data[7].nr_irqs,
 						   icu_data[7].virq_base, 0,
 						   &irq_domain_simple_ops,
 						   &icu_data[7]);
-	for (irq = 0; irq < IRQ_MMP2_MUX_END; irq++) {
+	end = icu_data[7].virq_base + icu_data[7].nr_irqs;
+	for (irq = 0; irq < end; irq++) {
 		icu_mask_irq(irq_get_irq_data(irq));
-		switch (irq) {
-		case IRQ_MMP2_PMIC_MUX:
-		case IRQ_MMP2_RTC_MUX:
-		case IRQ_MMP2_KEYPAD_MUX:
-		case IRQ_MMP2_TWSI_MUX:
-		case IRQ_MMP2_MISC_MUX:
-		case IRQ_MMP2_MIPI_HSI1_MUX:
-		case IRQ_MMP2_MIPI_HSI0_MUX:
+		if (irq == icu_data[1].cascade_irq ||
+		    irq == icu_data[2].cascade_irq ||
+		    irq == icu_data[3].cascade_irq ||
+		    irq == icu_data[4].cascade_irq ||
+		    irq == icu_data[5].cascade_irq ||
+		    irq == icu_data[6].cascade_irq ||
+		    irq == icu_data[7].cascade_irq) {
 			irq_set_chip(irq, &icu_irq_chip);
 			irq_set_chained_handler(irq, icu_mux_irq_demux);
-			break;
-		default:
+		} else {
 			irq_set_chip_and_handler(irq, &icu_irq_chip,
 						 handle_level_irq);
-			break;
 		}
 		set_irq_flags(irq, IRQF_VALID);
 	}
 	irq_set_default_host(icu_data[0].domain);
-#ifdef CONFIG_CPU_MMP2
-	icu_irq_chip.irq_set_wake = mmp2_set_wake;
-#endif
+	set_handle_irq(mmp2_handle_irq);
 }
 
 #ifdef CONFIG_OF
-static const struct of_device_id intc_ids[] __initconst = {
-	{ .compatible = "mrvl,mmp-intc", .data = &mmp_conf },
-	{ .compatible = "mrvl,mmp2-intc", .data = &mmp2_conf },
-	{}
-};
-
-static const struct of_device_id mmp_mux_irq_match[] __initconst = {
-	{ .compatible = "mrvl,mmp2-mux-intc" },
-	{}
-};
-
-int __init mmp2_mux_init(struct device_node *parent)
+static int __init mmp_init_bases(struct device_node *node)
 {
-	struct device_node *node;
-	const struct of_device_id *of_id;
-	struct resource res;
-	int i, irq_base, ret, irq;
-	u32 nr_irqs, mfp_irq;
-
-	node = parent;
-	max_icu_nr = 1;
-	for (i = 1; i < MAX_ICU_NR; i++) {
-		node = of_find_matching_node(node, mmp_mux_irq_match);
-		if (!node)
-			break;
-		of_id = of_match_node(&mmp_mux_irq_match[0], node);
-		ret = of_property_read_u32(node, "mrvl,intc-nr-irqs",
-					   &nr_irqs);
-		if (ret) {
-			pr_err("Not found mrvl,intc-nr-irqs property\n");
-			ret = -EINVAL;
-			goto err;
-		}
-		ret = of_address_to_resource(node, 0, &res);
-		if (ret < 0) {
-			pr_err("Not found reg property\n");
-			ret = -EINVAL;
-			goto err;
-		}
-		icu_data[i].reg_status = mmp_icu_base + res.start;
-		ret = of_address_to_resource(node, 1, &res);
-		if (ret < 0) {
-			pr_err("Not found reg property\n");
-			ret = -EINVAL;
-			goto err;
-		}
-		icu_data[i].reg_mask = mmp_icu_base + res.start;
-		icu_data[i].cascade_irq = irq_of_parse_and_map(node, 0);
-		if (!icu_data[i].cascade_irq) {
-			ret = -EINVAL;
-			goto err;
-		}
-
-		irq_base = irq_alloc_descs(-1, 0, nr_irqs, 0);
-		if (irq_base < 0) {
-			pr_err("Failed to allocate IRQ numbers for mux intc\n");
-			ret = irq_base;
-			goto err;
-		}
-		if (!of_property_read_u32(node, "mrvl,clr-mfp-irq",
-					  &mfp_irq)) {
-			icu_data[i].clr_mfp_irq_base = irq_base;
-			icu_data[i].clr_mfp_hwirq = mfp_irq;
-		}
-		irq_set_chained_handler(icu_data[i].cascade_irq,
-					icu_mux_irq_demux);
-		icu_data[i].nr_irqs = nr_irqs;
-		icu_data[i].virq_base = irq_base;
-		icu_data[i].domain = irq_domain_add_legacy(node, nr_irqs,
-							   irq_base, 0,
-							   &mmp_irq_domain_ops,
-							   &icu_data[i]);
-		for (irq = irq_base; irq < irq_base + nr_irqs; irq++)
-			icu_mask_irq(irq_get_irq_data(irq));
-	}
-	max_icu_nr = i;
-	return 0;
-err:
-	of_node_put(node);
-	max_icu_nr = i;
-	return ret;
-}
-
-void __init mmp_dt_irq_init(void)
-{
-	struct device_node *node;
-	const struct of_device_id *of_id;
-	struct mmp_intc_conf *conf;
-	int nr_irqs, irq_base, ret, irq;
-
-	node = of_find_matching_node(NULL, intc_ids);
-	if (!node) {
-		pr_err("Failed to find interrupt controller in arch-mmp\n");
-		return;
-	}
-	of_id = of_match_node(intc_ids, node);
-	conf = of_id->data;
+	int ret, nr_irqs, irq, i = 0;
 
 	ret = of_property_read_u32(node, "mrvl,intc-nr-irqs", &nr_irqs);
 	if (ret) {
 		pr_err("Not found mrvl,intc-nr-irqs property\n");
-		return;
+		return ret;
 	}
 
 	mmp_icu_base = of_iomap(node, 0);
 	if (!mmp_icu_base) {
 		pr_err("Failed to get interrupt controller register\n");
-		return;
+		return -ENOMEM;
 	}
 
-	irq_base = irq_alloc_descs(-1, 0, nr_irqs - NR_IRQS_LEGACY, 0);
-	if (irq_base < 0) {
-		pr_err("Failed to allocate IRQ numbers\n");
-		goto err;
-	} else if (irq_base != NR_IRQS_LEGACY) {
-		pr_err("ICU's irqbase should be started from 0\n");
-		goto err;
-	}
-	icu_data[0].conf_enable = conf->conf_enable;
-	icu_data[0].conf_disable = conf->conf_disable;
-	icu_data[0].conf_mask = conf->conf_mask;
-	icu_data[0].nr_irqs = nr_irqs;
 	icu_data[0].virq_base = 0;
-	icu_data[0].domain = irq_domain_add_legacy(node, nr_irqs, 0, 0,
+	icu_data[0].domain = irq_domain_add_linear(node, nr_irqs,
 						   &mmp_irq_domain_ops,
 						   &icu_data[0]);
-	irq_set_default_host(icu_data[0].domain);
-	for (irq = 0; irq < nr_irqs; irq++)
-		icu_mask_irq(irq_get_irq_data(irq));
-	mmp2_mux_init(node);
-	return;
+	for (irq = 0; irq < nr_irqs; irq++) {
+		ret = irq_create_mapping(icu_data[0].domain, irq);
+		if (!ret) {
+			pr_err("Failed to mapping hwirq\n");
+			goto err;
+		}
+		if (!irq)
+			icu_data[0].virq_base = ret;
+	}
+	icu_data[0].nr_irqs = nr_irqs;
+	return 0;
 err:
+	if (icu_data[0].virq_base) {
+		for (i = 0; i < irq; i++)
+			irq_dispose_mapping(icu_data[0].virq_base + i);
+	}
+	irq_domain_remove(icu_data[0].domain);
 	iounmap(mmp_icu_base);
+	return -EINVAL;
 }
+
+static int __init mmp_of_init(struct device_node *node,
+			      struct device_node *parent)
+{
+	int ret;
+
+	ret = mmp_init_bases(node);
+	if (ret < 0)
+		return ret;
+
+	icu_data[0].conf_enable = mmp_conf.conf_enable;
+	icu_data[0].conf_disable = mmp_conf.conf_disable;
+	icu_data[0].conf_mask = mmp_conf.conf_mask;
+	irq_set_default_host(icu_data[0].domain);
+	set_handle_irq(mmp_handle_irq);
+	max_icu_nr = 1;
+	return 0;
+}
+IRQCHIP_DECLARE(mmp_intc, "mrvl,mmp-intc", mmp_of_init);
+
+static int __init mmp2_of_init(struct device_node *node,
+			       struct device_node *parent)
+{
+	int ret;
+
+	ret = mmp_init_bases(node);
+	if (ret < 0)
+		return ret;
+
+	icu_data[0].conf_enable = mmp2_conf.conf_enable;
+	icu_data[0].conf_disable = mmp2_conf.conf_disable;
+	icu_data[0].conf_mask = mmp2_conf.conf_mask;
+	irq_set_default_host(icu_data[0].domain);
+	set_handle_irq(mmp2_handle_irq);
+	max_icu_nr = 1;
+	return 0;
+}
+IRQCHIP_DECLARE(mmp2_intc, "mrvl,mmp2-intc", mmp2_of_init);
+
+static int __init mmp2_mux_of_init(struct device_node *node,
+				   struct device_node *parent)
+{
+	struct resource res;
+	int i, ret, irq, j = 0;
+	u32 nr_irqs, mfp_irq;
+
+	if (!parent)
+		return -ENODEV;
+
+	i = max_icu_nr;
+	ret = of_property_read_u32(node, "mrvl,intc-nr-irqs",
+				   &nr_irqs);
+	if (ret) {
+		pr_err("Not found mrvl,intc-nr-irqs property\n");
+		return -EINVAL;
+	}
+	ret = of_address_to_resource(node, 0, &res);
+	if (ret < 0) {
+		pr_err("Not found reg property\n");
+		return -EINVAL;
+	}
+	icu_data[i].reg_status = mmp_icu_base + res.start;
+	ret = of_address_to_resource(node, 1, &res);
+	if (ret < 0) {
+		pr_err("Not found reg property\n");
+		return -EINVAL;
+	}
+	icu_data[i].reg_mask = mmp_icu_base + res.start;
+	icu_data[i].cascade_irq = irq_of_parse_and_map(node, 0);
+	if (!icu_data[i].cascade_irq)
+		return -EINVAL;
+
+	icu_data[i].virq_base = 0;
+	icu_data[i].domain = irq_domain_add_linear(node, nr_irqs,
+						   &mmp_irq_domain_ops,
+						   &icu_data[i]);
+	for (irq = 0; irq < nr_irqs; irq++) {
+		ret = irq_create_mapping(icu_data[i].domain, irq);
+		if (!ret) {
+			pr_err("Failed to mapping hwirq\n");
+			goto err;
+		}
+		if (!irq)
+			icu_data[i].virq_base = ret;
+	}
+	icu_data[i].nr_irqs = nr_irqs;
+	if (!of_property_read_u32(node, "mrvl,clr-mfp-irq",
+				  &mfp_irq)) {
+		icu_data[i].clr_mfp_irq_base = icu_data[i].virq_base;
+		icu_data[i].clr_mfp_hwirq = mfp_irq;
+	}
+	irq_set_chained_handler(icu_data[i].cascade_irq,
+				icu_mux_irq_demux);
+	max_icu_nr++;
+	return 0;
+err:
+	if (icu_data[i].virq_base) {
+		for (j = 0; j < irq; j++)
+			irq_dispose_mapping(icu_data[i].virq_base + j);
+	}
+	irq_domain_remove(icu_data[i].domain);
+	return -EINVAL;
+}
+IRQCHIP_DECLARE(mmp2_mux_intc, "mrvl,mmp2-mux-intc", mmp2_mux_of_init);
 #endif
diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c
index 28433a1..70dfcdc 100644
--- a/drivers/lguest/interrupts_and_traps.c
+++ b/drivers/lguest/interrupts_and_traps.c
@@ -140,6 +140,16 @@
 	cpu->regs->eip = idt_address(lo, hi);
 
 	/*
+	 * Trapping always clears these flags:
+	 * TF: Trap flag
+	 * VM: Virtual 8086 mode
+	 * RF: Resume
+	 * NT: Nested task.
+	 */
+	cpu->regs->eflags &=
+		~(X86_EFLAGS_TF|X86_EFLAGS_VM|X86_EFLAGS_RF|X86_EFLAGS_NT);
+
+	/*
 	 * There are two kinds of interrupt handlers: 0xE is an "interrupt
 	 * gate" which expects interrupts to be disabled on entry.
 	 */
diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c
index a35d8d1..bfb39bb 100644
--- a/drivers/lguest/page_tables.c
+++ b/drivers/lguest/page_tables.c
@@ -669,8 +669,10 @@
 
 #ifdef CONFIG_X86_PAE
 	gpmd = lgread(cpu, gpmd_addr(gpgd, vaddr), pmd_t);
-	if (!(pmd_flags(gpmd) & _PAGE_PRESENT))
+	if (!(pmd_flags(gpmd) & _PAGE_PRESENT)) {
 		kill_guest(cpu, "Bad address %#lx", vaddr);
+		return -1UL;
+	}
 	gpte = lgread(cpu, gpte_addr(cpu, gpmd, vaddr), pte_t);
 #else
 	gpte = lgread(cpu, gpte_addr(cpu, gpgd, vaddr), pte_t);
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index 5ef78ef..2acc43f 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -3,7 +3,7 @@
 #
 
 dm-mod-y	+= dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \
-		   dm-ioctl.o dm-io.o dm-kcopyd.o dm-sysfs.o
+		   dm-ioctl.o dm-io.o dm-kcopyd.o dm-sysfs.o dm-stats.o
 dm-multipath-y	+= dm-path-selector.o dm-mpath.o
 dm-snapshot-y	+= dm-snap.o dm-exception-store.o dm-snap-transient.o \
 		    dm-snap-persistent.o
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 0df3ec0..2956976 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -67,9 +67,11 @@
 #define MIGRATION_COUNT_WINDOW 10
 
 /*
- * The block size of the device holding cache data must be >= 32KB
+ * The block size of the device holding cache data must be
+ * between 32KB and 1GB.
  */
 #define DATA_DEV_BLOCK_SIZE_MIN_SECTORS (32 * 1024 >> SECTOR_SHIFT)
+#define DATA_DEV_BLOCK_SIZE_MAX_SECTORS (1024 * 1024 * 1024 >> SECTOR_SHIFT)
 
 /*
  * FIXME: the cache is read/write for the time being.
@@ -101,6 +103,8 @@
 	struct dm_target *ti;
 	struct dm_target_callbacks callbacks;
 
+	struct dm_cache_metadata *cmd;
+
 	/*
 	 * Metadata is written to this device.
 	 */
@@ -117,11 +121,6 @@
 	struct dm_dev *cache_dev;
 
 	/*
-	 * Cache features such as write-through.
-	 */
-	struct cache_features features;
-
-	/*
 	 * Size of the origin device in _complete_ blocks and native sectors.
 	 */
 	dm_oblock_t origin_blocks;
@@ -138,8 +137,6 @@
 	uint32_t sectors_per_block;
 	int sectors_per_block_shift;
 
-	struct dm_cache_metadata *cmd;
-
 	spinlock_t lock;
 	struct bio_list deferred_bios;
 	struct bio_list deferred_flush_bios;
@@ -148,8 +145,8 @@
 	struct list_head completed_migrations;
 	struct list_head need_commit_migrations;
 	sector_t migration_threshold;
-	atomic_t nr_migrations;
 	wait_queue_head_t migration_wait;
+	atomic_t nr_migrations;
 
 	/*
 	 * cache_size entries, dirty if set
@@ -160,9 +157,16 @@
 	/*
 	 * origin_blocks entries, discarded if set.
 	 */
-	uint32_t discard_block_size; /* a power of 2 times sectors per block */
 	dm_dblock_t discard_nr_blocks;
 	unsigned long *discard_bitset;
+	uint32_t discard_block_size; /* a power of 2 times sectors per block */
+
+	/*
+	 * Rather than reconstructing the table line for the status we just
+	 * save it and regurgitate.
+	 */
+	unsigned nr_ctr_args;
+	const char **ctr_args;
 
 	struct dm_kcopyd_client *copier;
 	struct workqueue_struct *wq;
@@ -187,14 +191,12 @@
 	bool loaded_mappings:1;
 	bool loaded_discards:1;
 
-	struct cache_stats stats;
-
 	/*
-	 * Rather than reconstructing the table line for the status we just
-	 * save it and regurgitate.
+	 * Cache features such as write-through.
 	 */
-	unsigned nr_ctr_args;
-	const char **ctr_args;
+	struct cache_features features;
+
+	struct cache_stats stats;
 };
 
 struct per_bio_data {
@@ -1687,24 +1689,25 @@
 static int parse_block_size(struct cache_args *ca, struct dm_arg_set *as,
 			    char **error)
 {
-	unsigned long tmp;
+	unsigned long block_size;
 
 	if (!at_least_one_arg(as, error))
 		return -EINVAL;
 
-	if (kstrtoul(dm_shift_arg(as), 10, &tmp) || !tmp ||
-	    tmp < DATA_DEV_BLOCK_SIZE_MIN_SECTORS ||
-	    tmp & (DATA_DEV_BLOCK_SIZE_MIN_SECTORS - 1)) {
+	if (kstrtoul(dm_shift_arg(as), 10, &block_size) || !block_size ||
+	    block_size < DATA_DEV_BLOCK_SIZE_MIN_SECTORS ||
+	    block_size > DATA_DEV_BLOCK_SIZE_MAX_SECTORS ||
+	    block_size & (DATA_DEV_BLOCK_SIZE_MIN_SECTORS - 1)) {
 		*error = "Invalid data block size";
 		return -EINVAL;
 	}
 
-	if (tmp > ca->cache_sectors) {
+	if (block_size > ca->cache_sectors) {
 		*error = "Data block size is larger than the cache device";
 		return -EINVAL;
 	}
 
-	ca->block_size = tmp;
+	ca->block_size = block_size;
 
 	return 0;
 }
@@ -2609,9 +2612,17 @@
 static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
 {
 	struct cache *cache = ti->private;
+	uint64_t io_opt_sectors = limits->io_opt >> SECTOR_SHIFT;
 
-	blk_limits_io_min(limits, 0);
-	blk_limits_io_opt(limits, cache->sectors_per_block << SECTOR_SHIFT);
+	/*
+	 * If the system-determined stacked limits are compatible with the
+	 * cache's blocksize (io_opt is a factor) do not override them.
+	 */
+	if (io_opt_sectors < cache->sectors_per_block ||
+	    do_div(io_opt_sectors, cache->sectors_per_block)) {
+		blk_limits_io_min(limits, 0);
+		blk_limits_io_opt(limits, cache->sectors_per_block << SECTOR_SHIFT);
+	}
 	set_discard_limits(cache, limits);
 }
 
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 6d2d41a..0fce0bc 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1645,20 +1645,14 @@
 	}
 
 	ret = -ENOMEM;
-	cc->io_queue = alloc_workqueue("kcryptd_io",
-				       WQ_NON_REENTRANT|
-				       WQ_MEM_RECLAIM,
-				       1);
+	cc->io_queue = alloc_workqueue("kcryptd_io", WQ_MEM_RECLAIM, 1);
 	if (!cc->io_queue) {
 		ti->error = "Couldn't create kcryptd io queue";
 		goto bad;
 	}
 
 	cc->crypt_queue = alloc_workqueue("kcryptd",
-					  WQ_NON_REENTRANT|
-					  WQ_CPU_INTENSIVE|
-					  WQ_MEM_RECLAIM,
-					  1);
+					  WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 1);
 	if (!cc->crypt_queue) {
 		ti->error = "Couldn't create kcryptd queue";
 		goto bad;
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index f1b7586..afe0814 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -877,7 +877,7 @@
 	unsigned change_uuid = (param->flags & DM_UUID_FLAG) ? 1 : 0;
 
 	if (new_data < param->data ||
-	    invalid_str(new_data, (void *) param + param_size) ||
+	    invalid_str(new_data, (void *) param + param_size) || !*new_data ||
 	    strlen(new_data) > (change_uuid ? DM_UUID_LEN - 1 : DM_NAME_LEN - 1)) {
 		DMWARN("Invalid new mapped device name or uuid string supplied.");
 		return -EINVAL;
@@ -1262,44 +1262,37 @@
 
 	r = dm_table_create(&t, get_mode(param), param->target_count, md);
 	if (r)
-		goto out;
+		goto err;
 
+	/* Protect md->type and md->queue against concurrent table loads. */
+	dm_lock_md_type(md);
 	r = populate_table(t, param, param_size);
-	if (r) {
-		dm_table_destroy(t);
-		goto out;
-	}
+	if (r)
+		goto err_unlock_md_type;
 
 	immutable_target_type = dm_get_immutable_target_type(md);
 	if (immutable_target_type &&
 	    (immutable_target_type != dm_table_get_immutable_target_type(t))) {
 		DMWARN("can't replace immutable target type %s",
 		       immutable_target_type->name);
-		dm_table_destroy(t);
 		r = -EINVAL;
-		goto out;
+		goto err_unlock_md_type;
 	}
 
-	/* Protect md->type and md->queue against concurrent table loads. */
-	dm_lock_md_type(md);
 	if (dm_get_md_type(md) == DM_TYPE_NONE)
 		/* Initial table load: acquire type of table. */
 		dm_set_md_type(md, dm_table_get_type(t));
 	else if (dm_get_md_type(md) != dm_table_get_type(t)) {
 		DMWARN("can't change device type after initial table load.");
-		dm_table_destroy(t);
-		dm_unlock_md_type(md);
 		r = -EINVAL;
-		goto out;
+		goto err_unlock_md_type;
 	}
 
 	/* setup md->queue to reflect md's type (may block) */
 	r = dm_setup_md_queue(md);
 	if (r) {
 		DMWARN("unable to set up device queue for new table.");
-		dm_table_destroy(t);
-		dm_unlock_md_type(md);
-		goto out;
+		goto err_unlock_md_type;
 	}
 	dm_unlock_md_type(md);
 
@@ -1309,9 +1302,8 @@
 	if (!hc || hc->md != md) {
 		DMWARN("device has been removed from the dev hash table.");
 		up_write(&_hash_lock);
-		dm_table_destroy(t);
 		r = -ENXIO;
-		goto out;
+		goto err_destroy_table;
 	}
 
 	if (hc->new_map)
@@ -1322,7 +1314,6 @@
 	param->flags |= DM_INACTIVE_PRESENT_FLAG;
 	__dev_status(md, param);
 
-out:
 	if (old_map) {
 		dm_sync_table(md);
 		dm_table_destroy(old_map);
@@ -1330,6 +1321,15 @@
 
 	dm_put(md);
 
+	return 0;
+
+err_unlock_md_type:
+	dm_unlock_md_type(md);
+err_destroy_table:
+	dm_table_destroy(t);
+err:
+	dm_put(md);
+
 	return r;
 }
 
@@ -1455,20 +1455,26 @@
 	return 0;
 }
 
-static bool buffer_test_overflow(char *result, unsigned maxlen)
-{
-	return !maxlen || strlen(result) + 1 >= maxlen;
-}
-
 /*
- * Process device-mapper dependent messages.
+ * Process device-mapper dependent messages.  Messages prefixed with '@'
+ * are processed by the DM core.  All others are delivered to the target.
  * Returns a number <= 1 if message was processed by device mapper.
  * Returns 2 if message should be delivered to the target.
  */
 static int message_for_md(struct mapped_device *md, unsigned argc, char **argv,
 			  char *result, unsigned maxlen)
 {
-	return 2;
+	int r;
+
+	if (**argv != '@')
+		return 2; /* no '@' prefix, deliver to target */
+
+	r = dm_stats_message(md, argc, argv, result, maxlen);
+	if (r < 2)
+		return r;
+
+	DMERR("Unsupported message sent to DM core: %s", argv[0]);
+	return -EINVAL;
 }
 
 /*
@@ -1542,7 +1548,7 @@
 
 	if (r == 1) {
 		param->flags |= DM_DATA_OUT_FLAG;
-		if (buffer_test_overflow(result, maxlen))
+		if (dm_message_test_buffer_overflow(result, maxlen))
 			param->flags |= DM_BUFFER_FULL_FLAG;
 		else
 			param->data_size = param->data_start + strlen(result) + 1;
diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c
index d581fe5..3a7cade 100644
--- a/drivers/md/dm-kcopyd.c
+++ b/drivers/md/dm-kcopyd.c
@@ -833,8 +833,7 @@
 		goto bad_slab;
 
 	INIT_WORK(&kc->kcopyd_work, do_work);
-	kc->kcopyd_wq = alloc_workqueue("kcopyd",
-					WQ_NON_REENTRANT | WQ_MEM_RECLAIM, 0);
+	kc->kcopyd_wq = alloc_workqueue("kcopyd", WQ_MEM_RECLAIM, 0);
 	if (!kc->kcopyd_wq)
 		goto bad_workqueue;
 
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 699b5be..9584443 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -1080,8 +1080,7 @@
 	ti->per_bio_data_size = sizeof(struct dm_raid1_bio_record);
 	ti->discard_zeroes_data_unsupported = true;
 
-	ms->kmirrord_wq = alloc_workqueue("kmirrord",
-					  WQ_NON_REENTRANT | WQ_MEM_RECLAIM, 0);
+	ms->kmirrord_wq = alloc_workqueue("kmirrord", WQ_MEM_RECLAIM, 0);
 	if (!ms->kmirrord_wq) {
 		DMERR("couldn't start kmirrord");
 		r = -ENOMEM;
diff --git a/drivers/md/dm-stats.c b/drivers/md/dm-stats.c
new file mode 100644
index 0000000..8ae31e8
--- /dev/null
+++ b/drivers/md/dm-stats.c
@@ -0,0 +1,969 @@
+#include <linux/errno.h>
+#include <linux/numa.h>
+#include <linux/slab.h>
+#include <linux/rculist.h>
+#include <linux/threads.h>
+#include <linux/preempt.h>
+#include <linux/irqflags.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/device-mapper.h>
+
+#include "dm.h"
+#include "dm-stats.h"
+
+#define DM_MSG_PREFIX "stats"
+
+static int dm_stat_need_rcu_barrier;
+
+/*
+ * Using 64-bit values to avoid overflow (which is a
+ * problem that block/genhd.c's IO accounting has).
+ */
+struct dm_stat_percpu {
+	unsigned long long sectors[2];
+	unsigned long long ios[2];
+	unsigned long long merges[2];
+	unsigned long long ticks[2];
+	unsigned long long io_ticks[2];
+	unsigned long long io_ticks_total;
+	unsigned long long time_in_queue;
+};
+
+struct dm_stat_shared {
+	atomic_t in_flight[2];
+	unsigned long stamp;
+	struct dm_stat_percpu tmp;
+};
+
+struct dm_stat {
+	struct list_head list_entry;
+	int id;
+	size_t n_entries;
+	sector_t start;
+	sector_t end;
+	sector_t step;
+	const char *program_id;
+	const char *aux_data;
+	struct rcu_head rcu_head;
+	size_t shared_alloc_size;
+	size_t percpu_alloc_size;
+	struct dm_stat_percpu *stat_percpu[NR_CPUS];
+	struct dm_stat_shared stat_shared[0];
+};
+
+struct dm_stats_last_position {
+	sector_t last_sector;
+	unsigned last_rw;
+};
+
+/*
+ * A typo on the command line could possibly make the kernel run out of memory
+ * and crash. To prevent the crash we account all used memory. We fail if we
+ * exhaust 1/4 of all memory or 1/2 of vmalloc space.
+ */
+#define DM_STATS_MEMORY_FACTOR		4
+#define DM_STATS_VMALLOC_FACTOR		2
+
+static DEFINE_SPINLOCK(shared_memory_lock);
+
+static unsigned long shared_memory_amount;
+
+static bool __check_shared_memory(size_t alloc_size)
+{
+	size_t a;
+
+	a = shared_memory_amount + alloc_size;
+	if (a < shared_memory_amount)
+		return false;
+	if (a >> PAGE_SHIFT > totalram_pages / DM_STATS_MEMORY_FACTOR)
+		return false;
+#ifdef CONFIG_MMU
+	if (a > (VMALLOC_END - VMALLOC_START) / DM_STATS_VMALLOC_FACTOR)
+		return false;
+#endif
+	return true;
+}
+
+static bool check_shared_memory(size_t alloc_size)
+{
+	bool ret;
+
+	spin_lock_irq(&shared_memory_lock);
+
+	ret = __check_shared_memory(alloc_size);
+
+	spin_unlock_irq(&shared_memory_lock);
+
+	return ret;
+}
+
+static bool claim_shared_memory(size_t alloc_size)
+{
+	spin_lock_irq(&shared_memory_lock);
+
+	if (!__check_shared_memory(alloc_size)) {
+		spin_unlock_irq(&shared_memory_lock);
+		return false;
+	}
+
+	shared_memory_amount += alloc_size;
+
+	spin_unlock_irq(&shared_memory_lock);
+
+	return true;
+}
+
+static void free_shared_memory(size_t alloc_size)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&shared_memory_lock, flags);
+
+	if (WARN_ON_ONCE(shared_memory_amount < alloc_size)) {
+		spin_unlock_irqrestore(&shared_memory_lock, flags);
+		DMCRIT("Memory usage accounting bug.");
+		return;
+	}
+
+	shared_memory_amount -= alloc_size;
+
+	spin_unlock_irqrestore(&shared_memory_lock, flags);
+}
+
+static void *dm_kvzalloc(size_t alloc_size, int node)
+{
+	void *p;
+
+	if (!claim_shared_memory(alloc_size))
+		return NULL;
+
+	if (alloc_size <= KMALLOC_MAX_SIZE) {
+		p = kzalloc_node(alloc_size, GFP_KERNEL | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN, node);
+		if (p)
+			return p;
+	}
+	p = vzalloc_node(alloc_size, node);
+	if (p)
+		return p;
+
+	free_shared_memory(alloc_size);
+
+	return NULL;
+}
+
+static void dm_kvfree(void *ptr, size_t alloc_size)
+{
+	if (!ptr)
+		return;
+
+	free_shared_memory(alloc_size);
+
+	if (is_vmalloc_addr(ptr))
+		vfree(ptr);
+	else
+		kfree(ptr);
+}
+
+static void dm_stat_free(struct rcu_head *head)
+{
+	int cpu;
+	struct dm_stat *s = container_of(head, struct dm_stat, rcu_head);
+
+	kfree(s->program_id);
+	kfree(s->aux_data);
+	for_each_possible_cpu(cpu)
+		dm_kvfree(s->stat_percpu[cpu], s->percpu_alloc_size);
+	dm_kvfree(s, s->shared_alloc_size);
+}
+
+static int dm_stat_in_flight(struct dm_stat_shared *shared)
+{
+	return atomic_read(&shared->in_flight[READ]) +
+	       atomic_read(&shared->in_flight[WRITE]);
+}
+
+void dm_stats_init(struct dm_stats *stats)
+{
+	int cpu;
+	struct dm_stats_last_position *last;
+
+	mutex_init(&stats->mutex);
+	INIT_LIST_HEAD(&stats->list);
+	stats->last = alloc_percpu(struct dm_stats_last_position);
+	for_each_possible_cpu(cpu) {
+		last = per_cpu_ptr(stats->last, cpu);
+		last->last_sector = (sector_t)ULLONG_MAX;
+		last->last_rw = UINT_MAX;
+	}
+}
+
+void dm_stats_cleanup(struct dm_stats *stats)
+{
+	size_t ni;
+	struct dm_stat *s;
+	struct dm_stat_shared *shared;
+
+	while (!list_empty(&stats->list)) {
+		s = container_of(stats->list.next, struct dm_stat, list_entry);
+		list_del(&s->list_entry);
+		for (ni = 0; ni < s->n_entries; ni++) {
+			shared = &s->stat_shared[ni];
+			if (WARN_ON(dm_stat_in_flight(shared))) {
+				DMCRIT("leaked in-flight counter at index %lu "
+				       "(start %llu, end %llu, step %llu): reads %d, writes %d",
+				       (unsigned long)ni,
+				       (unsigned long long)s->start,
+				       (unsigned long long)s->end,
+				       (unsigned long long)s->step,
+				       atomic_read(&shared->in_flight[READ]),
+				       atomic_read(&shared->in_flight[WRITE]));
+			}
+		}
+		dm_stat_free(&s->rcu_head);
+	}
+	free_percpu(stats->last);
+}
+
+static int dm_stats_create(struct dm_stats *stats, sector_t start, sector_t end,
+			   sector_t step, const char *program_id, const char *aux_data,
+			   void (*suspend_callback)(struct mapped_device *),
+			   void (*resume_callback)(struct mapped_device *),
+			   struct mapped_device *md)
+{
+	struct list_head *l;
+	struct dm_stat *s, *tmp_s;
+	sector_t n_entries;
+	size_t ni;
+	size_t shared_alloc_size;
+	size_t percpu_alloc_size;
+	struct dm_stat_percpu *p;
+	int cpu;
+	int ret_id;
+	int r;
+
+	if (end < start || !step)
+		return -EINVAL;
+
+	n_entries = end - start;
+	if (dm_sector_div64(n_entries, step))
+		n_entries++;
+
+	if (n_entries != (size_t)n_entries || !(size_t)(n_entries + 1))
+		return -EOVERFLOW;
+
+	shared_alloc_size = sizeof(struct dm_stat) + (size_t)n_entries * sizeof(struct dm_stat_shared);
+	if ((shared_alloc_size - sizeof(struct dm_stat)) / sizeof(struct dm_stat_shared) != n_entries)
+		return -EOVERFLOW;
+
+	percpu_alloc_size = (size_t)n_entries * sizeof(struct dm_stat_percpu);
+	if (percpu_alloc_size / sizeof(struct dm_stat_percpu) != n_entries)
+		return -EOVERFLOW;
+
+	if (!check_shared_memory(shared_alloc_size + num_possible_cpus() * percpu_alloc_size))
+		return -ENOMEM;
+
+	s = dm_kvzalloc(shared_alloc_size, NUMA_NO_NODE);
+	if (!s)
+		return -ENOMEM;
+
+	s->n_entries = n_entries;
+	s->start = start;
+	s->end = end;
+	s->step = step;
+	s->shared_alloc_size = shared_alloc_size;
+	s->percpu_alloc_size = percpu_alloc_size;
+
+	s->program_id = kstrdup(program_id, GFP_KERNEL);
+	if (!s->program_id) {
+		r = -ENOMEM;
+		goto out;
+	}
+	s->aux_data = kstrdup(aux_data, GFP_KERNEL);
+	if (!s->aux_data) {
+		r = -ENOMEM;
+		goto out;
+	}
+
+	for (ni = 0; ni < n_entries; ni++) {
+		atomic_set(&s->stat_shared[ni].in_flight[READ], 0);
+		atomic_set(&s->stat_shared[ni].in_flight[WRITE], 0);
+	}
+
+	for_each_possible_cpu(cpu) {
+		p = dm_kvzalloc(percpu_alloc_size, cpu_to_node(cpu));
+		if (!p) {
+			r = -ENOMEM;
+			goto out;
+		}
+		s->stat_percpu[cpu] = p;
+	}
+
+	/*
+	 * Suspend/resume to make sure there is no i/o in flight,
+	 * so that newly created statistics will be exact.
+	 *
+	 * (note: we couldn't suspend earlier because we must not
+	 * allocate memory while suspended)
+	 */
+	suspend_callback(md);
+
+	mutex_lock(&stats->mutex);
+	s->id = 0;
+	list_for_each(l, &stats->list) {
+		tmp_s = container_of(l, struct dm_stat, list_entry);
+		if (WARN_ON(tmp_s->id < s->id)) {
+			r = -EINVAL;
+			goto out_unlock_resume;
+		}
+		if (tmp_s->id > s->id)
+			break;
+		if (unlikely(s->id == INT_MAX)) {
+			r = -ENFILE;
+			goto out_unlock_resume;
+		}
+		s->id++;
+	}
+	ret_id = s->id;
+	list_add_tail_rcu(&s->list_entry, l);
+	mutex_unlock(&stats->mutex);
+
+	resume_callback(md);
+
+	return ret_id;
+
+out_unlock_resume:
+	mutex_unlock(&stats->mutex);
+	resume_callback(md);
+out:
+	dm_stat_free(&s->rcu_head);
+	return r;
+}
+
+static struct dm_stat *__dm_stats_find(struct dm_stats *stats, int id)
+{
+	struct dm_stat *s;
+
+	list_for_each_entry(s, &stats->list, list_entry) {
+		if (s->id > id)
+			break;
+		if (s->id == id)
+			return s;
+	}
+
+	return NULL;
+}
+
+static int dm_stats_delete(struct dm_stats *stats, int id)
+{
+	struct dm_stat *s;
+	int cpu;
+
+	mutex_lock(&stats->mutex);
+
+	s = __dm_stats_find(stats, id);
+	if (!s) {
+		mutex_unlock(&stats->mutex);
+		return -ENOENT;
+	}
+
+	list_del_rcu(&s->list_entry);
+	mutex_unlock(&stats->mutex);
+
+	/*
+	 * vfree can't be called from RCU callback
+	 */
+	for_each_possible_cpu(cpu)
+		if (is_vmalloc_addr(s->stat_percpu))
+			goto do_sync_free;
+	if (is_vmalloc_addr(s)) {
+do_sync_free:
+		synchronize_rcu_expedited();
+		dm_stat_free(&s->rcu_head);
+	} else {
+		ACCESS_ONCE(dm_stat_need_rcu_barrier) = 1;
+		call_rcu(&s->rcu_head, dm_stat_free);
+	}
+	return 0;
+}
+
+static int dm_stats_list(struct dm_stats *stats, const char *program,
+			 char *result, unsigned maxlen)
+{
+	struct dm_stat *s;
+	sector_t len;
+	unsigned sz = 0;
+
+	/*
+	 * Output format:
+	 *   <region_id>: <start_sector>+<length> <step> <program_id> <aux_data>
+	 */
+
+	mutex_lock(&stats->mutex);
+	list_for_each_entry(s, &stats->list, list_entry) {
+		if (!program || !strcmp(program, s->program_id)) {
+			len = s->end - s->start;
+			DMEMIT("%d: %llu+%llu %llu %s %s\n", s->id,
+				(unsigned long long)s->start,
+				(unsigned long long)len,
+				(unsigned long long)s->step,
+				s->program_id,
+				s->aux_data);
+		}
+	}
+	mutex_unlock(&stats->mutex);
+
+	return 1;
+}
+
+static void dm_stat_round(struct dm_stat_shared *shared, struct dm_stat_percpu *p)
+{
+	/*
+	 * This is racy, but so is part_round_stats_single.
+	 */
+	unsigned long now = jiffies;
+	unsigned in_flight_read;
+	unsigned in_flight_write;
+	unsigned long difference = now - shared->stamp;
+
+	if (!difference)
+		return;
+	in_flight_read = (unsigned)atomic_read(&shared->in_flight[READ]);
+	in_flight_write = (unsigned)atomic_read(&shared->in_flight[WRITE]);
+	if (in_flight_read)
+		p->io_ticks[READ] += difference;
+	if (in_flight_write)
+		p->io_ticks[WRITE] += difference;
+	if (in_flight_read + in_flight_write) {
+		p->io_ticks_total += difference;
+		p->time_in_queue += (in_flight_read + in_flight_write) * difference;
+	}
+	shared->stamp = now;
+}
+
+static void dm_stat_for_entry(struct dm_stat *s, size_t entry,
+			      unsigned long bi_rw, sector_t len, bool merged,
+			      bool end, unsigned long duration)
+{
+	unsigned long idx = bi_rw & REQ_WRITE;
+	struct dm_stat_shared *shared = &s->stat_shared[entry];
+	struct dm_stat_percpu *p;
+
+	/*
+	 * For strict correctness we should use local_irq_disable/enable
+	 * instead of preempt_disable/enable.
+	 *
+	 * This is racy if the driver finishes bios from non-interrupt
+	 * context as well as from interrupt context or from more different
+	 * interrupts.
+	 *
+	 * However, the race only results in not counting some events,
+	 * so it is acceptable.
+	 *
+	 * part_stat_lock()/part_stat_unlock() have this race too.
+	 */
+	preempt_disable();
+	p = &s->stat_percpu[smp_processor_id()][entry];
+
+	if (!end) {
+		dm_stat_round(shared, p);
+		atomic_inc(&shared->in_flight[idx]);
+	} else {
+		dm_stat_round(shared, p);
+		atomic_dec(&shared->in_flight[idx]);
+		p->sectors[idx] += len;
+		p->ios[idx] += 1;
+		p->merges[idx] += merged;
+		p->ticks[idx] += duration;
+	}
+
+	preempt_enable();
+}
+
+static void __dm_stat_bio(struct dm_stat *s, unsigned long bi_rw,
+			  sector_t bi_sector, sector_t end_sector,
+			  bool end, unsigned long duration,
+			  struct dm_stats_aux *stats_aux)
+{
+	sector_t rel_sector, offset, todo, fragment_len;
+	size_t entry;
+
+	if (end_sector <= s->start || bi_sector >= s->end)
+		return;
+	if (unlikely(bi_sector < s->start)) {
+		rel_sector = 0;
+		todo = end_sector - s->start;
+	} else {
+		rel_sector = bi_sector - s->start;
+		todo = end_sector - bi_sector;
+	}
+	if (unlikely(end_sector > s->end))
+		todo -= (end_sector - s->end);
+
+	offset = dm_sector_div64(rel_sector, s->step);
+	entry = rel_sector;
+	do {
+		if (WARN_ON_ONCE(entry >= s->n_entries)) {
+			DMCRIT("Invalid area access in region id %d", s->id);
+			return;
+		}
+		fragment_len = todo;
+		if (fragment_len > s->step - offset)
+			fragment_len = s->step - offset;
+		dm_stat_for_entry(s, entry, bi_rw, fragment_len,
+				  stats_aux->merged, end, duration);
+		todo -= fragment_len;
+		entry++;
+		offset = 0;
+	} while (unlikely(todo != 0));
+}
+
+void dm_stats_account_io(struct dm_stats *stats, unsigned long bi_rw,
+			 sector_t bi_sector, unsigned bi_sectors, bool end,
+			 unsigned long duration, struct dm_stats_aux *stats_aux)
+{
+	struct dm_stat *s;
+	sector_t end_sector;
+	struct dm_stats_last_position *last;
+
+	if (unlikely(!bi_sectors))
+		return;
+
+	end_sector = bi_sector + bi_sectors;
+
+	if (!end) {
+		/*
+		 * A race condition can at worst result in the merged flag being
+		 * misrepresented, so we don't have to disable preemption here.
+		 */
+		last = __this_cpu_ptr(stats->last);
+		stats_aux->merged =
+			(bi_sector == (ACCESS_ONCE(last->last_sector) &&
+				       ((bi_rw & (REQ_WRITE | REQ_DISCARD)) ==
+					(ACCESS_ONCE(last->last_rw) & (REQ_WRITE | REQ_DISCARD)))
+				       ));
+		ACCESS_ONCE(last->last_sector) = end_sector;
+		ACCESS_ONCE(last->last_rw) = bi_rw;
+	}
+
+	rcu_read_lock();
+
+	list_for_each_entry_rcu(s, &stats->list, list_entry)
+		__dm_stat_bio(s, bi_rw, bi_sector, end_sector, end, duration, stats_aux);
+
+	rcu_read_unlock();
+}
+
+static void __dm_stat_init_temporary_percpu_totals(struct dm_stat_shared *shared,
+						   struct dm_stat *s, size_t x)
+{
+	int cpu;
+	struct dm_stat_percpu *p;
+
+	local_irq_disable();
+	p = &s->stat_percpu[smp_processor_id()][x];
+	dm_stat_round(shared, p);
+	local_irq_enable();
+
+	memset(&shared->tmp, 0, sizeof(shared->tmp));
+	for_each_possible_cpu(cpu) {
+		p = &s->stat_percpu[cpu][x];
+		shared->tmp.sectors[READ] += ACCESS_ONCE(p->sectors[READ]);
+		shared->tmp.sectors[WRITE] += ACCESS_ONCE(p->sectors[WRITE]);
+		shared->tmp.ios[READ] += ACCESS_ONCE(p->ios[READ]);
+		shared->tmp.ios[WRITE] += ACCESS_ONCE(p->ios[WRITE]);
+		shared->tmp.merges[READ] += ACCESS_ONCE(p->merges[READ]);
+		shared->tmp.merges[WRITE] += ACCESS_ONCE(p->merges[WRITE]);
+		shared->tmp.ticks[READ] += ACCESS_ONCE(p->ticks[READ]);
+		shared->tmp.ticks[WRITE] += ACCESS_ONCE(p->ticks[WRITE]);
+		shared->tmp.io_ticks[READ] += ACCESS_ONCE(p->io_ticks[READ]);
+		shared->tmp.io_ticks[WRITE] += ACCESS_ONCE(p->io_ticks[WRITE]);
+		shared->tmp.io_ticks_total += ACCESS_ONCE(p->io_ticks_total);
+		shared->tmp.time_in_queue += ACCESS_ONCE(p->time_in_queue);
+	}
+}
+
+static void __dm_stat_clear(struct dm_stat *s, size_t idx_start, size_t idx_end,
+			    bool init_tmp_percpu_totals)
+{
+	size_t x;
+	struct dm_stat_shared *shared;
+	struct dm_stat_percpu *p;
+
+	for (x = idx_start; x < idx_end; x++) {
+		shared = &s->stat_shared[x];
+		if (init_tmp_percpu_totals)
+			__dm_stat_init_temporary_percpu_totals(shared, s, x);
+		local_irq_disable();
+		p = &s->stat_percpu[smp_processor_id()][x];
+		p->sectors[READ] -= shared->tmp.sectors[READ];
+		p->sectors[WRITE] -= shared->tmp.sectors[WRITE];
+		p->ios[READ] -= shared->tmp.ios[READ];
+		p->ios[WRITE] -= shared->tmp.ios[WRITE];
+		p->merges[READ] -= shared->tmp.merges[READ];
+		p->merges[WRITE] -= shared->tmp.merges[WRITE];
+		p->ticks[READ] -= shared->tmp.ticks[READ];
+		p->ticks[WRITE] -= shared->tmp.ticks[WRITE];
+		p->io_ticks[READ] -= shared->tmp.io_ticks[READ];
+		p->io_ticks[WRITE] -= shared->tmp.io_ticks[WRITE];
+		p->io_ticks_total -= shared->tmp.io_ticks_total;
+		p->time_in_queue -= shared->tmp.time_in_queue;
+		local_irq_enable();
+	}
+}
+
+static int dm_stats_clear(struct dm_stats *stats, int id)
+{
+	struct dm_stat *s;
+
+	mutex_lock(&stats->mutex);
+
+	s = __dm_stats_find(stats, id);
+	if (!s) {
+		mutex_unlock(&stats->mutex);
+		return -ENOENT;
+	}
+
+	__dm_stat_clear(s, 0, s->n_entries, true);
+
+	mutex_unlock(&stats->mutex);
+
+	return 1;
+}
+
+/*
+ * This is like jiffies_to_msec, but works for 64-bit values.
+ */
+static unsigned long long dm_jiffies_to_msec64(unsigned long long j)
+{
+	unsigned long long result = 0;
+	unsigned mult;
+
+	if (j)
+		result = jiffies_to_msecs(j & 0x3fffff);
+	if (j >= 1 << 22) {
+		mult = jiffies_to_msecs(1 << 22);
+		result += (unsigned long long)mult * (unsigned long long)jiffies_to_msecs((j >> 22) & 0x3fffff);
+	}
+	if (j >= 1ULL << 44)
+		result += (unsigned long long)mult * (unsigned long long)mult * (unsigned long long)jiffies_to_msecs(j >> 44);
+
+	return result;
+}
+
+static int dm_stats_print(struct dm_stats *stats, int id,
+			  size_t idx_start, size_t idx_len,
+			  bool clear, char *result, unsigned maxlen)
+{
+	unsigned sz = 0;
+	struct dm_stat *s;
+	size_t x;
+	sector_t start, end, step;
+	size_t idx_end;
+	struct dm_stat_shared *shared;
+
+	/*
+	 * Output format:
+	 *   <start_sector>+<length> counters
+	 */
+
+	mutex_lock(&stats->mutex);
+
+	s = __dm_stats_find(stats, id);
+	if (!s) {
+		mutex_unlock(&stats->mutex);
+		return -ENOENT;
+	}
+
+	idx_end = idx_start + idx_len;
+	if (idx_end < idx_start ||
+	    idx_end > s->n_entries)
+		idx_end = s->n_entries;
+
+	if (idx_start > idx_end)
+		idx_start = idx_end;
+
+	step = s->step;
+	start = s->start + (step * idx_start);
+
+	for (x = idx_start; x < idx_end; x++, start = end) {
+		shared = &s->stat_shared[x];
+		end = start + step;
+		if (unlikely(end > s->end))
+			end = s->end;
+
+		__dm_stat_init_temporary_percpu_totals(shared, s, x);
+
+		DMEMIT("%llu+%llu %llu %llu %llu %llu %llu %llu %llu %llu %d %llu %llu %llu %llu\n",
+		       (unsigned long long)start,
+		       (unsigned long long)step,
+		       shared->tmp.ios[READ],
+		       shared->tmp.merges[READ],
+		       shared->tmp.sectors[READ],
+		       dm_jiffies_to_msec64(shared->tmp.ticks[READ]),
+		       shared->tmp.ios[WRITE],
+		       shared->tmp.merges[WRITE],
+		       shared->tmp.sectors[WRITE],
+		       dm_jiffies_to_msec64(shared->tmp.ticks[WRITE]),
+		       dm_stat_in_flight(shared),
+		       dm_jiffies_to_msec64(shared->tmp.io_ticks_total),
+		       dm_jiffies_to_msec64(shared->tmp.time_in_queue),
+		       dm_jiffies_to_msec64(shared->tmp.io_ticks[READ]),
+		       dm_jiffies_to_msec64(shared->tmp.io_ticks[WRITE]));
+
+		if (unlikely(sz + 1 >= maxlen))
+			goto buffer_overflow;
+	}
+
+	if (clear)
+		__dm_stat_clear(s, idx_start, idx_end, false);
+
+buffer_overflow:
+	mutex_unlock(&stats->mutex);
+
+	return 1;
+}
+
+static int dm_stats_set_aux(struct dm_stats *stats, int id, const char *aux_data)
+{
+	struct dm_stat *s;
+	const char *new_aux_data;
+
+	mutex_lock(&stats->mutex);
+
+	s = __dm_stats_find(stats, id);
+	if (!s) {
+		mutex_unlock(&stats->mutex);
+		return -ENOENT;
+	}
+
+	new_aux_data = kstrdup(aux_data, GFP_KERNEL);
+	if (!new_aux_data) {
+		mutex_unlock(&stats->mutex);
+		return -ENOMEM;
+	}
+
+	kfree(s->aux_data);
+	s->aux_data = new_aux_data;
+
+	mutex_unlock(&stats->mutex);
+
+	return 0;
+}
+
+static int message_stats_create(struct mapped_device *md,
+				unsigned argc, char **argv,
+				char *result, unsigned maxlen)
+{
+	int id;
+	char dummy;
+	unsigned long long start, end, len, step;
+	unsigned divisor;
+	const char *program_id, *aux_data;
+
+	/*
+	 * Input format:
+	 *   <range> <step> [<program_id> [<aux_data>]]
+	 */
+
+	if (argc < 3 || argc > 5)
+		return -EINVAL;
+
+	if (!strcmp(argv[1], "-")) {
+		start = 0;
+		len = dm_get_size(md);
+		if (!len)
+			len = 1;
+	} else if (sscanf(argv[1], "%llu+%llu%c", &start, &len, &dummy) != 2 ||
+		   start != (sector_t)start || len != (sector_t)len)
+		return -EINVAL;
+
+	end = start + len;
+	if (start >= end)
+		return -EINVAL;
+
+	if (sscanf(argv[2], "/%u%c", &divisor, &dummy) == 1) {
+		step = end - start;
+		if (do_div(step, divisor))
+			step++;
+		if (!step)
+			step = 1;
+	} else if (sscanf(argv[2], "%llu%c", &step, &dummy) != 1 ||
+		   step != (sector_t)step || !step)
+		return -EINVAL;
+
+	program_id = "-";
+	aux_data = "-";
+
+	if (argc > 3)
+		program_id = argv[3];
+
+	if (argc > 4)
+		aux_data = argv[4];
+
+	/*
+	 * If a buffer overflow happens after we created the region,
+	 * it's too late (the userspace would retry with a larger
+	 * buffer, but the region id that caused the overflow is already
+	 * leaked).  So we must detect buffer overflow in advance.
+	 */
+	snprintf(result, maxlen, "%d", INT_MAX);
+	if (dm_message_test_buffer_overflow(result, maxlen))
+		return 1;
+
+	id = dm_stats_create(dm_get_stats(md), start, end, step, program_id, aux_data,
+			     dm_internal_suspend, dm_internal_resume, md);
+	if (id < 0)
+		return id;
+
+	snprintf(result, maxlen, "%d", id);
+
+	return 1;
+}
+
+static int message_stats_delete(struct mapped_device *md,
+				unsigned argc, char **argv)
+{
+	int id;
+	char dummy;
+
+	if (argc != 2)
+		return -EINVAL;
+
+	if (sscanf(argv[1], "%d%c", &id, &dummy) != 1 || id < 0)
+		return -EINVAL;
+
+	return dm_stats_delete(dm_get_stats(md), id);
+}
+
+static int message_stats_clear(struct mapped_device *md,
+			       unsigned argc, char **argv)
+{
+	int id;
+	char dummy;
+
+	if (argc != 2)
+		return -EINVAL;
+
+	if (sscanf(argv[1], "%d%c", &id, &dummy) != 1 || id < 0)
+		return -EINVAL;
+
+	return dm_stats_clear(dm_get_stats(md), id);
+}
+
+static int message_stats_list(struct mapped_device *md,
+			      unsigned argc, char **argv,
+			      char *result, unsigned maxlen)
+{
+	int r;
+	const char *program = NULL;
+
+	if (argc < 1 || argc > 2)
+		return -EINVAL;
+
+	if (argc > 1) {
+		program = kstrdup(argv[1], GFP_KERNEL);
+		if (!program)
+			return -ENOMEM;
+	}
+
+	r = dm_stats_list(dm_get_stats(md), program, result, maxlen);
+
+	kfree(program);
+
+	return r;
+}
+
+static int message_stats_print(struct mapped_device *md,
+			       unsigned argc, char **argv, bool clear,
+			       char *result, unsigned maxlen)
+{
+	int id;
+	char dummy;
+	unsigned long idx_start = 0, idx_len = ULONG_MAX;
+
+	if (argc != 2 && argc != 4)
+		return -EINVAL;
+
+	if (sscanf(argv[1], "%d%c", &id, &dummy) != 1 || id < 0)
+		return -EINVAL;
+
+	if (argc > 3) {
+		if (strcmp(argv[2], "-") &&
+		    sscanf(argv[2], "%lu%c", &idx_start, &dummy) != 1)
+			return -EINVAL;
+		if (strcmp(argv[3], "-") &&
+		    sscanf(argv[3], "%lu%c", &idx_len, &dummy) != 1)
+			return -EINVAL;
+	}
+
+	return dm_stats_print(dm_get_stats(md), id, idx_start, idx_len, clear,
+			      result, maxlen);
+}
+
+static int message_stats_set_aux(struct mapped_device *md,
+				 unsigned argc, char **argv)
+{
+	int id;
+	char dummy;
+
+	if (argc != 3)
+		return -EINVAL;
+
+	if (sscanf(argv[1], "%d%c", &id, &dummy) != 1 || id < 0)
+		return -EINVAL;
+
+	return dm_stats_set_aux(dm_get_stats(md), id, argv[2]);
+}
+
+int dm_stats_message(struct mapped_device *md, unsigned argc, char **argv,
+		     char *result, unsigned maxlen)
+{
+	int r;
+
+	if (dm_request_based(md)) {
+		DMWARN("Statistics are only supported for bio-based devices");
+		return -EOPNOTSUPP;
+	}
+
+	/* All messages here must start with '@' */
+	if (!strcasecmp(argv[0], "@stats_create"))
+		r = message_stats_create(md, argc, argv, result, maxlen);
+	else if (!strcasecmp(argv[0], "@stats_delete"))
+		r = message_stats_delete(md, argc, argv);
+	else if (!strcasecmp(argv[0], "@stats_clear"))
+		r = message_stats_clear(md, argc, argv);
+	else if (!strcasecmp(argv[0], "@stats_list"))
+		r = message_stats_list(md, argc, argv, result, maxlen);
+	else if (!strcasecmp(argv[0], "@stats_print"))
+		r = message_stats_print(md, argc, argv, false, result, maxlen);
+	else if (!strcasecmp(argv[0], "@stats_print_clear"))
+		r = message_stats_print(md, argc, argv, true, result, maxlen);
+	else if (!strcasecmp(argv[0], "@stats_set_aux"))
+		r = message_stats_set_aux(md, argc, argv);
+	else
+		return 2; /* this wasn't a stats message */
+
+	if (r == -EINVAL)
+		DMWARN("Invalid parameters for message %s", argv[0]);
+
+	return r;
+}
+
+int __init dm_statistics_init(void)
+{
+	dm_stat_need_rcu_barrier = 0;
+	return 0;
+}
+
+void dm_statistics_exit(void)
+{
+	if (dm_stat_need_rcu_barrier)
+		rcu_barrier();
+	if (WARN_ON(shared_memory_amount))
+		DMCRIT("shared_memory_amount leaked: %lu", shared_memory_amount);
+}
+
+module_param_named(stats_current_allocated_bytes, shared_memory_amount, ulong, S_IRUGO);
+MODULE_PARM_DESC(stats_current_allocated_bytes, "Memory currently used by statistics");
diff --git a/drivers/md/dm-stats.h b/drivers/md/dm-stats.h
new file mode 100644
index 0000000..e7c4984
--- /dev/null
+++ b/drivers/md/dm-stats.h
@@ -0,0 +1,40 @@
+#ifndef DM_STATS_H
+#define DM_STATS_H
+
+#include <linux/types.h>
+#include <linux/mutex.h>
+#include <linux/list.h>
+
+int dm_statistics_init(void);
+void dm_statistics_exit(void);
+
+struct dm_stats {
+	struct mutex mutex;
+	struct list_head list;	/* list of struct dm_stat */
+	struct dm_stats_last_position __percpu *last;
+	sector_t last_sector;
+	unsigned last_rw;
+};
+
+struct dm_stats_aux {
+	bool merged;
+};
+
+void dm_stats_init(struct dm_stats *st);
+void dm_stats_cleanup(struct dm_stats *st);
+
+struct mapped_device;
+
+int dm_stats_message(struct mapped_device *md, unsigned argc, char **argv,
+		     char *result, unsigned maxlen);
+
+void dm_stats_account_io(struct dm_stats *stats, unsigned long bi_rw,
+			 sector_t bi_sector, unsigned bi_sectors, bool end,
+			 unsigned long duration, struct dm_stats_aux *aux);
+
+static inline bool dm_stats_used(struct dm_stats *st)
+{
+	return !list_empty(&st->list);
+}
+
+#endif
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index d907ca6..73c1712 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -4,6 +4,7 @@
  * This file is released under the GPL.
  */
 
+#include "dm.h"
 #include <linux/device-mapper.h>
 
 #include <linux/module.h>
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index f221812..8f87835 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -860,14 +860,17 @@
 static int dm_table_set_type(struct dm_table *t)
 {
 	unsigned i;
-	unsigned bio_based = 0, request_based = 0;
+	unsigned bio_based = 0, request_based = 0, hybrid = 0;
 	struct dm_target *tgt;
 	struct dm_dev_internal *dd;
 	struct list_head *devices;
+	unsigned live_md_type;
 
 	for (i = 0; i < t->num_targets; i++) {
 		tgt = t->targets + i;
-		if (dm_target_request_based(tgt))
+		if (dm_target_hybrid(tgt))
+			hybrid = 1;
+		else if (dm_target_request_based(tgt))
 			request_based = 1;
 		else
 			bio_based = 1;
@@ -879,6 +882,19 @@
 		}
 	}
 
+	if (hybrid && !bio_based && !request_based) {
+		/*
+		 * The targets can work either way.
+		 * Determine the type from the live device.
+		 * Default to bio-based if device is new.
+		 */
+		live_md_type = dm_get_md_type(t->md);
+		if (live_md_type == DM_TYPE_REQUEST_BASED)
+			request_based = 1;
+		else
+			bio_based = 1;
+	}
+
 	if (bio_based) {
 		/* We must use this table as bio-based */
 		t->type = DM_TYPE_BIO_BASED;
diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c
index 37ba5db..242e3ce 100644
--- a/drivers/md/dm-target.c
+++ b/drivers/md/dm-target.c
@@ -131,12 +131,19 @@
 	return -EIO;
 }
 
+static int io_err_map_rq(struct dm_target *ti, struct request *clone,
+			 union map_info *map_context)
+{
+	return -EIO;
+}
+
 static struct target_type error_target = {
 	.name = "error",
-	.version = {1, 1, 0},
+	.version = {1, 2, 0},
 	.ctr  = io_err_ctr,
 	.dtr  = io_err_dtr,
 	.map  = io_err_map,
+	.map_rq = io_err_map_rq,
 };
 
 int __init dm_target_init(void)
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 88f2f80..ed06342 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -887,7 +887,8 @@
 
 	r = dm_pool_commit_metadata(pool->pmd);
 	if (r)
-		DMERR_LIMIT("commit failed: error = %d", r);
+		DMERR_LIMIT("%s: commit failed: error = %d",
+			    dm_device_name(pool->pool_md), r);
 
 	return r;
 }
@@ -917,6 +918,13 @@
 	unsigned long flags;
 	struct pool *pool = tc->pool;
 
+	/*
+	 * Once no_free_space is set we must not allow allocation to succeed.
+	 * Otherwise it is difficult to explain, debug, test and support.
+	 */
+	if (pool->no_free_space)
+		return -ENOSPC;
+
 	r = dm_pool_get_free_block_count(pool->pmd, &free_blocks);
 	if (r)
 		return r;
@@ -931,31 +939,30 @@
 	}
 
 	if (!free_blocks) {
-		if (pool->no_free_space)
+		/*
+		 * Try to commit to see if that will free up some
+		 * more space.
+		 */
+		(void) commit_or_fallback(pool);
+
+		r = dm_pool_get_free_block_count(pool->pmd, &free_blocks);
+		if (r)
+			return r;
+
+		/*
+		 * If we still have no space we set a flag to avoid
+		 * doing all this checking and return -ENOSPC.  This
+		 * flag serves as a latch that disallows allocations from
+		 * this pool until the admin takes action (e.g. resize or
+		 * table reload).
+		 */
+		if (!free_blocks) {
+			DMWARN("%s: no free space available.",
+			       dm_device_name(pool->pool_md));
+			spin_lock_irqsave(&pool->lock, flags);
+			pool->no_free_space = 1;
+			spin_unlock_irqrestore(&pool->lock, flags);
 			return -ENOSPC;
-		else {
-			/*
-			 * Try to commit to see if that will free up some
-			 * more space.
-			 */
-			(void) commit_or_fallback(pool);
-
-			r = dm_pool_get_free_block_count(pool->pmd, &free_blocks);
-			if (r)
-				return r;
-
-			/*
-			 * If we still have no space we set a flag to avoid
-			 * doing all this checking and return -ENOSPC.
-			 */
-			if (!free_blocks) {
-				DMWARN("%s: no free space available.",
-				       dm_device_name(pool->pool_md));
-				spin_lock_irqsave(&pool->lock, flags);
-				pool->no_free_space = 1;
-				spin_unlock_irqrestore(&pool->lock, flags);
-				return -ENOSPC;
-			}
 		}
 	}
 
@@ -1085,6 +1092,7 @@
 {
 	int r;
 	dm_block_t data_block;
+	struct pool *pool = tc->pool;
 
 	r = alloc_data_block(tc, &data_block);
 	switch (r) {
@@ -1094,13 +1102,14 @@
 		break;
 
 	case -ENOSPC:
-		no_space(tc->pool, cell);
+		no_space(pool, cell);
 		break;
 
 	default:
 		DMERR_LIMIT("%s: alloc_data_block() failed: error = %d",
 			    __func__, r);
-		cell_error(tc->pool, cell);
+		set_pool_mode(pool, PM_READ_ONLY);
+		cell_error(pool, cell);
 		break;
 	}
 }
@@ -1386,7 +1395,8 @@
 
 	switch (mode) {
 	case PM_FAIL:
-		DMERR("switching pool to failure mode");
+		DMERR("%s: switching pool to failure mode",
+		      dm_device_name(pool->pool_md));
 		pool->process_bio = process_bio_fail;
 		pool->process_discard = process_bio_fail;
 		pool->process_prepared_mapping = process_prepared_mapping_fail;
@@ -1394,10 +1404,12 @@
 		break;
 
 	case PM_READ_ONLY:
-		DMERR("switching pool to read-only mode");
+		DMERR("%s: switching pool to read-only mode",
+		      dm_device_name(pool->pool_md));
 		r = dm_pool_abort_metadata(pool->pmd);
 		if (r) {
-			DMERR("aborting transaction failed");
+			DMERR("%s: aborting transaction failed",
+			      dm_device_name(pool->pool_md));
 			set_pool_mode(pool, PM_FAIL);
 		} else {
 			dm_pool_metadata_read_only(pool->pmd);
@@ -2156,19 +2168,22 @@
 
 	r = dm_pool_get_data_dev_size(pool->pmd, &sb_data_size);
 	if (r) {
-		DMERR("failed to retrieve data device size");
+		DMERR("%s: failed to retrieve data device size",
+		      dm_device_name(pool->pool_md));
 		return r;
 	}
 
 	if (data_size < sb_data_size) {
-		DMERR("pool target (%llu blocks) too small: expected %llu",
+		DMERR("%s: pool target (%llu blocks) too small: expected %llu",
+		      dm_device_name(pool->pool_md),
 		      (unsigned long long)data_size, sb_data_size);
 		return -EINVAL;
 
 	} else if (data_size > sb_data_size) {
 		r = dm_pool_resize_data_dev(pool->pmd, data_size);
 		if (r) {
-			DMERR("failed to resize data device");
+			DMERR("%s: failed to resize data device",
+			      dm_device_name(pool->pool_md));
 			set_pool_mode(pool, PM_READ_ONLY);
 			return r;
 		}
@@ -2192,19 +2207,22 @@
 
 	r = dm_pool_get_metadata_dev_size(pool->pmd, &sb_metadata_dev_size);
 	if (r) {
-		DMERR("failed to retrieve data device size");
+		DMERR("%s: failed to retrieve metadata device size",
+		      dm_device_name(pool->pool_md));
 		return r;
 	}
 
 	if (metadata_dev_size < sb_metadata_dev_size) {
-		DMERR("metadata device (%llu blocks) too small: expected %llu",
+		DMERR("%s: metadata device (%llu blocks) too small: expected %llu",
+		      dm_device_name(pool->pool_md),
 		      metadata_dev_size, sb_metadata_dev_size);
 		return -EINVAL;
 
 	} else if (metadata_dev_size > sb_metadata_dev_size) {
 		r = dm_pool_resize_metadata_dev(pool->pmd, metadata_dev_size);
 		if (r) {
-			DMERR("failed to resize metadata device");
+			DMERR("%s: failed to resize metadata device",
+			      dm_device_name(pool->pool_md));
 			return r;
 		}
 
@@ -2530,37 +2548,43 @@
 
 		r = dm_pool_get_metadata_transaction_id(pool->pmd, &transaction_id);
 		if (r) {
-			DMERR("dm_pool_get_metadata_transaction_id returned %d", r);
+			DMERR("%s: dm_pool_get_metadata_transaction_id returned %d",
+			      dm_device_name(pool->pool_md), r);
 			goto err;
 		}
 
 		r = dm_pool_get_free_metadata_block_count(pool->pmd, &nr_free_blocks_metadata);
 		if (r) {
-			DMERR("dm_pool_get_free_metadata_block_count returned %d", r);
+			DMERR("%s: dm_pool_get_free_metadata_block_count returned %d",
+			      dm_device_name(pool->pool_md), r);
 			goto err;
 		}
 
 		r = dm_pool_get_metadata_dev_size(pool->pmd, &nr_blocks_metadata);
 		if (r) {
-			DMERR("dm_pool_get_metadata_dev_size returned %d", r);
+			DMERR("%s: dm_pool_get_metadata_dev_size returned %d",
+			      dm_device_name(pool->pool_md), r);
 			goto err;
 		}
 
 		r = dm_pool_get_free_block_count(pool->pmd, &nr_free_blocks_data);
 		if (r) {
-			DMERR("dm_pool_get_free_block_count returned %d", r);
+			DMERR("%s: dm_pool_get_free_block_count returned %d",
+			      dm_device_name(pool->pool_md), r);
 			goto err;
 		}
 
 		r = dm_pool_get_data_dev_size(pool->pmd, &nr_blocks_data);
 		if (r) {
-			DMERR("dm_pool_get_data_dev_size returned %d", r);
+			DMERR("%s: dm_pool_get_data_dev_size returned %d",
+			      dm_device_name(pool->pool_md), r);
 			goto err;
 		}
 
 		r = dm_pool_get_metadata_snap(pool->pmd, &held_root);
 		if (r) {
-			DMERR("dm_pool_get_metadata_snap returned %d", r);
+			DMERR("%s: dm_pool_get_metadata_snap returned %d",
+			      dm_device_name(pool->pool_md), r);
 			goto err;
 		}
 
@@ -2648,9 +2672,17 @@
 {
 	struct pool_c *pt = ti->private;
 	struct pool *pool = pt->pool;
+	uint64_t io_opt_sectors = limits->io_opt >> SECTOR_SHIFT;
 
-	blk_limits_io_min(limits, 0);
-	blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT);
+	/*
+	 * If the system-determined stacked limits are compatible with the
+	 * pool's blocksize (io_opt is a factor) do not override them.
+	 */
+	if (io_opt_sectors < pool->sectors_per_block ||
+	    do_div(io_opt_sectors, pool->sectors_per_block)) {
+		blk_limits_io_min(limits, 0);
+		blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT);
+	}
 
 	/*
 	 * pt->adjusted_pf is a staging area for the actual features to use.
@@ -2669,7 +2701,7 @@
 	.name = "thin-pool",
 	.features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
 		    DM_TARGET_IMMUTABLE,
-	.version = {1, 8, 0},
+	.version = {1, 9, 0},
 	.module = THIS_MODULE,
 	.ctr = pool_ctr,
 	.dtr = pool_dtr,
@@ -2956,7 +2988,7 @@
 
 static struct target_type thin_target = {
 	.name = "thin",
-	.version = {1, 8, 0},
+	.version = {1, 9, 0},
 	.module	= THIS_MODULE,
 	.ctr = thin_ctr,
 	.dtr = thin_dtr,
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 9e39d2b..6a5e9ed 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -60,6 +60,7 @@
 	struct bio *bio;
 	unsigned long start_time;
 	spinlock_t endio_lock;
+	struct dm_stats_aux stats_aux;
 };
 
 /*
@@ -198,6 +199,8 @@
 
 	/* zero-length flush that will be cloned and submitted to targets */
 	struct bio flush_bio;
+
+	struct dm_stats stats;
 };
 
 /*
@@ -269,6 +272,7 @@
 	dm_io_init,
 	dm_kcopyd_init,
 	dm_interface_init,
+	dm_statistics_init,
 };
 
 static void (*_exits[])(void) = {
@@ -279,6 +283,7 @@
 	dm_io_exit,
 	dm_kcopyd_exit,
 	dm_interface_exit,
+	dm_statistics_exit,
 };
 
 static int __init dm_init(void)
@@ -384,6 +389,16 @@
 	return r;
 }
 
+sector_t dm_get_size(struct mapped_device *md)
+{
+	return get_capacity(md->disk);
+}
+
+struct dm_stats *dm_get_stats(struct mapped_device *md)
+{
+	return &md->stats;
+}
+
 static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 {
 	struct mapped_device *md = bdev->bd_disk->private_data;
@@ -466,8 +481,9 @@
 static void start_io_acct(struct dm_io *io)
 {
 	struct mapped_device *md = io->md;
+	struct bio *bio = io->bio;
 	int cpu;
-	int rw = bio_data_dir(io->bio);
+	int rw = bio_data_dir(bio);
 
 	io->start_time = jiffies;
 
@@ -476,6 +492,10 @@
 	part_stat_unlock();
 	atomic_set(&dm_disk(md)->part0.in_flight[rw],
 		atomic_inc_return(&md->pending[rw]));
+
+	if (unlikely(dm_stats_used(&md->stats)))
+		dm_stats_account_io(&md->stats, bio->bi_rw, bio->bi_sector,
+				    bio_sectors(bio), false, 0, &io->stats_aux);
 }
 
 static void end_io_acct(struct dm_io *io)
@@ -491,6 +511,10 @@
 	part_stat_add(cpu, &dm_disk(md)->part0, ticks[rw], duration);
 	part_stat_unlock();
 
+	if (unlikely(dm_stats_used(&md->stats)))
+		dm_stats_account_io(&md->stats, bio->bi_rw, bio->bi_sector,
+				    bio_sectors(bio), true, duration, &io->stats_aux);
+
 	/*
 	 * After this is decremented the bio must not be touched if it is
 	 * a flush.
@@ -1519,7 +1543,7 @@
 	return;
 }
 
-static int dm_request_based(struct mapped_device *md)
+int dm_request_based(struct mapped_device *md)
 {
 	return blk_queue_stackable(md->queue);
 }
@@ -1946,8 +1970,7 @@
 	add_disk(md->disk);
 	format_dev_t(md->name, MKDEV(_major, minor));
 
-	md->wq = alloc_workqueue("kdmflush",
-				 WQ_NON_REENTRANT | WQ_MEM_RECLAIM, 0);
+	md->wq = alloc_workqueue("kdmflush", WQ_MEM_RECLAIM, 0);
 	if (!md->wq)
 		goto bad_thread;
 
@@ -1959,6 +1982,8 @@
 	md->flush_bio.bi_bdev = md->bdev;
 	md->flush_bio.bi_rw = WRITE_FLUSH;
 
+	dm_stats_init(&md->stats);
+
 	/* Populate the mapping, nobody knows we exist yet */
 	spin_lock(&_minor_lock);
 	old_md = idr_replace(&_minor_idr, md, minor);
@@ -2010,6 +2035,7 @@
 
 	put_disk(md->disk);
 	blk_cleanup_queue(md->queue);
+	dm_stats_cleanup(&md->stats);
 	module_put(THIS_MODULE);
 	kfree(md);
 }
@@ -2151,7 +2177,7 @@
 	/*
 	 * Wipe any geometry if the size of the table changed.
 	 */
-	if (size != get_capacity(md->disk))
+	if (size != dm_get_size(md))
 		memset(&md->geometry, 0, sizeof(md->geometry));
 
 	__set_size(md, size);
@@ -2236,11 +2262,13 @@
 
 void dm_set_md_type(struct mapped_device *md, unsigned type)
 {
+	BUG_ON(!mutex_is_locked(&md->type_lock));
 	md->type = type;
 }
 
 unsigned dm_get_md_type(struct mapped_device *md)
 {
+	BUG_ON(!mutex_is_locked(&md->type_lock));
 	return md->type;
 }
 
@@ -2695,6 +2723,38 @@
 	return r;
 }
 
+/*
+ * Internal suspend/resume works like userspace-driven suspend. It waits
+ * until all bios finish and prevents issuing new bios to the target drivers.
+ * It may be used only from the kernel.
+ *
+ * Internal suspend holds md->suspend_lock, which prevents interaction with
+ * userspace-driven suspend.
+ */
+
+void dm_internal_suspend(struct mapped_device *md)
+{
+	mutex_lock(&md->suspend_lock);
+	if (dm_suspended_md(md))
+		return;
+
+	set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
+	synchronize_srcu(&md->io_barrier);
+	flush_workqueue(md->wq);
+	dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
+}
+
+void dm_internal_resume(struct mapped_device *md)
+{
+	if (dm_suspended_md(md))
+		goto done;
+
+	dm_queue_flush(md);
+
+done:
+	mutex_unlock(&md->suspend_lock);
+}
+
 /*-----------------------------------------------------------------
  * Event notification.
  *---------------------------------------------------------------*/
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 45b97da..5e604cc 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -16,6 +16,8 @@
 #include <linux/blkdev.h>
 #include <linux/hdreg.h>
 
+#include "dm-stats.h"
+
 /*
  * Suspend feature flags
  */
@@ -89,10 +91,21 @@
 #define dm_target_is_valid(t) ((t)->table)
 
 /*
+ * To check whether the target type is bio-based or not (request-based).
+ */
+#define dm_target_bio_based(t) ((t)->type->map != NULL)
+
+/*
  * To check whether the target type is request-based or not (bio-based).
  */
 #define dm_target_request_based(t) ((t)->type->map_rq != NULL)
 
+/*
+ * To check whether the target type is a hybrid (capable of being
+ * either request-based or bio-based).
+ */
+#define dm_target_hybrid(t) (dm_target_bio_based(t) && dm_target_request_based(t))
+
 /*-----------------------------------------------------------------
  * A registry of target types.
  *---------------------------------------------------------------*/
@@ -146,10 +159,16 @@
 void dm_destroy_immediate(struct mapped_device *md);
 int dm_open_count(struct mapped_device *md);
 int dm_lock_for_deletion(struct mapped_device *md);
+int dm_request_based(struct mapped_device *md);
+sector_t dm_get_size(struct mapped_device *md);
+struct dm_stats *dm_get_stats(struct mapped_device *md);
 
 int dm_kobject_uevent(struct mapped_device *md, enum kobject_action action,
 		      unsigned cookie);
 
+void dm_internal_suspend(struct mapped_device *md);
+void dm_internal_resume(struct mapped_device *md);
+
 int dm_io_init(void);
 void dm_io_exit(void);
 
@@ -162,4 +181,12 @@
 struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity, unsigned per_bio_data_size);
 void dm_free_md_mempools(struct dm_md_mempools *pools);
 
+/*
+ * Helpers that are used by DM core
+ */
+static inline bool dm_message_test_buffer_overflow(char *result, unsigned maxlen)
+{
+	return !maxlen || strlen(result) + 1 >= maxlen;
+}
+
 #endif
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 9f13e13..adf4d7e 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1180,7 +1180,7 @@
 			mddev->bitmap_info.offset =
 				mddev->bitmap_info.default_offset;
 			mddev->bitmap_info.space =
-				mddev->bitmap_info.space;
+				mddev->bitmap_info.default_space;
 		}
 
 	} else if (mddev->pers == NULL) {
@@ -3429,7 +3429,7 @@
 		mddev->safemode_delay = (msec*HZ)/1000;
 		if (mddev->safemode_delay == 0)
 			mddev->safemode_delay = 1;
-		if (mddev->safemode_delay < old_delay)
+		if (mddev->safemode_delay < old_delay || old_delay == 0)
 			md_safemode_timeout((unsigned long)mddev);
 	}
 	return len;
@@ -5144,7 +5144,7 @@
 	
 	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
 	
-	if (mddev->flags)
+	if (mddev->flags & MD_UPDATE_SB_FLAGS)
 		md_update_sb(mddev, 0);
 
 	md_new_event(mddev);
@@ -5289,7 +5289,7 @@
 	md_super_wait(mddev);
 
 	if (mddev->ro == 0 &&
-	    (!mddev->in_sync || mddev->flags)) {
+	    (!mddev->in_sync || (mddev->flags & MD_UPDATE_SB_FLAGS))) {
 		/* mark array as shutdown cleanly */
 		mddev->in_sync = 1;
 		md_update_sb(mddev, 1);
@@ -5337,8 +5337,14 @@
 		err = -EBUSY;
 		goto out;
 	}
-	if (bdev)
-		sync_blockdev(bdev);
+	if (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags)) {
+		/* Someone opened the device since we flushed it
+		 * so page cache could be dirty and it is too late
+		 * to flush.  So abort
+		 */
+		mutex_unlock(&mddev->open_mutex);
+		return -EBUSY;
+	}
 	if (mddev->pers) {
 		__md_stop_writes(mddev);
 
@@ -5373,14 +5379,14 @@
 		mutex_unlock(&mddev->open_mutex);
 		return -EBUSY;
 	}
-	if (bdev)
-		/* It is possible IO was issued on some other
-		 * open file which was closed before we took ->open_mutex.
-		 * As that was not the last close __blkdev_put will not
-		 * have called sync_blockdev, so we must.
+	if (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags)) {
+		/* Someone opened the device since we flushed it
+		 * so page cache could be dirty and it is too late
+		 * to flush.  So abort
 		 */
-		sync_blockdev(bdev);
-
+		mutex_unlock(&mddev->open_mutex);
+		return -EBUSY;
+	}
 	if (mddev->pers) {
 		if (mddev->ro)
 			set_disk_ro(disk, 0);
@@ -5628,10 +5634,7 @@
 	char *ptr, *buf = NULL;
 	int err = -ENOMEM;
 
-	if (md_allow_write(mddev))
-		file = kmalloc(sizeof(*file), GFP_NOIO);
-	else
-		file = kmalloc(sizeof(*file), GFP_KERNEL);
+	file = kmalloc(sizeof(*file), GFP_NOIO);
 
 	if (!file)
 		goto out;
@@ -6420,6 +6423,20 @@
 						 !test_bit(MD_RECOVERY_NEEDED,
 							   &mddev->flags),
 						 msecs_to_jiffies(5000));
+	if (cmd == STOP_ARRAY || cmd == STOP_ARRAY_RO) {
+		/* Need to flush page cache, and ensure no-one else opens
+		 * and writes
+		 */
+		mutex_lock(&mddev->open_mutex);
+		if (atomic_read(&mddev->openers) > 1) {
+			mutex_unlock(&mddev->open_mutex);
+			err = -EBUSY;
+			goto abort;
+		}
+		set_bit(MD_STILL_CLOSED, &mddev->flags);
+		mutex_unlock(&mddev->open_mutex);
+		sync_blockdev(bdev);
+	}
 	err = mddev_lock(mddev);
 	if (err) {
 		printk(KERN_INFO 
@@ -6673,6 +6690,7 @@
 
 	err = 0;
 	atomic_inc(&mddev->openers);
+	clear_bit(MD_STILL_CLOSED, &mddev->flags);
 	mutex_unlock(&mddev->open_mutex);
 
 	check_disk_change(bdev);
@@ -7817,7 +7835,7 @@
 				sysfs_notify_dirent_safe(mddev->sysfs_state);
 		}
 
-		if (mddev->flags)
+		if (mddev->flags & MD_UPDATE_SB_FLAGS)
 			md_update_sb(mddev, 0);
 
 		if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 20f02c0..608050c 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -204,12 +204,16 @@
 	struct md_personality		*pers;
 	dev_t				unit;
 	int				md_minor;
-	struct list_head 		disks;
+	struct list_head		disks;
 	unsigned long			flags;
 #define MD_CHANGE_DEVS	0	/* Some device status has changed */
 #define MD_CHANGE_CLEAN 1	/* transition to or from 'clean' */
 #define MD_CHANGE_PENDING 2	/* switch from 'clean' to 'active' in progress */
+#define MD_UPDATE_SB_FLAGS (1 | 2 | 4)	/* If these are set, md_update_sb needed */
 #define MD_ARRAY_FIRST_USE 3    /* First use of array, needs initialization */
+#define MD_STILL_CLOSED	4	/* If set, then array has not been opened since
+				 * md_ioctl checked on it.
+				 */
 
 	int				suspended;
 	atomic_t			active_io;
@@ -218,7 +222,7 @@
 						       * are happening, so run/
 						       * takeover/stop are not safe
 						       */
-	int				ready; /* See when safe to pass 
+	int				ready; /* See when safe to pass
 						* IO requests down */
 	struct gendisk			*gendisk;
 
diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c
index 81b5138..a7e8bf2 100644
--- a/drivers/md/persistent-data/dm-block-manager.c
+++ b/drivers/md/persistent-data/dm-block-manager.c
@@ -615,6 +615,11 @@
 }
 EXPORT_SYMBOL_GPL(dm_bm_flush_and_unlock);
 
+void dm_bm_prefetch(struct dm_block_manager *bm, dm_block_t b)
+{
+	dm_bufio_prefetch(bm->bufio, b, 1);
+}
+
 void dm_bm_set_read_only(struct dm_block_manager *bm)
 {
 	bm->read_only = true;
diff --git a/drivers/md/persistent-data/dm-block-manager.h b/drivers/md/persistent-data/dm-block-manager.h
index be5bff6..9a82083 100644
--- a/drivers/md/persistent-data/dm-block-manager.h
+++ b/drivers/md/persistent-data/dm-block-manager.h
@@ -108,6 +108,11 @@
 int dm_bm_flush_and_unlock(struct dm_block_manager *bm,
 			   struct dm_block *superblock);
 
+ /*
+  * Request data be prefetched into the cache.
+  */
+void dm_bm_prefetch(struct dm_block_manager *bm, dm_block_t b);
+
 /*
  * Switches the bm to a read only mode.  Once read-only mode
  * has been entered the following functions will return -EPERM.
diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c
index 3586542..468e371e 100644
--- a/drivers/md/persistent-data/dm-btree.c
+++ b/drivers/md/persistent-data/dm-btree.c
@@ -161,6 +161,7 @@
 };
 
 struct del_stack {
+	struct dm_btree_info *info;
 	struct dm_transaction_manager *tm;
 	int top;
 	struct frame spine[MAX_SPINE_DEPTH];
@@ -183,6 +184,20 @@
 	return s->top >= 0;
 }
 
+static void prefetch_children(struct del_stack *s, struct frame *f)
+{
+	unsigned i;
+	struct dm_block_manager *bm = dm_tm_get_bm(s->tm);
+
+	for (i = 0; i < f->nr_children; i++)
+		dm_bm_prefetch(bm, value64(f->n, i));
+}
+
+static bool is_internal_level(struct dm_btree_info *info, struct frame *f)
+{
+	return f->level < (info->levels - 1);
+}
+
 static int push_frame(struct del_stack *s, dm_block_t b, unsigned level)
 {
 	int r;
@@ -205,6 +220,7 @@
 		dm_tm_dec(s->tm, b);
 
 	else {
+		uint32_t flags;
 		struct frame *f = s->spine + ++s->top;
 
 		r = dm_tm_read_lock(s->tm, b, &btree_node_validator, &f->b);
@@ -217,6 +233,10 @@
 		f->level = level;
 		f->nr_children = le32_to_cpu(f->n->header.nr_entries);
 		f->current_child = 0;
+
+		flags = le32_to_cpu(f->n->header.flags);
+		if (flags & INTERNAL_NODE || is_internal_level(s->info, f))
+			prefetch_children(s, f);
 	}
 
 	return 0;
@@ -230,11 +250,6 @@
 	dm_tm_unlock(s->tm, f->b);
 }
 
-static bool is_internal_level(struct dm_btree_info *info, struct frame *f)
-{
-	return f->level < (info->levels - 1);
-}
-
 int dm_btree_del(struct dm_btree_info *info, dm_block_t root)
 {
 	int r;
@@ -243,6 +258,7 @@
 	s = kmalloc(sizeof(*s), GFP_KERNEL);
 	if (!s)
 		return -ENOMEM;
+	s->info = info;
 	s->tm = info->tm;
 	s->top = -1;
 
@@ -287,7 +303,7 @@
 					info->value_type.dec(info->value_type.context,
 							     value_ptr(f->n, i));
 			}
-			f->current_child = f->nr_children;
+			pop_frame(s);
 		}
 	}
 
diff --git a/drivers/md/persistent-data/dm-space-map-common.c b/drivers/md/persistent-data/dm-space-map-common.c
index 3e7a88d..6058569 100644
--- a/drivers/md/persistent-data/dm-space-map-common.c
+++ b/drivers/md/persistent-data/dm-space-map-common.c
@@ -292,16 +292,11 @@
 	return dm_tm_unlock(ll->tm, blk);
 }
 
-int sm_ll_lookup(struct ll_disk *ll, dm_block_t b, uint32_t *result)
+static int sm_ll_lookup_big_ref_count(struct ll_disk *ll, dm_block_t b,
+				      uint32_t *result)
 {
 	__le32 le_rc;
-	int r = sm_ll_lookup_bitmap(ll, b, result);
-
-	if (r)
-		return r;
-
-	if (*result != 3)
-		return r;
+	int r;
 
 	r = dm_btree_lookup(&ll->ref_count_info, ll->ref_count_root, &b, &le_rc);
 	if (r < 0)
@@ -312,6 +307,19 @@
 	return r;
 }
 
+int sm_ll_lookup(struct ll_disk *ll, dm_block_t b, uint32_t *result)
+{
+	int r = sm_ll_lookup_bitmap(ll, b, result);
+
+	if (r)
+		return r;
+
+	if (*result != 3)
+		return r;
+
+	return sm_ll_lookup_big_ref_count(ll, b, result);
+}
+
 int sm_ll_find_free_block(struct ll_disk *ll, dm_block_t begin,
 			  dm_block_t end, dm_block_t *result)
 {
@@ -372,11 +380,12 @@
 	return -ENOSPC;
 }
 
-int sm_ll_insert(struct ll_disk *ll, dm_block_t b,
-		 uint32_t ref_count, enum allocation_event *ev)
+static int sm_ll_mutate(struct ll_disk *ll, dm_block_t b,
+			uint32_t (*mutator)(void *context, uint32_t old),
+			void *context, enum allocation_event *ev)
 {
 	int r;
-	uint32_t bit, old;
+	uint32_t bit, old, ref_count;
 	struct dm_block *nb;
 	dm_block_t index = b;
 	struct disk_index_entry ie_disk;
@@ -399,6 +408,14 @@
 	bm_le = dm_bitmap_data(nb);
 	old = sm_lookup_bitmap(bm_le, bit);
 
+	if (old > 2) {
+		r = sm_ll_lookup_big_ref_count(ll, b, &old);
+		if (r < 0)
+			return r;
+	}
+
+	ref_count = mutator(context, old);
+
 	if (ref_count <= 2) {
 		sm_set_bitmap(bm_le, bit, ref_count);
 
@@ -448,31 +465,35 @@
 	return ll->save_ie(ll, index, &ie_disk);
 }
 
+static uint32_t set_ref_count(void *context, uint32_t old)
+{
+	return *((uint32_t *) context);
+}
+
+int sm_ll_insert(struct ll_disk *ll, dm_block_t b,
+		 uint32_t ref_count, enum allocation_event *ev)
+{
+	return sm_ll_mutate(ll, b, set_ref_count, &ref_count, ev);
+}
+
+static uint32_t inc_ref_count(void *context, uint32_t old)
+{
+	return old + 1;
+}
+
 int sm_ll_inc(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev)
 {
-	int r;
-	uint32_t rc;
+	return sm_ll_mutate(ll, b, inc_ref_count, NULL, ev);
+}
 
-	r = sm_ll_lookup(ll, b, &rc);
-	if (r)
-		return r;
-
-	return sm_ll_insert(ll, b, rc + 1, ev);
+static uint32_t dec_ref_count(void *context, uint32_t old)
+{
+	return old - 1;
 }
 
 int sm_ll_dec(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev)
 {
-	int r;
-	uint32_t rc;
-
-	r = sm_ll_lookup(ll, b, &rc);
-	if (r)
-		return r;
-
-	if (!rc)
-		return -EINVAL;
-
-	return sm_ll_insert(ll, b, rc - 1, ev);
+	return sm_ll_mutate(ll, b, dec_ref_count, NULL, ev);
 }
 
 int sm_ll_commit(struct ll_disk *ll)
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 78ea443..7ff4f25 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -53,6 +53,7 @@
 #include <linux/cpu.h>
 #include <linux/slab.h>
 #include <linux/ratelimit.h>
+#include <linux/nodemask.h>
 #include <trace/events/block.h>
 
 #include "md.h"
@@ -60,6 +61,10 @@
 #include "raid0.h"
 #include "bitmap.h"
 
+#define cpu_to_group(cpu) cpu_to_node(cpu)
+#define ANY_GROUP NUMA_NO_NODE
+
+static struct workqueue_struct *raid5_wq;
 /*
  * Stripe cache
  */
@@ -72,6 +77,7 @@
 #define BYPASS_THRESHOLD	1
 #define NR_HASH			(PAGE_SIZE / sizeof(struct hlist_head))
 #define HASH_MASK		(NR_HASH - 1)
+#define MAX_STRIPE_BATCH	8
 
 static inline struct hlist_head *stripe_hash(struct r5conf *conf, sector_t sect)
 {
@@ -200,6 +206,49 @@
 	       test_bit(STRIPE_COMPUTE_RUN, &sh->state);
 }
 
+static void raid5_wakeup_stripe_thread(struct stripe_head *sh)
+{
+	struct r5conf *conf = sh->raid_conf;
+	struct r5worker_group *group;
+	int thread_cnt;
+	int i, cpu = sh->cpu;
+
+	if (!cpu_online(cpu)) {
+		cpu = cpumask_any(cpu_online_mask);
+		sh->cpu = cpu;
+	}
+
+	if (list_empty(&sh->lru)) {
+		struct r5worker_group *group;
+		group = conf->worker_groups + cpu_to_group(cpu);
+		list_add_tail(&sh->lru, &group->handle_list);
+		group->stripes_cnt++;
+		sh->group = group;
+	}
+
+	if (conf->worker_cnt_per_group == 0) {
+		md_wakeup_thread(conf->mddev->thread);
+		return;
+	}
+
+	group = conf->worker_groups + cpu_to_group(sh->cpu);
+
+	group->workers[0].working = true;
+	/* at least one worker should run to avoid race */
+	queue_work_on(sh->cpu, raid5_wq, &group->workers[0].work);
+
+	thread_cnt = group->stripes_cnt / MAX_STRIPE_BATCH - 1;
+	/* wakeup more workers */
+	for (i = 1; i < conf->worker_cnt_per_group && thread_cnt > 0; i++) {
+		if (group->workers[i].working == false) {
+			group->workers[i].working = true;
+			queue_work_on(sh->cpu, raid5_wq,
+				      &group->workers[i].work);
+			thread_cnt--;
+		}
+	}
+}
+
 static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh)
 {
 	BUG_ON(!list_empty(&sh->lru));
@@ -214,7 +263,12 @@
 		else {
 			clear_bit(STRIPE_DELAYED, &sh->state);
 			clear_bit(STRIPE_BIT_DELAY, &sh->state);
-			list_add_tail(&sh->lru, &conf->handle_list);
+			if (conf->worker_cnt_per_group == 0) {
+				list_add_tail(&sh->lru, &conf->handle_list);
+			} else {
+				raid5_wakeup_stripe_thread(sh);
+				return;
+			}
 		}
 		md_wakeup_thread(conf->mddev->thread);
 	} else {
@@ -239,12 +293,62 @@
 		do_release_stripe(conf, sh);
 }
 
+static struct llist_node *llist_reverse_order(struct llist_node *head)
+{
+	struct llist_node *new_head = NULL;
+
+	while (head) {
+		struct llist_node *tmp = head;
+		head = head->next;
+		tmp->next = new_head;
+		new_head = tmp;
+	}
+
+	return new_head;
+}
+
+/* should hold conf->device_lock already */
+static int release_stripe_list(struct r5conf *conf)
+{
+	struct stripe_head *sh;
+	int count = 0;
+	struct llist_node *head;
+
+	head = llist_del_all(&conf->released_stripes);
+	head = llist_reverse_order(head);
+	while (head) {
+		sh = llist_entry(head, struct stripe_head, release_list);
+		head = llist_next(head);
+		/* sh could be readded after STRIPE_ON_RELEASE_LIST is cleard */
+		smp_mb();
+		clear_bit(STRIPE_ON_RELEASE_LIST, &sh->state);
+		/*
+		 * Don't worry the bit is set here, because if the bit is set
+		 * again, the count is always > 1. This is true for
+		 * STRIPE_ON_UNPLUG_LIST bit too.
+		 */
+		__release_stripe(conf, sh);
+		count++;
+	}
+
+	return count;
+}
+
 static void release_stripe(struct stripe_head *sh)
 {
 	struct r5conf *conf = sh->raid_conf;
 	unsigned long flags;
+	bool wakeup;
 
+	if (test_and_set_bit(STRIPE_ON_RELEASE_LIST, &sh->state))
+		goto slow_path;
+	wakeup = llist_add(&sh->release_list, &conf->released_stripes);
+	if (wakeup)
+		md_wakeup_thread(conf->mddev->thread);
+	return;
+slow_path:
 	local_irq_save(flags);
+	/* we are ok here if STRIPE_ON_RELEASE_LIST is set or not */
 	if (atomic_dec_and_lock(&sh->count, &conf->device_lock)) {
 		do_release_stripe(conf, sh);
 		spin_unlock(&conf->device_lock);
@@ -359,6 +463,7 @@
 		raid5_build_block(sh, i, previous);
 	}
 	insert_hash(conf, sh);
+	sh->cpu = smp_processor_id();
 }
 
 static struct stripe_head *__find_stripe(struct r5conf *conf, sector_t sector,
@@ -491,7 +596,8 @@
 			if (atomic_read(&sh->count)) {
 				BUG_ON(!list_empty(&sh->lru)
 				    && !test_bit(STRIPE_EXPANDING, &sh->state)
-				    && !test_bit(STRIPE_ON_UNPLUG_LIST, &sh->state));
+				    && !test_bit(STRIPE_ON_UNPLUG_LIST, &sh->state)
+				    && !test_bit(STRIPE_ON_RELEASE_LIST, &sh->state));
 			} else {
 				if (!test_bit(STRIPE_HANDLE, &sh->state))
 					atomic_inc(&conf->active_stripes);
@@ -499,6 +605,10 @@
 				    !test_bit(STRIPE_EXPANDING, &sh->state))
 					BUG();
 				list_del_init(&sh->lru);
+				if (sh->group) {
+					sh->group->stripes_cnt--;
+					sh->group = NULL;
+				}
 			}
 		}
 	} while (sh == NULL);
@@ -3779,6 +3889,7 @@
 			if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
 				atomic_inc(&conf->preread_active_stripes);
 			list_add_tail(&sh->lru, &conf->hold_list);
+			raid5_wakeup_stripe_thread(sh);
 		}
 	}
 }
@@ -4058,18 +4169,35 @@
  * head of the hold_list has changed, i.e. the head was promoted to the
  * handle_list.
  */
-static struct stripe_head *__get_priority_stripe(struct r5conf *conf)
+static struct stripe_head *__get_priority_stripe(struct r5conf *conf, int group)
 {
-	struct stripe_head *sh;
+	struct stripe_head *sh = NULL, *tmp;
+	struct list_head *handle_list = NULL;
+	struct r5worker_group *wg = NULL;
+
+	if (conf->worker_cnt_per_group == 0) {
+		handle_list = &conf->handle_list;
+	} else if (group != ANY_GROUP) {
+		handle_list = &conf->worker_groups[group].handle_list;
+		wg = &conf->worker_groups[group];
+	} else {
+		int i;
+		for (i = 0; i < conf->group_cnt; i++) {
+			handle_list = &conf->worker_groups[i].handle_list;
+			wg = &conf->worker_groups[i];
+			if (!list_empty(handle_list))
+				break;
+		}
+	}
 
 	pr_debug("%s: handle: %s hold: %s full_writes: %d bypass_count: %d\n",
 		  __func__,
-		  list_empty(&conf->handle_list) ? "empty" : "busy",
+		  list_empty(handle_list) ? "empty" : "busy",
 		  list_empty(&conf->hold_list) ? "empty" : "busy",
 		  atomic_read(&conf->pending_full_writes), conf->bypass_count);
 
-	if (!list_empty(&conf->handle_list)) {
-		sh = list_entry(conf->handle_list.next, typeof(*sh), lru);
+	if (!list_empty(handle_list)) {
+		sh = list_entry(handle_list->next, typeof(*sh), lru);
 
 		if (list_empty(&conf->hold_list))
 			conf->bypass_count = 0;
@@ -4087,14 +4215,32 @@
 		   ((conf->bypass_threshold &&
 		     conf->bypass_count > conf->bypass_threshold) ||
 		    atomic_read(&conf->pending_full_writes) == 0)) {
-		sh = list_entry(conf->hold_list.next,
-				typeof(*sh), lru);
-		conf->bypass_count -= conf->bypass_threshold;
-		if (conf->bypass_count < 0)
-			conf->bypass_count = 0;
-	} else
+
+		list_for_each_entry(tmp, &conf->hold_list,  lru) {
+			if (conf->worker_cnt_per_group == 0 ||
+			    group == ANY_GROUP ||
+			    !cpu_online(tmp->cpu) ||
+			    cpu_to_group(tmp->cpu) == group) {
+				sh = tmp;
+				break;
+			}
+		}
+
+		if (sh) {
+			conf->bypass_count -= conf->bypass_threshold;
+			if (conf->bypass_count < 0)
+				conf->bypass_count = 0;
+		}
+		wg = NULL;
+	}
+
+	if (!sh)
 		return NULL;
 
+	if (wg) {
+		wg->stripes_cnt--;
+		sh->group = NULL;
+	}
 	list_del_init(&sh->lru);
 	atomic_inc(&sh->count);
 	BUG_ON(atomic_read(&sh->count) != 1);
@@ -4127,6 +4273,10 @@
 			 */
 			smp_mb__before_clear_bit();
 			clear_bit(STRIPE_ON_UNPLUG_LIST, &sh->state);
+			/*
+			 * STRIPE_ON_RELEASE_LIST could be set here. In that
+			 * case, the count is always > 1 here
+			 */
 			__release_stripe(conf, sh);
 			cnt++;
 		}
@@ -4286,8 +4436,10 @@
 	for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) {
 		DEFINE_WAIT(w);
 		int previous;
+		int seq;
 
 	retry:
+		seq = read_seqcount_begin(&conf->gen_lock);
 		previous = 0;
 		prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE);
 		if (unlikely(conf->reshape_progress != MaxSector)) {
@@ -4320,7 +4472,7 @@
 						  previous,
 						  &dd_idx, NULL);
 		pr_debug("raid456: make_request, sector %llu logical %llu\n",
-			(unsigned long long)new_sector, 
+			(unsigned long long)new_sector,
 			(unsigned long long)logical_sector);
 
 		sh = get_active_stripe(conf, new_sector, previous,
@@ -4349,6 +4501,13 @@
 					goto retry;
 				}
 			}
+			if (read_seqcount_retry(&conf->gen_lock, seq)) {
+				/* Might have got the wrong stripe_head
+				 * by accident
+				 */
+				release_stripe(sh);
+				goto retry;
+			}
 
 			if (rw == WRITE &&
 			    logical_sector >= mddev->suspend_lo &&
@@ -4788,14 +4947,14 @@
 	return handled;
 }
 
-#define MAX_STRIPE_BATCH 8
-static int handle_active_stripes(struct r5conf *conf)
+static int handle_active_stripes(struct r5conf *conf, int group,
+				 struct r5worker *worker)
 {
 	struct stripe_head *batch[MAX_STRIPE_BATCH], *sh;
 	int i, batch_size = 0;
 
 	while (batch_size < MAX_STRIPE_BATCH &&
-			(sh = __get_priority_stripe(conf)) != NULL)
+			(sh = __get_priority_stripe(conf, group)) != NULL)
 		batch[batch_size++] = sh;
 
 	if (batch_size == 0)
@@ -4813,6 +4972,39 @@
 	return batch_size;
 }
 
+static void raid5_do_work(struct work_struct *work)
+{
+	struct r5worker *worker = container_of(work, struct r5worker, work);
+	struct r5worker_group *group = worker->group;
+	struct r5conf *conf = group->conf;
+	int group_id = group - conf->worker_groups;
+	int handled;
+	struct blk_plug plug;
+
+	pr_debug("+++ raid5worker active\n");
+
+	blk_start_plug(&plug);
+	handled = 0;
+	spin_lock_irq(&conf->device_lock);
+	while (1) {
+		int batch_size, released;
+
+		released = release_stripe_list(conf);
+
+		batch_size = handle_active_stripes(conf, group_id, worker);
+		worker->working = false;
+		if (!batch_size && !released)
+			break;
+		handled += batch_size;
+	}
+	pr_debug("%d stripes handled\n", handled);
+
+	spin_unlock_irq(&conf->device_lock);
+	blk_finish_plug(&plug);
+
+	pr_debug("--- raid5worker inactive\n");
+}
+
 /*
  * This is our raid5 kernel thread.
  *
@@ -4836,7 +5028,9 @@
 	spin_lock_irq(&conf->device_lock);
 	while (1) {
 		struct bio *bio;
-		int batch_size;
+		int batch_size, released;
+
+		released = release_stripe_list(conf);
 
 		if (
 		    !list_empty(&conf->bitmap_list)) {
@@ -4860,8 +5054,8 @@
 			handled++;
 		}
 
-		batch_size = handle_active_stripes(conf);
-		if (!batch_size)
+		batch_size = handle_active_stripes(conf, ANY_GROUP, NULL);
+		if (!batch_size && !released)
 			break;
 		handled += batch_size;
 
@@ -4989,10 +5183,70 @@
 static struct md_sysfs_entry
 raid5_stripecache_active = __ATTR_RO(stripe_cache_active);
 
+static ssize_t
+raid5_show_group_thread_cnt(struct mddev *mddev, char *page)
+{
+	struct r5conf *conf = mddev->private;
+	if (conf)
+		return sprintf(page, "%d\n", conf->worker_cnt_per_group);
+	else
+		return 0;
+}
+
+static int alloc_thread_groups(struct r5conf *conf, int cnt);
+static ssize_t
+raid5_store_group_thread_cnt(struct mddev *mddev, const char *page, size_t len)
+{
+	struct r5conf *conf = mddev->private;
+	unsigned long new;
+	int err;
+	struct r5worker_group *old_groups;
+	int old_group_cnt;
+
+	if (len >= PAGE_SIZE)
+		return -EINVAL;
+	if (!conf)
+		return -ENODEV;
+
+	if (kstrtoul(page, 10, &new))
+		return -EINVAL;
+
+	if (new == conf->worker_cnt_per_group)
+		return len;
+
+	mddev_suspend(mddev);
+
+	old_groups = conf->worker_groups;
+	old_group_cnt = conf->worker_cnt_per_group;
+
+	conf->worker_groups = NULL;
+	err = alloc_thread_groups(conf, new);
+	if (err) {
+		conf->worker_groups = old_groups;
+		conf->worker_cnt_per_group = old_group_cnt;
+	} else {
+		if (old_groups)
+			kfree(old_groups[0].workers);
+		kfree(old_groups);
+	}
+
+	mddev_resume(mddev);
+
+	if (err)
+		return err;
+	return len;
+}
+
+static struct md_sysfs_entry
+raid5_group_thread_cnt = __ATTR(group_thread_cnt, S_IRUGO | S_IWUSR,
+				raid5_show_group_thread_cnt,
+				raid5_store_group_thread_cnt);
+
 static struct attribute *raid5_attrs[] =  {
 	&raid5_stripecache_size.attr,
 	&raid5_stripecache_active.attr,
 	&raid5_preread_bypass_threshold.attr,
+	&raid5_group_thread_cnt.attr,
 	NULL,
 };
 static struct attribute_group raid5_attrs_group = {
@@ -5000,6 +5254,54 @@
 	.attrs = raid5_attrs,
 };
 
+static int alloc_thread_groups(struct r5conf *conf, int cnt)
+{
+	int i, j;
+	ssize_t size;
+	struct r5worker *workers;
+
+	conf->worker_cnt_per_group = cnt;
+	if (cnt == 0) {
+		conf->worker_groups = NULL;
+		return 0;
+	}
+	conf->group_cnt = num_possible_nodes();
+	size = sizeof(struct r5worker) * cnt;
+	workers = kzalloc(size * conf->group_cnt, GFP_NOIO);
+	conf->worker_groups = kzalloc(sizeof(struct r5worker_group) *
+				conf->group_cnt, GFP_NOIO);
+	if (!conf->worker_groups || !workers) {
+		kfree(workers);
+		kfree(conf->worker_groups);
+		conf->worker_groups = NULL;
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < conf->group_cnt; i++) {
+		struct r5worker_group *group;
+
+		group = &conf->worker_groups[i];
+		INIT_LIST_HEAD(&group->handle_list);
+		group->conf = conf;
+		group->workers = workers + i * cnt;
+
+		for (j = 0; j < cnt; j++) {
+			group->workers[j].group = group;
+			INIT_WORK(&group->workers[j].work, raid5_do_work);
+		}
+	}
+
+	return 0;
+}
+
+static void free_thread_groups(struct r5conf *conf)
+{
+	if (conf->worker_groups)
+		kfree(conf->worker_groups[0].workers);
+	kfree(conf->worker_groups);
+	conf->worker_groups = NULL;
+}
+
 static sector_t
 raid5_size(struct mddev *mddev, sector_t sectors, int raid_disks)
 {
@@ -5040,6 +5342,7 @@
 
 static void free_conf(struct r5conf *conf)
 {
+	free_thread_groups(conf);
 	shrink_stripes(conf);
 	raid5_free_percpu(conf);
 	kfree(conf->disks);
@@ -5168,7 +5471,11 @@
 	conf = kzalloc(sizeof(struct r5conf), GFP_KERNEL);
 	if (conf == NULL)
 		goto abort;
+	/* Don't enable multi-threading by default*/
+	if (alloc_thread_groups(conf, 0))
+		goto abort;
 	spin_lock_init(&conf->device_lock);
+	seqcount_init(&conf->gen_lock);
 	init_waitqueue_head(&conf->wait_for_stripe);
 	init_waitqueue_head(&conf->wait_for_overlap);
 	INIT_LIST_HEAD(&conf->handle_list);
@@ -5176,6 +5483,7 @@
 	INIT_LIST_HEAD(&conf->delayed_list);
 	INIT_LIST_HEAD(&conf->bitmap_list);
 	INIT_LIST_HEAD(&conf->inactive_list);
+	init_llist_head(&conf->released_stripes);
 	atomic_set(&conf->active_stripes, 0);
 	atomic_set(&conf->preread_active_stripes, 0);
 	atomic_set(&conf->active_aligned_reads, 0);
@@ -5980,6 +6288,7 @@
 
 	atomic_set(&conf->reshape_stripes, 0);
 	spin_lock_irq(&conf->device_lock);
+	write_seqcount_begin(&conf->gen_lock);
 	conf->previous_raid_disks = conf->raid_disks;
 	conf->raid_disks += mddev->delta_disks;
 	conf->prev_chunk_sectors = conf->chunk_sectors;
@@ -5996,8 +6305,16 @@
 	else
 		conf->reshape_progress = 0;
 	conf->reshape_safe = conf->reshape_progress;
+	write_seqcount_end(&conf->gen_lock);
 	spin_unlock_irq(&conf->device_lock);
 
+	/* Now make sure any requests that proceeded on the assumption
+	 * the reshape wasn't running - like Discard or Read - have
+	 * completed.
+	 */
+	mddev_suspend(mddev);
+	mddev_resume(mddev);
+
 	/* Add some new drives, as many as will fit.
 	 * We know there are enough to make the newly sized array work.
 	 * Don't add devices if we are reducing the number of
@@ -6472,6 +6789,10 @@
 
 static int __init raid5_init(void)
 {
+	raid5_wq = alloc_workqueue("raid5wq",
+		WQ_UNBOUND|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE|WQ_SYSFS, 0);
+	if (!raid5_wq)
+		return -ENOMEM;
 	register_md_personality(&raid6_personality);
 	register_md_personality(&raid5_personality);
 	register_md_personality(&raid4_personality);
@@ -6483,6 +6804,7 @@
 	unregister_md_personality(&raid6_personality);
 	unregister_md_personality(&raid5_personality);
 	unregister_md_personality(&raid4_personality);
+	destroy_workqueue(raid5_wq);
 }
 
 module_init(raid5_init);
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 70c4932..2113ffa 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -197,6 +197,7 @@
 struct stripe_head {
 	struct hlist_node	hash;
 	struct list_head	lru;	      /* inactive_list or handle_list */
+	struct llist_node	release_list;
 	struct r5conf		*raid_conf;
 	short			generation;	/* increments with every
 						 * reshape */
@@ -211,6 +212,8 @@
 	enum check_states	check_state;
 	enum reconstruct_states reconstruct_state;
 	spinlock_t		stripe_lock;
+	int			cpu;
+	struct r5worker_group	*group;
 	/**
 	 * struct stripe_operations
 	 * @target - STRIPE_OP_COMPUTE_BLK target
@@ -321,6 +324,7 @@
 	STRIPE_OPS_REQ_PENDING,
 	STRIPE_ON_UNPLUG_LIST,
 	STRIPE_DISCARD,
+	STRIPE_ON_RELEASE_LIST,
 };
 
 /*
@@ -363,6 +367,19 @@
 	struct md_rdev	*rdev, *replacement;
 };
 
+struct r5worker {
+	struct work_struct work;
+	struct r5worker_group *group;
+	bool working;
+};
+
+struct r5worker_group {
+	struct list_head handle_list;
+	struct r5conf *conf;
+	struct r5worker *workers;
+	int stripes_cnt;
+};
+
 struct r5conf {
 	struct hlist_head	*stripe_hashtbl;
 	struct mddev		*mddev;
@@ -386,6 +403,7 @@
 	int			prev_chunk_sectors;
 	int			prev_algo;
 	short			generation; /* increments with every reshape */
+	seqcount_t		gen_lock;	/* lock against generation changes */
 	unsigned long		reshape_checkpoint; /* Time we last updated
 						     * metadata */
 	long long		min_offset_diff; /* minimum difference between
@@ -445,6 +463,7 @@
 	 */
 	atomic_t		active_stripes;
 	struct list_head	inactive_list;
+	struct llist_head	released_stripes;
 	wait_queue_head_t	wait_for_stripe;
 	wait_queue_head_t	wait_for_overlap;
 	int			inactive_blocked;	/* release of inactive stripes blocked,
@@ -458,6 +477,9 @@
 	 * the new thread here until we fully activate the array.
 	 */
 	struct md_thread	*thread;
+	struct r5worker_group	*worker_groups;
+	int			group_cnt;
+	int			worker_cnt_per_group;
 };
 
 /*
diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index cd0b7f4..1a3163f 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -812,7 +812,7 @@
  * Otherwise we don't understand what happened, so abort.
  */
 static int mmc_blk_cmd_recovery(struct mmc_card *card, struct request *req,
-	struct mmc_blk_request *brq, int *ecc_err)
+	struct mmc_blk_request *brq, int *ecc_err, int *gen_err)
 {
 	bool prev_cmd_status_valid = true;
 	u32 status, stop_status = 0;
@@ -850,6 +850,16 @@
 	    (brq->cmd.resp[0] & R1_CARD_ECC_FAILED))
 		*ecc_err = 1;
 
+	/* Flag General errors */
+	if (!mmc_host_is_spi(card->host) && rq_data_dir(req) != READ)
+		if ((status & R1_ERROR) ||
+			(brq->stop.resp[0] & R1_ERROR)) {
+			pr_err("%s: %s: general error sending stop or status command, stop cmd response %#x, card status %#x\n",
+			       req->rq_disk->disk_name, __func__,
+			       brq->stop.resp[0], status);
+			*gen_err = 1;
+		}
+
 	/*
 	 * Check the current card state.  If it is in some data transfer
 	 * mode, tell it to stop (and hopefully transition back to TRAN.)
@@ -869,6 +879,13 @@
 			return ERR_ABORT;
 		if (stop_status & R1_CARD_ECC_FAILED)
 			*ecc_err = 1;
+		if (!mmc_host_is_spi(card->host) && rq_data_dir(req) != READ)
+			if (stop_status & R1_ERROR) {
+				pr_err("%s: %s: general error sending stop command, stop cmd response %#x\n",
+				       req->rq_disk->disk_name, __func__,
+				       stop_status);
+				*gen_err = 1;
+			}
 	}
 
 	/* Check for set block count errors */
@@ -1097,7 +1114,7 @@
 						    mmc_active);
 	struct mmc_blk_request *brq = &mq_mrq->brq;
 	struct request *req = mq_mrq->req;
-	int ecc_err = 0;
+	int ecc_err = 0, gen_err = 0;
 
 	/*
 	 * sbc.error indicates a problem with the set block count
@@ -1111,7 +1128,7 @@
 	 */
 	if (brq->sbc.error || brq->cmd.error || brq->stop.error ||
 	    brq->data.error) {
-		switch (mmc_blk_cmd_recovery(card, req, brq, &ecc_err)) {
+		switch (mmc_blk_cmd_recovery(card, req, brq, &ecc_err, &gen_err)) {
 		case ERR_RETRY:
 			return MMC_BLK_RETRY;
 		case ERR_ABORT:
@@ -1143,6 +1160,14 @@
 		u32 status;
 		unsigned long timeout;
 
+		/* Check stop command response */
+		if (brq->stop.resp[0] & R1_ERROR) {
+			pr_err("%s: %s: general error sending stop command, stop cmd response %#x\n",
+			       req->rq_disk->disk_name, __func__,
+			       brq->stop.resp[0]);
+			gen_err = 1;
+		}
+
 		timeout = jiffies + msecs_to_jiffies(MMC_BLK_TIMEOUT_MS);
 		do {
 			int err = get_card_status(card, &status, 5);
@@ -1152,6 +1177,13 @@
 				return MMC_BLK_CMD_ERR;
 			}
 
+			if (status & R1_ERROR) {
+				pr_err("%s: %s: general error sending status command, card status %#x\n",
+				       req->rq_disk->disk_name, __func__,
+				       status);
+				gen_err = 1;
+			}
+
 			/* Timeout if the device never becomes ready for data
 			 * and never leaves the program state.
 			 */
@@ -1171,6 +1203,13 @@
 			 (R1_CURRENT_STATE(status) == R1_STATE_PRG));
 	}
 
+	/* if general error occurs, retry the write operation. */
+	if (gen_err) {
+		pr_warn("%s: retrying write for general error\n",
+				req->rq_disk->disk_name);
+		return MMC_BLK_RETRY;
+	}
+
 	if (brq->data.error) {
 		pr_err("%s: error %d transferring data, sector %u, nr %u, cmd response %#x, card status %#x\n",
 		       req->rq_disk->disk_name, brq->data.error,
@@ -2191,10 +2230,10 @@
 		 * is freeing the queue that stops new requests
 		 * from being accepted.
 		 */
+		card = md->queue.card;
 		mmc_cleanup_queue(&md->queue);
 		if (md->flags & MMC_BLK_PACKED_CMD)
 			mmc_packed_clean(&md->queue);
-		card = md->queue.card;
 		if (md->disk->flags & GENHD_FL_UP) {
 			device_remove_file(disk_to_dev(md->disk), &md->force_ro);
 			if ((md->area_type & MMC_BLK_DATA_AREA_BOOT) &&
diff --git a/drivers/mmc/card/mmc_test.c b/drivers/mmc/card/mmc_test.c
index a69df52..0c0fc52 100644
--- a/drivers/mmc/card/mmc_test.c
+++ b/drivers/mmc/card/mmc_test.c
@@ -2849,18 +2849,12 @@
 	struct seq_file *sf = (struct seq_file *)file->private_data;
 	struct mmc_card *card = (struct mmc_card *)sf->private;
 	struct mmc_test_card *test;
-	char lbuf[12];
 	long testcase;
+	int ret;
 
-	if (count >= sizeof(lbuf))
-		return -EINVAL;
-
-	if (copy_from_user(lbuf, buf, count))
-		return -EFAULT;
-	lbuf[count] = '\0';
-
-	if (strict_strtol(lbuf, 10, &testcase))
-		return -EINVAL;
+	ret = kstrtol_from_user(buf, count, 10, &testcase);
+	if (ret)
+		return ret;
 
 	test = kzalloc(sizeof(struct mmc_test_card), GFP_KERNEL);
 	if (!test)
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 5d08855..bf18b6b 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -27,6 +27,7 @@
 #include <linux/fault-inject.h>
 #include <linux/random.h>
 #include <linux/slab.h>
+#include <linux/of.h>
 
 #include <linux/mmc/card.h>
 #include <linux/mmc/host.h>
@@ -1196,6 +1197,49 @@
 }
 EXPORT_SYMBOL(mmc_vddrange_to_ocrmask);
 
+#ifdef CONFIG_OF
+
+/**
+ * mmc_of_parse_voltage - return mask of supported voltages
+ * @np: The device node need to be parsed.
+ * @mask: mask of voltages available for MMC/SD/SDIO
+ *
+ * 1. Return zero on success.
+ * 2. Return negative errno: voltage-range is invalid.
+ */
+int mmc_of_parse_voltage(struct device_node *np, u32 *mask)
+{
+	const u32 *voltage_ranges;
+	int num_ranges, i;
+
+	voltage_ranges = of_get_property(np, "voltage-ranges", &num_ranges);
+	num_ranges = num_ranges / sizeof(*voltage_ranges) / 2;
+	if (!voltage_ranges || !num_ranges) {
+		pr_info("%s: voltage-ranges unspecified\n", np->full_name);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < num_ranges; i++) {
+		const int j = i * 2;
+		u32 ocr_mask;
+
+		ocr_mask = mmc_vddrange_to_ocrmask(
+				be32_to_cpu(voltage_ranges[j]),
+				be32_to_cpu(voltage_ranges[j + 1]));
+		if (!ocr_mask) {
+			pr_err("%s: voltage-range #%d is invalid\n",
+				np->full_name, i);
+			return -EINVAL;
+		}
+		*mask |= ocr_mask;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(mmc_of_parse_voltage);
+
+#endif /* CONFIG_OF */
+
 #ifdef CONFIG_REGULATOR
 
 /**
diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c
index 6fb6f77..49bc403 100644
--- a/drivers/mmc/core/host.c
+++ b/drivers/mmc/core/host.c
@@ -374,7 +374,7 @@
 			if (!(flags & OF_GPIO_ACTIVE_LOW))
 				gpio_inv_cd = true;
 
-			ret = mmc_gpio_request_cd(host, gpio);
+			ret = mmc_gpio_request_cd(host, gpio, 0);
 			if (ret < 0) {
 				dev_err(host->parent,
 					"Failed to request CD GPIO #%d: %d!\n",
diff --git a/drivers/mmc/core/mmc_ops.c b/drivers/mmc/core/mmc_ops.c
index 837fc73..ef18348 100644
--- a/drivers/mmc/core/mmc_ops.c
+++ b/drivers/mmc/core/mmc_ops.c
@@ -531,6 +531,7 @@
 
 	data.sg = &sg;
 	data.sg_len = 1;
+	mmc_set_data_timeout(&data, card);
 	sg_init_one(&sg, data_buf, len);
 	mmc_wait_for_req(host, &mrq);
 	err = 0;
diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index 176d125..5e8823d 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -215,7 +215,7 @@
 static int mmc_read_ssr(struct mmc_card *card)
 {
 	unsigned int au, es, et, eo;
-	int err, i;
+	int err, i, max_au;
 	u32 *ssr;
 
 	if (!(card->csd.cmdclass & CCC_APP_SPEC)) {
@@ -239,12 +239,15 @@
 	for (i = 0; i < 16; i++)
 		ssr[i] = be32_to_cpu(ssr[i]);
 
+	/* SD3.0 increases max AU size to 64MB (0xF) from 4MB (0x9) */
+	max_au = card->scr.sda_spec3 ? 0xF : 0x9;
+
 	/*
 	 * UNSTUFF_BITS only works with four u32s so we have to offset the
 	 * bitfield positions accordingly.
 	 */
 	au = UNSTUFF_BITS(ssr, 428 - 384, 4);
-	if (au > 0 && au <= 9) {
+	if (au > 0 && au <= max_au) {
 		card->ssr.au = 1 << (au + 4);
 		es = UNSTUFF_BITS(ssr, 408 - 384, 16);
 		et = UNSTUFF_BITS(ssr, 402 - 384, 6);
@@ -942,13 +945,13 @@
 	if (!mmc_host_is_spi(host)) {
 		err = mmc_send_relative_addr(host, &card->rca);
 		if (err)
-			return err;
+			goto free_card;
 	}
 
 	if (!oldcard) {
 		err = mmc_sd_get_csd(host, card);
 		if (err)
-			return err;
+			goto free_card;
 
 		mmc_decode_cid(card);
 	}
@@ -959,7 +962,7 @@
 	if (!mmc_host_is_spi(host)) {
 		err = mmc_select_card(card);
 		if (err)
-			return err;
+			goto free_card;
 	}
 
 	err = mmc_sd_setup_card(host, card, oldcard != NULL);
diff --git a/drivers/mmc/core/slot-gpio.c b/drivers/mmc/core/slot-gpio.c
index 3242351..46596b71 100644
--- a/drivers/mmc/core/slot-gpio.c
+++ b/drivers/mmc/core/slot-gpio.c
@@ -135,6 +135,7 @@
  * mmc_gpio_request_cd - request a gpio for card-detection
  * @host: mmc host
  * @gpio: gpio number requested
+ * @debounce: debounce time in microseconds
  *
  * As devm_* managed functions are used in mmc_gpio_request_cd(), client
  * drivers do not need to explicitly call mmc_gpio_free_cd() for freeing up,
@@ -143,9 +144,14 @@
  * switching for card-detection, they are responsible for calling
  * mmc_gpio_request_cd() and mmc_gpio_free_cd() as a pair on their own.
  *
+ * If GPIO debouncing is desired, set the debounce parameter to a non-zero
+ * value. The caller is responsible for ensuring that the GPIO driver associated
+ * with the GPIO supports debouncing, otherwise an error will be returned.
+ *
  * Returns zero on success, else an error.
  */
-int mmc_gpio_request_cd(struct mmc_host *host, unsigned int gpio)
+int mmc_gpio_request_cd(struct mmc_host *host, unsigned int gpio,
+			unsigned int debounce)
 {
 	struct mmc_gpio *ctx;
 	int irq = gpio_to_irq(gpio);
@@ -167,6 +173,12 @@
 		 */
 		return ret;
 
+	if (debounce) {
+		ret = gpio_set_debounce(gpio, debounce);
+		if (ret < 0)
+			return ret;
+	}
+
 	/*
 	 * Even if gpio_to_irq() returns a valid IRQ number, the platform might
 	 * still prefer to poll, e.g., because that IRQ number is already used
diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig
index 8a4c066..b7fd5ab 100644
--- a/drivers/mmc/host/Kconfig
+++ b/drivers/mmc/host/Kconfig
@@ -284,11 +284,11 @@
 
 config MMC_OMAP_HS
 	tristate "TI OMAP High Speed Multimedia Card Interface support"
-	depends on SOC_OMAP2430 || ARCH_OMAP3 || ARCH_OMAP4
+	depends on ARCH_OMAP2PLUS || COMPILE_TEST
 	help
 	  This selects the TI OMAP High Speed Multimedia card Interface.
-	  If you have an OMAP2430 or OMAP3 board or OMAP4 board with a
-	  Multimedia Card slot, say Y or M here.
+	  If you have an omap2plus board with a Multimedia Card slot,
+	  say Y or M here.
 
 	  If unsure, say N.
 
@@ -530,7 +530,7 @@
 
 config MMC_DW
 	tristate "Synopsys DesignWare Memory Card Interface"
-	depends on ARM
+	depends on ARC || ARM
 	help
 	  This selects support for the Synopsys DesignWare Mobile Storage IP
 	  block, this provides host support for SD and MMC interfaces, in both
@@ -569,7 +569,7 @@
 
 config MMC_DW_SOCFPGA
 	tristate "SOCFPGA specific extensions for Synopsys DW Memory Card Interface"
-	depends on MMC_DW
+	depends on MMC_DW && MFD_SYSCON
 	select MMC_DW_PLTFM
 	help
 	  This selects support for Altera SoCFPGA specific extensions to the
diff --git a/drivers/mmc/host/Makefile b/drivers/mmc/host/Makefile
index d422e21..c41d0c3 100644
--- a/drivers/mmc/host/Makefile
+++ b/drivers/mmc/host/Makefile
@@ -52,8 +52,6 @@
 
 obj-$(CONFIG_MMC_REALTEK_PCI)	+= rtsx_pci_sdmmc.o
 
-obj-$(CONFIG_MMC_REALTEK_PCI)	+= rtsx_pci_sdmmc.o
-
 obj-$(CONFIG_MMC_SDHCI_PLTFM)		+= sdhci-pltfm.o
 obj-$(CONFIG_MMC_SDHCI_CNS3XXX)		+= sdhci-cns3xxx.o
 obj-$(CONFIG_MMC_SDHCI_ESDHC_IMX)	+= sdhci-esdhc-imx.o
diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c
index bdb84da..69e438e 100644
--- a/drivers/mmc/host/atmel-mci.c
+++ b/drivers/mmc/host/atmel-mci.c
@@ -378,6 +378,8 @@
 {
 	struct atmel_mci	*host = s->private;
 	u32			*buf;
+	int			ret = 0;
+
 
 	buf = kmalloc(ATMCI_REGS_SIZE, GFP_KERNEL);
 	if (!buf)
@@ -388,12 +390,16 @@
 	 * not disabling interrupts, so IMR and SR may not be
 	 * consistent.
 	 */
+	ret = clk_prepare_enable(host->mck);
+	if (ret)
+		goto out;
+
 	spin_lock_bh(&host->lock);
-	clk_enable(host->mck);
 	memcpy_fromio(buf, host->regs, ATMCI_REGS_SIZE);
-	clk_disable(host->mck);
 	spin_unlock_bh(&host->lock);
 
+	clk_disable_unprepare(host->mck);
+
 	seq_printf(s, "MR:\t0x%08x%s%s ",
 			buf[ATMCI_MR / 4],
 			buf[ATMCI_MR / 4] & ATMCI_MR_RDPROOF ? " RDPROOF" : "",
@@ -442,9 +448,10 @@
 				val & ATMCI_CFG_LSYNC ? " LSYNC" : "");
 	}
 
+out:
 	kfree(buf);
 
-	return 0;
+	return ret;
 }
 
 static int atmci_regs_open(struct inode *inode, struct file *file)
@@ -1262,6 +1269,7 @@
 	struct atmel_mci_slot	*slot = mmc_priv(mmc);
 	struct atmel_mci	*host = slot->host;
 	unsigned int		i;
+	bool			unprepare_clk;
 
 	slot->sdc_reg &= ~ATMCI_SDCBUS_MASK;
 	switch (ios->bus_width) {
@@ -1277,9 +1285,13 @@
 		unsigned int clock_min = ~0U;
 		u32 clkdiv;
 
+		clk_prepare(host->mck);
+		unprepare_clk = true;
+
 		spin_lock_bh(&host->lock);
 		if (!host->mode_reg) {
 			clk_enable(host->mck);
+			unprepare_clk = false;
 			atmci_writel(host, ATMCI_CR, ATMCI_CR_SWRST);
 			atmci_writel(host, ATMCI_CR, ATMCI_CR_MCIEN);
 			if (host->caps.has_cfg_reg)
@@ -1347,6 +1359,8 @@
 	} else {
 		bool any_slot_active = false;
 
+		unprepare_clk = false;
+
 		spin_lock_bh(&host->lock);
 		slot->clock = 0;
 		for (i = 0; i < ATMCI_MAX_NR_SLOTS; i++) {
@@ -1360,12 +1374,16 @@
 			if (host->mode_reg) {
 				atmci_readl(host, ATMCI_MR);
 				clk_disable(host->mck);
+				unprepare_clk = true;
 			}
 			host->mode_reg = 0;
 		}
 		spin_unlock_bh(&host->lock);
 	}
 
+	if (unprepare_clk)
+		clk_unprepare(host->mck);
+
 	switch (ios->power_mode) {
 	case MMC_POWER_UP:
 		set_bit(ATMCI_CARD_NEED_INIT, &slot->flags);
@@ -2376,10 +2394,12 @@
 	if (!host->regs)
 		goto err_ioremap;
 
-	clk_enable(host->mck);
+	ret = clk_prepare_enable(host->mck);
+	if (ret)
+		goto err_request_irq;
 	atmci_writel(host, ATMCI_CR, ATMCI_CR_SWRST);
 	host->bus_hz = clk_get_rate(host->mck);
-	clk_disable(host->mck);
+	clk_disable_unprepare(host->mck);
 
 	host->mapbase = regs->start;
 
@@ -2482,11 +2502,11 @@
 			atmci_cleanup_slot(host->slot[i], i);
 	}
 
-	clk_enable(host->mck);
+	clk_prepare_enable(host->mck);
 	atmci_writel(host, ATMCI_IDR, ~0UL);
 	atmci_writel(host, ATMCI_CR, ATMCI_CR_MCIDIS);
 	atmci_readl(host, ATMCI_SR);
-	clk_disable(host->mck);
+	clk_disable_unprepare(host->mck);
 
 	if (host->dma.chan)
 		dma_release_channel(host->dma.chan);
diff --git a/drivers/mmc/host/dw_mmc-exynos.c b/drivers/mmc/host/dw_mmc-exynos.c
index 866edef..6a1fa21 100644
--- a/drivers/mmc/host/dw_mmc-exynos.c
+++ b/drivers/mmc/host/dw_mmc-exynos.c
@@ -39,6 +39,7 @@
 	DW_MCI_TYPE_EXYNOS4210,
 	DW_MCI_TYPE_EXYNOS4412,
 	DW_MCI_TYPE_EXYNOS5250,
+	DW_MCI_TYPE_EXYNOS5420,
 };
 
 /* Exynos implementation specific driver private data */
@@ -62,6 +63,9 @@
 	}, {
 		.compatible	= "samsung,exynos5250-dw-mshc",
 		.ctrl_type	= DW_MCI_TYPE_EXYNOS5250,
+	}, {
+		.compatible	= "samsung,exynos5420-dw-mshc",
+		.ctrl_type	= DW_MCI_TYPE_EXYNOS5420,
 	},
 };
 
@@ -90,7 +94,8 @@
 {
 	struct dw_mci_exynos_priv_data *priv = host->priv;
 
-	if (priv->ctrl_type == DW_MCI_TYPE_EXYNOS5250)
+	if (priv->ctrl_type == DW_MCI_TYPE_EXYNOS5250 ||
+		priv->ctrl_type == DW_MCI_TYPE_EXYNOS5420)
 		host->bus_hz /= (priv->ciu_div + 1);
 	else if (priv->ctrl_type == DW_MCI_TYPE_EXYNOS4412)
 		host->bus_hz /= EXYNOS4412_FIXED_CIU_CLK_DIV;
@@ -173,6 +178,8 @@
 			.data = &exynos_drv_data, },
 	{ .compatible = "samsung,exynos5250-dw-mshc",
 			.data = &exynos_drv_data, },
+	{ .compatible = "samsung,exynos5420-dw-mshc",
+			.data = &exynos_drv_data, },
 	{},
 };
 MODULE_DEVICE_TABLE(of, dw_mci_exynos_match);
diff --git a/drivers/mmc/host/dw_mmc-pci.c b/drivers/mmc/host/dw_mmc-pci.c
index b456b0c..f70546a 100644
--- a/drivers/mmc/host/dw_mmc-pci.c
+++ b/drivers/mmc/host/dw_mmc-pci.c
@@ -59,7 +59,9 @@
 	if (ret)
 		return ret;
 
-	host->regs = pcim_iomap_table(pdev)[0];
+	host->regs = pcim_iomap_table(pdev)[PCI_BAR_NO];
+
+	pci_set_master(pdev);
 
 	ret = dw_mci_probe(host);
 	if (ret)
diff --git a/drivers/mmc/host/dw_mmc-pltfm.c b/drivers/mmc/host/dw_mmc-pltfm.c
index ee52556..2089752 100644
--- a/drivers/mmc/host/dw_mmc-pltfm.c
+++ b/drivers/mmc/host/dw_mmc-pltfm.c
@@ -23,6 +23,7 @@
 #include <linux/of.h>
 
 #include "dw_mmc.h"
+#include "dw_mmc-pltfm.h"
 
 static void dw_mci_rockchip_prepare_command(struct dw_mci *host, u32 *cmdr)
 {
diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
index 5424073..018f365 100644
--- a/drivers/mmc/host/dw_mmc.c
+++ b/drivers/mmc/host/dw_mmc.c
@@ -1601,18 +1601,17 @@
 
 	pending = mci_readl(host, MINTSTS); /* read-only mask reg */
 
+	/*
+	 * DTO fix - version 2.10a and below, and only if internal DMA
+	 * is configured.
+	 */
+	if (host->quirks & DW_MCI_QUIRK_IDMAC_DTO) {
+		if (!pending &&
+		    ((mci_readl(host, STATUS) >> 17) & 0x1fff))
+			pending |= SDMMC_INT_DATA_OVER;
+	}
+
 	if (pending) {
-
-		/*
-		 * DTO fix - version 2.10a and below, and only if internal DMA
-		 * is configured.
-		 */
-		if (host->quirks & DW_MCI_QUIRK_IDMAC_DTO) {
-			if (!pending &&
-			    ((mci_readl(host, STATUS) >> 17) & 0x1fff))
-				pending |= SDMMC_INT_DATA_OVER;
-		}
-
 		if (pending & DW_MCI_CMD_ERROR_FLAGS) {
 			mci_writel(host, RINTSTS, DW_MCI_CMD_ERROR_FLAGS);
 			host->cmd_status = pending;
diff --git a/drivers/mmc/host/jz4740_mmc.c b/drivers/mmc/host/jz4740_mmc.c
index 0308c9f..6651633 100644
--- a/drivers/mmc/host/jz4740_mmc.c
+++ b/drivers/mmc/host/jz4740_mmc.c
@@ -713,7 +713,7 @@
 		mmc->caps2 |= MMC_CAP2_RO_ACTIVE_HIGH;
 
 	if (gpio_is_valid(pdata->gpio_card_detect)) {
-		ret = mmc_gpio_request_cd(mmc, pdata->gpio_card_detect);
+		ret = mmc_gpio_request_cd(mmc, pdata->gpio_card_detect, 0);
 		if (ret)
 			return ret;
 	}
@@ -783,9 +783,8 @@
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	host->base = devm_ioremap_resource(&pdev->dev, res);
-	if (!host->base) {
-		ret = -EBUSY;
-		dev_err(&pdev->dev, "Failed to ioremap base memory\n");
+	if (IS_ERR(host->base)) {
+		ret = PTR_ERR(host->base);
 		goto err_free_host;
 	}
 
diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c
index 74145d1..0a87e56 100644
--- a/drivers/mmc/host/mmc_spi.c
+++ b/drivers/mmc/host/mmc_spi.c
@@ -36,6 +36,7 @@
 
 #include <linux/mmc/host.h>
 #include <linux/mmc/mmc.h>		/* for R1_SPI_* bit values */
+#include <linux/mmc/slot-gpio.h>
 
 #include <linux/spi/spi.h>
 #include <linux/spi/mmc_spi.h>
@@ -1272,33 +1273,11 @@
 	}
 }
 
-static int mmc_spi_get_ro(struct mmc_host *mmc)
-{
-	struct mmc_spi_host *host = mmc_priv(mmc);
-
-	if (host->pdata && host->pdata->get_ro)
-		return !!host->pdata->get_ro(mmc->parent);
-	/*
-	 * Board doesn't support read only detection; let the mmc core
-	 * decide what to do.
-	 */
-	return -ENOSYS;
-}
-
-static int mmc_spi_get_cd(struct mmc_host *mmc)
-{
-	struct mmc_spi_host *host = mmc_priv(mmc);
-
-	if (host->pdata && host->pdata->get_cd)
-		return !!host->pdata->get_cd(mmc->parent);
-	return -ENOSYS;
-}
-
 static const struct mmc_host_ops mmc_spi_ops = {
 	.request	= mmc_spi_request,
 	.set_ios	= mmc_spi_set_ios,
-	.get_ro		= mmc_spi_get_ro,
-	.get_cd		= mmc_spi_get_cd,
+	.get_ro		= mmc_gpio_get_ro,
+	.get_cd		= mmc_gpio_get_cd,
 };
 
 
@@ -1324,6 +1303,7 @@
 	struct mmc_host		*mmc;
 	struct mmc_spi_host	*host;
 	int			status;
+	bool			has_ro = false;
 
 	/* We rely on full duplex transfers, mostly to reduce
 	 * per-transfer overheads (by making fewer transfers).
@@ -1448,18 +1428,33 @@
 	}
 
 	/* pass platform capabilities, if any */
-	if (host->pdata)
+	if (host->pdata) {
 		mmc->caps |= host->pdata->caps;
+		mmc->caps2 |= host->pdata->caps2;
+	}
 
 	status = mmc_add_host(mmc);
 	if (status != 0)
 		goto fail_add_host;
 
+	if (host->pdata && host->pdata->flags & MMC_SPI_USE_CD_GPIO) {
+		status = mmc_gpio_request_cd(mmc, host->pdata->cd_gpio,
+					     host->pdata->cd_debounce);
+		if (status != 0)
+			goto fail_add_host;
+	}
+
+	if (host->pdata && host->pdata->flags & MMC_SPI_USE_RO_GPIO) {
+		has_ro = true;
+		status = mmc_gpio_request_ro(mmc, host->pdata->ro_gpio);
+		if (status != 0)
+			goto fail_add_host;
+	}
+
 	dev_info(&spi->dev, "SD/MMC host %s%s%s%s%s\n",
 			dev_name(&mmc->class_dev),
 			host->dma_dev ? "" : ", no DMA",
-			(host->pdata && host->pdata->get_ro)
-				? "" : ", no WP",
+			has_ro ? "" : ", no WP",
 			(host->pdata && host->pdata->setpower)
 				? "" : ", no poweroff",
 			(mmc->caps & MMC_CAP_NEEDS_POLL)
diff --git a/drivers/mmc/host/mvsdio.c b/drivers/mmc/host/mvsdio.c
index 4ddd83f..06c5b0b 100644
--- a/drivers/mmc/host/mvsdio.c
+++ b/drivers/mmc/host/mvsdio.c
@@ -757,7 +757,8 @@
 		if (mvsd_data->gpio_card_detect &&
 		    gpio_is_valid(mvsd_data->gpio_card_detect)) {
 			ret = mmc_gpio_request_cd(mmc,
-						  mvsd_data->gpio_card_detect);
+						  mvsd_data->gpio_card_detect,
+						  0);
 			if (ret)
 				goto out;
 		} else {
diff --git a/drivers/mmc/host/mxs-mmc.c b/drivers/mmc/host/mxs-mmc.c
index f38d75f..e1fa3ef 100644
--- a/drivers/mmc/host/mxs-mmc.c
+++ b/drivers/mmc/host/mxs-mmc.c
@@ -102,12 +102,15 @@
 		  BM_SSP_STATUS_CARD_DETECT) ^ host->cd_inverted;
 }
 
-static void mxs_mmc_reset(struct mxs_mmc_host *host)
+static int mxs_mmc_reset(struct mxs_mmc_host *host)
 {
 	struct mxs_ssp *ssp = &host->ssp;
 	u32 ctrl0, ctrl1;
+	int ret;
 
-	stmp_reset_block(ssp->base);
+	ret = stmp_reset_block(ssp->base);
+	if (ret)
+		return ret;
 
 	ctrl0 = BM_SSP_CTRL0_IGNORE_CRC;
 	ctrl1 = BF_SSP(0x3, CTRL1_SSP_MODE) |
@@ -132,6 +135,7 @@
 
 	writel(ctrl0, ssp->base + HW_SSP_CTRL0);
 	writel(ctrl1, ssp->base + HW_SSP_CTRL1(ssp));
+	return 0;
 }
 
 static void mxs_mmc_start_cmd(struct mxs_mmc_host *host,
@@ -618,21 +622,25 @@
 		}
 	}
 
-	ssp->clk = clk_get(&pdev->dev, NULL);
+	ssp->clk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(ssp->clk)) {
 		ret = PTR_ERR(ssp->clk);
 		goto out_mmc_free;
 	}
 	clk_prepare_enable(ssp->clk);
 
-	mxs_mmc_reset(host);
+	ret = mxs_mmc_reset(host);
+	if (ret) {
+		dev_err(&pdev->dev, "Failed to reset mmc: %d\n", ret);
+		goto out_clk_disable;
+	}
 
 	ssp->dmach = dma_request_slave_channel(&pdev->dev, "rx-tx");
 	if (!ssp->dmach) {
 		dev_err(mmc_dev(host->mmc),
 			"%s: failed to request dma\n", __func__);
 		ret = -ENODEV;
-		goto out_clk_put;
+		goto out_clk_disable;
 	}
 
 	/* set mmc core parameters */
@@ -685,9 +693,8 @@
 out_free_dma:
 	if (ssp->dmach)
 		dma_release_channel(ssp->dmach);
-out_clk_put:
+out_clk_disable:
 	clk_disable_unprepare(ssp->clk);
-	clk_put(ssp->clk);
 out_mmc_free:
 	mmc_free_host(mmc);
 	return ret;
@@ -705,7 +712,6 @@
 		dma_release_channel(ssp->dmach);
 
 	clk_disable_unprepare(ssp->clk);
-	clk_put(ssp->clk);
 
 	mmc_free_host(mmc);
 
diff --git a/drivers/mmc/host/of_mmc_spi.c b/drivers/mmc/host/of_mmc_spi.c
index d720b5e..6e218fb 100644
--- a/drivers/mmc/host/of_mmc_spi.c
+++ b/drivers/mmc/host/of_mmc_spi.c
@@ -50,25 +50,6 @@
 	return container_of(dev->platform_data, struct of_mmc_spi, pdata);
 }
 
-static int of_mmc_spi_read_gpio(struct device *dev, int gpio_num)
-{
-	struct of_mmc_spi *oms = to_of_mmc_spi(dev);
-	bool active_low = oms->alow_gpios[gpio_num];
-	bool value = gpio_get_value(oms->gpios[gpio_num]);
-
-	return active_low ^ value;
-}
-
-static int of_mmc_spi_get_cd(struct device *dev)
-{
-	return of_mmc_spi_read_gpio(dev, CD_GPIO);
-}
-
-static int of_mmc_spi_get_ro(struct device *dev)
-{
-	return of_mmc_spi_read_gpio(dev, WP_GPIO);
-}
-
 static int of_mmc_spi_init(struct device *dev,
 			   irqreturn_t (*irqhandler)(int, void *), void *mmc)
 {
@@ -130,20 +111,22 @@
 		if (!gpio_is_valid(oms->gpios[i]))
 			continue;
 
-		ret = gpio_request(oms->gpios[i], dev_name(dev));
-		if (ret < 0) {
-			oms->gpios[i] = -EINVAL;
-			continue;
-		}
-
 		if (gpio_flags & OF_GPIO_ACTIVE_LOW)
 			oms->alow_gpios[i] = true;
 	}
 
-	if (gpio_is_valid(oms->gpios[CD_GPIO]))
-		oms->pdata.get_cd = of_mmc_spi_get_cd;
-	if (gpio_is_valid(oms->gpios[WP_GPIO]))
-		oms->pdata.get_ro = of_mmc_spi_get_ro;
+	if (gpio_is_valid(oms->gpios[CD_GPIO])) {
+		oms->pdata.cd_gpio = oms->gpios[CD_GPIO];
+		oms->pdata.flags |= MMC_SPI_USE_CD_GPIO;
+		if (!oms->alow_gpios[CD_GPIO])
+			oms->pdata.caps2 |= MMC_CAP2_CD_ACTIVE_HIGH;
+	}
+	if (gpio_is_valid(oms->gpios[WP_GPIO])) {
+		oms->pdata.ro_gpio = oms->gpios[WP_GPIO];
+		oms->pdata.flags |= MMC_SPI_USE_RO_GPIO;
+		if (!oms->alow_gpios[WP_GPIO])
+			oms->pdata.caps2 |= MMC_CAP2_RO_ACTIVE_HIGH;
+	}
 
 	oms->detect_irq = irq_of_parse_and_map(np, 0);
 	if (oms->detect_irq != 0) {
@@ -166,15 +149,10 @@
 	struct device *dev = &spi->dev;
 	struct device_node *np = dev->of_node;
 	struct of_mmc_spi *oms = to_of_mmc_spi(dev);
-	int i;
 
 	if (!dev->platform_data || !np)
 		return;
 
-	for (i = 0; i < ARRAY_SIZE(oms->gpios); i++) {
-		if (gpio_is_valid(oms->gpios[i]))
-			gpio_free(oms->gpios[i]);
-	}
 	kfree(oms);
 	dev->platform_data = NULL;
 }
diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index 1865321..6ac63df 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -21,6 +21,7 @@
 #include <linux/debugfs.h>
 #include <linux/dmaengine.h>
 #include <linux/seq_file.h>
+#include <linux/sizes.h>
 #include <linux/interrupt.h>
 #include <linux/delay.h>
 #include <linux/dma-mapping.h>
@@ -1041,6 +1042,7 @@
 		}
 	}
 
+	OMAP_HSMMC_WRITE(host->base, STAT, status);
 	if (end_cmd || ((status & CC_EN) && host->cmd))
 		omap_hsmmc_cmd_done(host, host->cmd);
 	if ((end_trans || (status & TC_EN)) && host->mrq)
@@ -1060,7 +1062,6 @@
 		omap_hsmmc_do_irq(host, status);
 
 		/* Flush posted write */
-		OMAP_HSMMC_WRITE(host->base, STAT, status);
 		status = OMAP_HSMMC_READ(host->base, STAT);
 	}
 
diff --git a/drivers/mmc/host/sdhci-bcm2835.c b/drivers/mmc/host/sdhci-bcm2835.c
index 0584a1c..36fa2df 100644
--- a/drivers/mmc/host/sdhci-bcm2835.c
+++ b/drivers/mmc/host/sdhci-bcm2835.c
@@ -119,7 +119,7 @@
 	return byte;
 }
 
-unsigned int bcm2835_sdhci_get_min_clock(struct sdhci_host *host)
+static unsigned int bcm2835_sdhci_get_min_clock(struct sdhci_host *host)
 {
 	return MIN_FREQ;
 }
diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c
index 1dd5ba8..abc8cf0 100644
--- a/drivers/mmc/host/sdhci-esdhc-imx.c
+++ b/drivers/mmc/host/sdhci-esdhc-imx.c
@@ -616,7 +616,7 @@
 	/* card_detect */
 	switch (boarddata->cd_type) {
 	case ESDHC_CD_GPIO:
-		err = mmc_gpio_request_cd(host->mmc, boarddata->cd_gpio);
+		err = mmc_gpio_request_cd(host->mmc, boarddata->cd_gpio, 0);
 		if (err) {
 			dev_err(mmc_dev(host->mmc),
 				"failed to request card-detect gpio!\n");
diff --git a/drivers/mmc/host/sdhci-of-esdhc.c b/drivers/mmc/host/sdhci-of-esdhc.c
index 15039e2..e328252 100644
--- a/drivers/mmc/host/sdhci-of-esdhc.c
+++ b/drivers/mmc/host/sdhci-of-esdhc.c
@@ -316,6 +316,7 @@
 
 	/* call to generic mmc_of_parse to support additional capabilities */
 	mmc_of_parse(host->mmc);
+	mmc_of_parse_voltage(np, &host->ocr_mask);
 
 	ret = sdhci_add_host(host);
 	if (ret)
diff --git a/drivers/mmc/host/sdhci-pxav3.c b/drivers/mmc/host/sdhci-pxav3.c
index bf99359..793dacd 100644
--- a/drivers/mmc/host/sdhci-pxav3.c
+++ b/drivers/mmc/host/sdhci-pxav3.c
@@ -278,7 +278,8 @@
 			host->mmc->pm_caps |= pdata->pm_caps;
 
 		if (gpio_is_valid(pdata->ext_cd_gpio)) {
-			ret = mmc_gpio_request_cd(host->mmc, pdata->ext_cd_gpio);
+			ret = mmc_gpio_request_cd(host->mmc, pdata->ext_cd_gpio,
+						  0);
 			if (ret) {
 				dev_err(mmc_dev(host->mmc),
 					"failed to allocate card detect gpio\n");
diff --git a/drivers/mmc/host/sdhci-s3c.c b/drivers/mmc/host/sdhci-s3c.c
index 926aaf6..6debda95 100644
--- a/drivers/mmc/host/sdhci-s3c.c
+++ b/drivers/mmc/host/sdhci-s3c.c
@@ -296,9 +296,12 @@
 	unsigned long timeout;
 	u16 clk = 0;
 
-	/* don't bother if the clock is going off */
-	if (clock == 0)
+	/* If the clock is going off, set to 0 at clock control register */
+	if (clock == 0) {
+		sdhci_writew(host, 0, SDHCI_CLOCK_CONTROL);
+		host->clock = clock;
 		return;
+	}
 
 	sdhci_s3c_set_clock(host, clock);
 
@@ -608,6 +611,7 @@
 	host->hw_name = "samsung-hsmmc";
 	host->ops = &sdhci_s3c_ops;
 	host->quirks = 0;
+	host->quirks2 = 0;
 	host->irq = irq;
 
 	/* Setup quirks for the controller */
diff --git a/drivers/mmc/host/sdhci-sirf.c b/drivers/mmc/host/sdhci-sirf.c
index 62a4a83..696122c 100644
--- a/drivers/mmc/host/sdhci-sirf.c
+++ b/drivers/mmc/host/sdhci-sirf.c
@@ -84,7 +84,7 @@
 	 * gets setup in sdhci_add_host() and we oops.
 	 */
 	if (gpio_is_valid(priv->gpio_cd)) {
-		ret = mmc_gpio_request_cd(host->mmc, priv->gpio_cd);
+		ret = mmc_gpio_request_cd(host->mmc, priv->gpio_cd, 0);
 		if (ret) {
 			dev_err(&pdev->dev, "card detect irq request failed: %d\n",
 				ret);
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index dd2c083..7a7fb4f 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -3119,6 +3119,9 @@
 				   SDHCI_MAX_CURRENT_MULTIPLIER;
 	}
 
+	if (host->ocr_mask)
+		ocr_avail = host->ocr_mask;
+
 	mmc->ocr_avail = ocr_avail;
 	mmc->ocr_avail_sdio = ocr_avail;
 	if (host->ocr_avail_sdio)
@@ -3213,6 +3216,8 @@
 		host->tuning_timer.function = sdhci_tuning_timer;
 	}
 
+	sdhci_init(host, 0);
+
 	ret = request_irq(host->irq, sdhci_irq, IRQF_SHARED,
 		mmc_hostname(mmc), host);
 	if (ret) {
@@ -3221,8 +3226,6 @@
 		goto untasklet;
 	}
 
-	sdhci_init(host, 0);
-
 #ifdef CONFIG_MMC_DEBUG
 	sdhci_dumpregs(host);
 #endif
diff --git a/drivers/mmc/host/sh_mmcif.c b/drivers/mmc/host/sh_mmcif.c
index 6706b5e..36629a0 100644
--- a/drivers/mmc/host/sh_mmcif.c
+++ b/drivers/mmc/host/sh_mmcif.c
@@ -61,6 +61,7 @@
 #include <linux/platform_device.h>
 #include <linux/pm_qos.h>
 #include <linux/pm_runtime.h>
+#include <linux/sh_dma.h>
 #include <linux/spinlock.h>
 #include <linux/module.h>
 
@@ -133,6 +134,8 @@
 				 INT_BUFWEN | INT_CMD12DRE | INT_BUFRE | \
 				 INT_DTRANE | INT_CMD12RBE | INT_CMD12CRE)
 
+#define INT_CCS			(INT_CCSTO | INT_CCSRCV | INT_CCSDE)
+
 /* CE_INT_MASK */
 #define MASK_ALL		0x00000000
 #define MASK_MCCSDE		(1 << 29)
@@ -161,7 +164,7 @@
 
 #define MASK_START_CMD		(MASK_MCMDVIO | MASK_MBUFVIO | MASK_MWDATERR | \
 				 MASK_MRDATERR | MASK_MRIDXERR | MASK_MRSPERR | \
-				 MASK_MCCSTO | MASK_MCRCSTO | MASK_MWDATTO | \
+				 MASK_MCRCSTO | MASK_MWDATTO | \
 				 MASK_MRDATTO | MASK_MRBSYTO | MASK_MRSPTO)
 
 #define MASK_CLEAN		(INT_ERR_STS | MASK_MRBSYE | MASK_MCRSPE |	\
@@ -243,6 +246,8 @@
 	int sg_blkidx;
 	bool power;
 	bool card_present;
+	bool ccs_enable;		/* Command Completion Signal support */
+	bool clk_ctrl2_enable;
 	struct mutex thread_lock;
 
 	/* DMA support */
@@ -386,25 +391,29 @@
 
 	host->dma_active = false;
 
-	if (!pdata)
+	if (pdata) {
+		if (pdata->slave_id_tx <= 0 || pdata->slave_id_rx <= 0)
+			return;
+	} else if (!host->pd->dev.of_node) {
 		return;
-
-	if (pdata->slave_id_tx <= 0 || pdata->slave_id_rx <= 0)
-		return;
+	}
 
 	/* We can only either use DMA for both Tx and Rx or not use it at all */
 	dma_cap_zero(mask);
 	dma_cap_set(DMA_SLAVE, mask);
 
-	host->chan_tx = dma_request_channel(mask, shdma_chan_filter,
-					    (void *)pdata->slave_id_tx);
+	host->chan_tx = dma_request_slave_channel_compat(mask, shdma_chan_filter,
+				pdata ? (void *)pdata->slave_id_tx : NULL,
+				&host->pd->dev, "tx");
 	dev_dbg(&host->pd->dev, "%s: TX: got channel %p\n", __func__,
 		host->chan_tx);
 
 	if (!host->chan_tx)
 		return;
 
-	cfg.slave_id = pdata->slave_id_tx;
+	/* In the OF case the driver will get the slave ID from the DT */
+	if (pdata)
+		cfg.slave_id = pdata->slave_id_tx;
 	cfg.direction = DMA_MEM_TO_DEV;
 	cfg.dst_addr = res->start + MMCIF_CE_DATA;
 	cfg.src_addr = 0;
@@ -412,15 +421,17 @@
 	if (ret < 0)
 		goto ecfgtx;
 
-	host->chan_rx = dma_request_channel(mask, shdma_chan_filter,
-					    (void *)pdata->slave_id_rx);
+	host->chan_rx = dma_request_slave_channel_compat(mask, shdma_chan_filter,
+				pdata ? (void *)pdata->slave_id_rx : NULL,
+				&host->pd->dev, "rx");
 	dev_dbg(&host->pd->dev, "%s: RX: got channel %p\n", __func__,
 		host->chan_rx);
 
 	if (!host->chan_rx)
 		goto erqrx;
 
-	cfg.slave_id = pdata->slave_id_rx;
+	if (pdata)
+		cfg.slave_id = pdata->slave_id_rx;
 	cfg.direction = DMA_DEV_TO_MEM;
 	cfg.dst_addr = 0;
 	cfg.src_addr = res->start + MMCIF_CE_DATA;
@@ -485,8 +496,12 @@
 
 	sh_mmcif_writel(host->addr, MMCIF_CE_VERSION, SOFT_RST_ON);
 	sh_mmcif_writel(host->addr, MMCIF_CE_VERSION, SOFT_RST_OFF);
+	if (host->ccs_enable)
+		tmp |= SCCSTO_29;
+	if (host->clk_ctrl2_enable)
+		sh_mmcif_writel(host->addr, MMCIF_CE_CLK_CTRL2, 0x0F0F0000);
 	sh_mmcif_bitset(host, MMCIF_CE_CLK_CTRL, tmp |
-		SRSPTO_256 | SRBSYTO_29 | SRWDTO_29 | SCCSTO_29);
+		SRSPTO_256 | SRBSYTO_29 | SRWDTO_29);
 	/* byte swap on */
 	sh_mmcif_bitset(host, MMCIF_CE_BUF_ACC, BUF_ACC_ATYP);
 }
@@ -866,6 +881,9 @@
 		break;
 	}
 
+	if (host->ccs_enable)
+		mask |= MASK_MCCSTO;
+
 	if (mrq->data) {
 		sh_mmcif_writel(host->addr, MMCIF_CE_BLOCK_SET, 0);
 		sh_mmcif_writel(host->addr, MMCIF_CE_BLOCK_SET,
@@ -873,7 +891,10 @@
 	}
 	opc = sh_mmcif_set_cmd(host, mrq);
 
-	sh_mmcif_writel(host->addr, MMCIF_CE_INT, 0xD80430C0);
+	if (host->ccs_enable)
+		sh_mmcif_writel(host->addr, MMCIF_CE_INT, 0xD80430C0);
+	else
+		sh_mmcif_writel(host->addr, MMCIF_CE_INT, 0xD80430C0 | INT_CCS);
 	sh_mmcif_writel(host->addr, MMCIF_CE_INT_MASK, mask);
 	/* set arg */
 	sh_mmcif_writel(host->addr, MMCIF_CE_ARG, cmd->arg);
@@ -956,11 +977,8 @@
 
 static void sh_mmcif_set_power(struct sh_mmcif_host *host, struct mmc_ios *ios)
 {
-	struct sh_mmcif_plat_data *pd = host->pd->dev.platform_data;
 	struct mmc_host *mmc = host->mmc;
 
-	if (pd && pd->set_pwr)
-		pd->set_pwr(host->pd, ios->power_mode != MMC_POWER_OFF);
 	if (!IS_ERR(mmc->supply.vmmc))
 		/* Errors ignored... */
 		mmc_regulator_set_ocr(mmc, mmc->supply.vmmc,
@@ -1241,11 +1259,14 @@
 static irqreturn_t sh_mmcif_intr(int irq, void *dev_id)
 {
 	struct sh_mmcif_host *host = dev_id;
-	u32 state;
+	u32 state, mask;
 
 	state = sh_mmcif_readl(host->addr, MMCIF_CE_INT);
-	sh_mmcif_writel(host->addr, MMCIF_CE_INT,
-			~(state & sh_mmcif_readl(host->addr, MMCIF_CE_INT_MASK)));
+	mask = sh_mmcif_readl(host->addr, MMCIF_CE_INT_MASK);
+	if (host->ccs_enable)
+		sh_mmcif_writel(host->addr, MMCIF_CE_INT, ~(state & mask));
+	else
+		sh_mmcif_writel(host->addr, MMCIF_CE_INT, INT_CCS | ~(state & mask));
 	sh_mmcif_bitclr(host, MMCIF_CE_INT_MASK, state & MASK_CLEAN);
 
 	if (state & ~MASK_CLEAN)
@@ -1379,6 +1400,8 @@
 	host->mmc	= mmc;
 	host->addr	= reg;
 	host->timeout	= msecs_to_jiffies(1000);
+	host->ccs_enable = !pd || !pd->ccs_unsupported;
+	host->clk_ctrl2_enable = pd && pd->clk_ctrl2_present;
 
 	host->pd = pdev;
 
@@ -1436,7 +1459,7 @@
 	}
 
 	if (pd && pd->use_cd_gpio) {
-		ret = mmc_gpio_request_cd(mmc, pd->cd_gpio);
+		ret = mmc_gpio_request_cd(mmc, pd->cd_gpio, 0);
 		if (ret < 0)
 			goto erqcd;
 	}
diff --git a/drivers/mmc/host/sh_mobile_sdhi.c b/drivers/mmc/host/sh_mobile_sdhi.c
index ebea749..87ed3fb 100644
--- a/drivers/mmc/host/sh_mobile_sdhi.c
+++ b/drivers/mmc/host/sh_mobile_sdhi.c
@@ -70,20 +70,6 @@
 	clk_disable(priv->clk);
 }
 
-static void sh_mobile_sdhi_set_pwr(struct platform_device *pdev, int state)
-{
-	struct sh_mobile_sdhi_info *p = pdev->dev.platform_data;
-
-	p->set_pwr(pdev, state);
-}
-
-static int sh_mobile_sdhi_get_cd(struct platform_device *pdev)
-{
-	struct sh_mobile_sdhi_info *p = pdev->dev.platform_data;
-
-	return p->get_cd(pdev);
-}
-
 static int sh_mobile_sdhi_wait_idle(struct tmio_mmc_host *host)
 {
 	int timeout = 1000;
@@ -129,7 +115,12 @@
 static const struct of_device_id sh_mobile_sdhi_of_match[] = {
 	{ .compatible = "renesas,shmobile-sdhi" },
 	{ .compatible = "renesas,sh7372-sdhi" },
+	{ .compatible = "renesas,sh73a0-sdhi", .data = &sh_mobile_sdhi_of_cfg[0], },
+	{ .compatible = "renesas,r8a73a4-sdhi", .data = &sh_mobile_sdhi_of_cfg[0], },
 	{ .compatible = "renesas,r8a7740-sdhi", .data = &sh_mobile_sdhi_of_cfg[0], },
+	{ .compatible = "renesas,r8a7778-sdhi", .data = &sh_mobile_sdhi_of_cfg[0], },
+	{ .compatible = "renesas,r8a7779-sdhi", .data = &sh_mobile_sdhi_of_cfg[0], },
+	{ .compatible = "renesas,r8a7790-sdhi", .data = &sh_mobile_sdhi_of_cfg[0], },
 	{},
 };
 MODULE_DEVICE_TABLE(of, sh_mobile_sdhi_of_match);
@@ -180,10 +171,6 @@
 		mmc_data->capabilities |= p->tmio_caps;
 		mmc_data->capabilities2 |= p->tmio_caps2;
 		mmc_data->cd_gpio = p->cd_gpio;
-		if (p->set_pwr)
-			mmc_data->set_pwr = sh_mobile_sdhi_set_pwr;
-		if (p->get_cd)
-			mmc_data->get_cd = sh_mobile_sdhi_get_cd;
 
 		if (p->dma_slave_tx > 0 && p->dma_slave_rx > 0) {
 			/*
diff --git a/drivers/mmc/host/tmio_mmc_dma.c b/drivers/mmc/host/tmio_mmc_dma.c
index 47bdb8f..65edb4a 100644
--- a/drivers/mmc/host/tmio_mmc_dma.c
+++ b/drivers/mmc/host/tmio_mmc_dma.c
@@ -104,6 +104,7 @@
 pio:
 	if (!desc) {
 		/* DMA failed, fall back to PIO */
+		tmio_mmc_enable_dma(host, false);
 		if (ret >= 0)
 			ret = -EIO;
 		host->chan_rx = NULL;
@@ -116,7 +117,6 @@
 		}
 		dev_warn(&host->pdev->dev,
 			 "DMA failed: %d, falling back to PIO\n", ret);
-		tmio_mmc_enable_dma(host, false);
 	}
 
 	dev_dbg(&host->pdev->dev, "%s(): desc %p, cookie %d, sg[%d]\n", __func__,
@@ -185,6 +185,7 @@
 pio:
 	if (!desc) {
 		/* DMA failed, fall back to PIO */
+		tmio_mmc_enable_dma(host, false);
 		if (ret >= 0)
 			ret = -EIO;
 		host->chan_tx = NULL;
@@ -197,7 +198,6 @@
 		}
 		dev_warn(&host->pdev->dev,
 			 "DMA failed: %d, falling back to PIO\n", ret);
-		tmio_mmc_enable_dma(host, false);
 	}
 
 	dev_dbg(&host->pdev->dev, "%s(): desc %p, cookie %d\n", __func__,
diff --git a/drivers/mmc/host/tmio_mmc_pio.c b/drivers/mmc/host/tmio_mmc_pio.c
index b72edb7..b380225 100644
--- a/drivers/mmc/host/tmio_mmc_pio.c
+++ b/drivers/mmc/host/tmio_mmc_pio.c
@@ -795,9 +795,13 @@
 	 * omap_hsmmc.c driver does.
 	 */
 	if (!IS_ERR(mmc->supply.vqmmc) && !ret) {
-		regulator_enable(mmc->supply.vqmmc);
+		ret = regulator_enable(mmc->supply.vqmmc);
 		udelay(200);
 	}
+
+	if (ret < 0)
+		dev_dbg(&host->pdev->dev, "Regulators failed to power up: %d\n",
+			ret);
 }
 
 static void tmio_mmc_power_off(struct tmio_mmc_host *host)
@@ -932,25 +936,11 @@
 		 (sd_ctrl_read32(host, CTL_STATUS) & TMIO_STAT_WRPROTECT));
 }
 
-static int tmio_mmc_get_cd(struct mmc_host *mmc)
-{
-	struct tmio_mmc_host *host = mmc_priv(mmc);
-	struct tmio_mmc_data *pdata = host->pdata;
-	int ret = mmc_gpio_get_cd(mmc);
-	if (ret >= 0)
-		return ret;
-
-	if (!pdata->get_cd)
-		return -ENOSYS;
-	else
-		return pdata->get_cd(host->pdev);
-}
-
 static const struct mmc_host_ops tmio_mmc_ops = {
 	.request	= tmio_mmc_request,
 	.set_ios	= tmio_mmc_set_ios,
 	.get_ro         = tmio_mmc_get_ro,
-	.get_cd		= tmio_mmc_get_cd,
+	.get_cd		= mmc_gpio_get_cd,
 	.enable_sdio_irq = tmio_mmc_enable_sdio_irq,
 };
 
@@ -1106,7 +1096,7 @@
 	dev_pm_qos_expose_latency_limit(&pdev->dev, 100);
 
 	if (pdata->flags & TMIO_MMC_USE_GPIO_CD) {
-		ret = mmc_gpio_request_cd(mmc, pdata->cd_gpio);
+		ret = mmc_gpio_request_cd(mmc, pdata->cd_gpio, 0);
 		if (ret < 0) {
 			tmio_mmc_host_remove(_host);
 			return ret;
diff --git a/drivers/mmc/host/vub300.c b/drivers/mmc/host/vub300.c
index cb9f361..e9028ad 100644
--- a/drivers/mmc/host/vub300.c
+++ b/drivers/mmc/host/vub300.c
@@ -2079,7 +2079,7 @@
 	kref_put(&vub300->kref, vub300_delete);
 }
 
-void vub300_init_card(struct mmc_host *mmc, struct mmc_card *card)
+static void vub300_init_card(struct mmc_host *mmc, struct mmc_card *card)
 {				/* NOT irq */
 	struct vub300_mmc_host *vub300 = mmc_priv(mmc);
 	dev_info(&vub300->udev->dev, "NO host QUIRKS for this card\n");
diff --git a/drivers/mtd/bcm63xxpart.c b/drivers/mtd/bcm63xxpart.c
index 6eeb84c..5c81390 100644
--- a/drivers/mtd/bcm63xxpart.c
+++ b/drivers/mtd/bcm63xxpart.c
@@ -4,7 +4,7 @@
  * Copyright © 2006-2008  Florian Fainelli <florian@openwrt.org>
  *			  Mike Albon <malbon@openwrt.org>
  * Copyright © 2009-2010  Daniel Dickinson <openwrt@cshore.neomailbox.net>
- * Copyright © 2011-2012  Jonas Gorski <jonas.gorski@gmail.com>
+ * Copyright © 2011-2013  Jonas Gorski <jonas.gorski@gmail.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -27,17 +27,19 @@
 #include <linux/crc32.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
+#include <linux/sizes.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
 
+#include <asm/mach-bcm63xx/bcm63xx_nvram.h>
 #include <asm/mach-bcm63xx/bcm963xx_tag.h>
 #include <asm/mach-bcm63xx/board_bcm963xx.h>
 
 #define BCM63XX_EXTENDED_SIZE	0xBFC00000	/* Extended flash address */
 
-#define BCM63XX_CFE_BLOCK_SIZE	0x10000		/* always at least 64KiB */
+#define BCM63XX_CFE_BLOCK_SIZE	SZ_64K		/* always at least 64KiB */
 
 #define BCM63XX_CFE_MAGIC_OFFSET 0x4e0
 
@@ -90,7 +92,8 @@
 			      BCM63XX_CFE_BLOCK_SIZE);
 
 	cfelen = cfe_erasesize;
-	nvramlen = cfe_erasesize;
+	nvramlen = bcm63xx_nvram_get_psi_size() * SZ_1K;
+	nvramlen = roundup(nvramlen, cfe_erasesize);
 
 	/* Allocate memory for buffer */
 	buf = vmalloc(sizeof(struct bcm_tag));
diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c
index fff665d..89b9d68 100644
--- a/drivers/mtd/chips/cfi_cmdset_0002.c
+++ b/drivers/mtd/chips/cfi_cmdset_0002.c
@@ -1571,8 +1571,8 @@
 	xip_enable(map, chip, adr);
 	/* FIXME - should have reset delay before continuing */
 
-	printk(KERN_WARNING "MTD %s(): software timeout\n",
-	       __func__ );
+	printk(KERN_WARNING "MTD %s(): software timeout, address:0x%.8lx.\n",
+	       __func__, adr);
 
 	ret = -EIO;
  op_done:
diff --git a/drivers/mtd/chips/gen_probe.c b/drivers/mtd/chips/gen_probe.c
index 74dbb6b..ffb36ba 100644
--- a/drivers/mtd/chips/gen_probe.c
+++ b/drivers/mtd/chips/gen_probe.c
@@ -211,9 +211,7 @@
 
 	probe_function = __symbol_get(probename);
 	if (!probe_function) {
-		char modname[sizeof("cfi_cmdset_%4.4X")];
-		sprintf(modname, "cfi_cmdset_%4.4X", type);
-		request_module(modname);
+		request_module("cfi_cmdset_%4.4X", type);
 		probe_function = __symbol_get(probename);
 	}
 
diff --git a/drivers/mtd/chips/jedec_probe.c b/drivers/mtd/chips/jedec_probe.c
index c443f52..7c0b27d 100644
--- a/drivers/mtd/chips/jedec_probe.c
+++ b/drivers/mtd/chips/jedec_probe.c
@@ -120,7 +120,7 @@
 #define PM49FL008	0x006A
 
 /* Sharp */
-#define LH28F640BF	0x00b0
+#define LH28F640BF	0x00B0
 
 /* ST - www.st.com */
 #define M29F800AB	0x0058
@@ -1299,13 +1299,14 @@
 		.mfr_id		= CFI_MFR_SHARP,
 		.dev_id		= LH28F640BF,
 		.name		= "LH28F640BF",
-		.devtypes	= CFI_DEVICETYPE_X8,
+		.devtypes	= CFI_DEVICETYPE_X16,
 		.uaddr		= MTD_UADDR_UNNECESSARY,
-		.dev_size	= SIZE_4MiB,
-		.cmd_set	= P_ID_INTEL_STD,
-		.nr_regions	= 1,
+		.dev_size	= SIZE_8MiB,
+		.cmd_set	= P_ID_INTEL_EXT,
+		.nr_regions	= 2,
 		.regions	= {
-			ERASEINFO(0x40000,16),
+			ERASEINFO(0x10000, 127),
+			ERASEINFO(0x02000, 8),
 		}
 	}, {
 		.mfr_id		= CFI_MFR_SST,
diff --git a/drivers/mtd/devices/Kconfig b/drivers/mtd/devices/Kconfig
index 2a4d55e..74ab4b7 100644
--- a/drivers/mtd/devices/Kconfig
+++ b/drivers/mtd/devices/Kconfig
@@ -224,59 +224,4 @@
 	default 4
 endif
 
-config MTD_DOCPROBE
-	tristate
-	select MTD_DOCECC
-
-config MTD_DOCECC
-	tristate
-
-config MTD_DOCPROBE_ADVANCED
-	bool "Advanced detection options for DiskOnChip"
-	depends on MTD_DOCPROBE
-	help
-	  This option allows you to specify nonstandard address at which to
-	  probe for a DiskOnChip, or to change the detection options.  You
-	  are unlikely to need any of this unless you are using LinuxBIOS.
-	  Say 'N'.
-
-config MTD_DOCPROBE_ADDRESS
-	hex "Physical address of DiskOnChip" if MTD_DOCPROBE_ADVANCED
-	depends on MTD_DOCPROBE
-	default "0x0"
-	---help---
-	  By default, the probe for DiskOnChip devices will look for a
-	  DiskOnChip at every multiple of 0x2000 between 0xC8000 and 0xEE000.
-	  This option allows you to specify a single address at which to probe
-	  for the device, which is useful if you have other devices in that
-	  range which get upset when they are probed.
-
-	  (Note that on PowerPC, the normal probe will only check at
-	  0xE4000000.)
-
-	  Normally, you should leave this set to zero, to allow the probe at
-	  the normal addresses.
-
-config MTD_DOCPROBE_HIGH
-	bool "Probe high addresses"
-	depends on MTD_DOCPROBE_ADVANCED
-	help
-	  By default, the probe for DiskOnChip devices will look for a
-	  DiskOnChip at every multiple of 0x2000 between 0xC8000 and 0xEE000.
-	  This option changes to make it probe between 0xFFFC8000 and
-	  0xFFFEE000.  Unless you are using LinuxBIOS, this is unlikely to be
-	  useful to you.  Say 'N'.
-
-config MTD_DOCPROBE_55AA
-	bool "Probe for 0x55 0xAA BIOS Extension Signature"
-	depends on MTD_DOCPROBE_ADVANCED
-	help
-	  Check for the 0x55 0xAA signature of a DiskOnChip, and do not
-	  continue with probing if it is absent.  The signature will always be
-	  present for a DiskOnChip 2000 or a normal DiskOnChip Millennium.
-	  Only if you have overwritten the first block of a DiskOnChip
-	  Millennium will it be absent.  Enable this option if you are using
-	  LinuxBIOS or if you need to recover a DiskOnChip Millennium on which
-	  you have managed to wipe the first block.
-
 endmenu
diff --git a/drivers/mtd/devices/bcm47xxsflash.c b/drivers/mtd/devices/bcm47xxsflash.c
index 18e7761..77de29b 100644
--- a/drivers/mtd/devices/bcm47xxsflash.c
+++ b/drivers/mtd/devices/bcm47xxsflash.c
@@ -1,6 +1,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/slab.h>
+#include <linux/delay.h>
 #include <linux/mtd/mtd.h>
 #include <linux/platform_device.h>
 #include <linux/bcma/bcma.h>
@@ -12,6 +13,93 @@
 
 static const char * const probes[] = { "bcm47xxpart", NULL };
 
+/**************************************************
+ * Various helpers
+ **************************************************/
+
+static void bcm47xxsflash_cmd(struct bcm47xxsflash *b47s, u32 opcode)
+{
+	int i;
+
+	b47s->cc_write(b47s, BCMA_CC_FLASHCTL, BCMA_CC_FLASHCTL_START | opcode);
+	for (i = 0; i < 1000; i++) {
+		if (!(b47s->cc_read(b47s, BCMA_CC_FLASHCTL) &
+		      BCMA_CC_FLASHCTL_BUSY))
+			return;
+		cpu_relax();
+	}
+	pr_err("Control command failed (timeout)!\n");
+}
+
+static int bcm47xxsflash_poll(struct bcm47xxsflash *b47s, int timeout)
+{
+	unsigned long deadline = jiffies + timeout;
+
+	do {
+		switch (b47s->type) {
+		case BCM47XXSFLASH_TYPE_ST:
+			bcm47xxsflash_cmd(b47s, OPCODE_ST_RDSR);
+			if (!(b47s->cc_read(b47s, BCMA_CC_FLASHDATA) &
+			      SR_ST_WIP))
+				return 0;
+			break;
+		case BCM47XXSFLASH_TYPE_ATMEL:
+			bcm47xxsflash_cmd(b47s, OPCODE_AT_STATUS);
+			if (b47s->cc_read(b47s, BCMA_CC_FLASHDATA) &
+			    SR_AT_READY)
+				return 0;
+			break;
+		}
+
+		cpu_relax();
+		udelay(1);
+	} while (!time_after_eq(jiffies, deadline));
+
+	pr_err("Timeout waiting for flash to be ready!\n");
+
+	return -EBUSY;
+}
+
+/**************************************************
+ * MTD ops
+ **************************************************/
+
+static int bcm47xxsflash_erase(struct mtd_info *mtd, struct erase_info *erase)
+{
+	struct bcm47xxsflash *b47s = mtd->priv;
+	int err;
+
+	switch (b47s->type) {
+	case BCM47XXSFLASH_TYPE_ST:
+		bcm47xxsflash_cmd(b47s, OPCODE_ST_WREN);
+		b47s->cc_write(b47s, BCMA_CC_FLASHADDR, erase->addr);
+		/* Newer flashes have "sub-sectors" which can be erased
+		 * independently with a new command: ST_SSE. The ST_SE command
+		 * erases 64KB just as before.
+		 */
+		if (b47s->blocksize < (64 * 1024))
+			bcm47xxsflash_cmd(b47s, OPCODE_ST_SSE);
+		else
+			bcm47xxsflash_cmd(b47s, OPCODE_ST_SE);
+		break;
+	case BCM47XXSFLASH_TYPE_ATMEL:
+		b47s->cc_write(b47s, BCMA_CC_FLASHADDR, erase->addr << 1);
+		bcm47xxsflash_cmd(b47s, OPCODE_AT_PAGE_ERASE);
+		break;
+	}
+
+	err = bcm47xxsflash_poll(b47s, HZ);
+	if (err)
+		erase->state = MTD_ERASE_FAILED;
+	else
+		erase->state = MTD_ERASE_DONE;
+
+	if (erase->callback)
+		erase->callback(erase);
+
+	return err;
+}
+
 static int bcm47xxsflash_read(struct mtd_info *mtd, loff_t from, size_t len,
 			      size_t *retlen, u_char *buf)
 {
@@ -28,6 +116,127 @@
 	return len;
 }
 
+static int bcm47xxsflash_write_st(struct mtd_info *mtd, u32 offset, size_t len,
+				  const u_char *buf)
+{
+	struct bcm47xxsflash *b47s = mtd->priv;
+	int written = 0;
+
+	/* Enable writes */
+	bcm47xxsflash_cmd(b47s, OPCODE_ST_WREN);
+
+	/* Write first byte */
+	b47s->cc_write(b47s, BCMA_CC_FLASHADDR, offset);
+	b47s->cc_write(b47s, BCMA_CC_FLASHDATA, *buf++);
+
+	/* Program page */
+	if (b47s->bcma_cc->core->id.rev < 20) {
+		bcm47xxsflash_cmd(b47s, OPCODE_ST_PP);
+		return 1; /* 1B written */
+	}
+
+	/* Program page and set CSA (on newer chips we can continue writing) */
+	bcm47xxsflash_cmd(b47s, OPCODE_ST_CSA | OPCODE_ST_PP);
+	offset++;
+	len--;
+	written++;
+
+	while (len > 0) {
+		/* Page boundary, another function call is needed */
+		if ((offset & 0xFF) == 0)
+			break;
+
+		bcm47xxsflash_cmd(b47s, OPCODE_ST_CSA | *buf++);
+		offset++;
+		len--;
+		written++;
+	}
+
+	/* All done, drop CSA & poll */
+	b47s->cc_write(b47s, BCMA_CC_FLASHCTL, 0);
+	udelay(1);
+	if (bcm47xxsflash_poll(b47s, HZ / 10))
+		pr_err("Flash rejected dropping CSA\n");
+
+	return written;
+}
+
+static int bcm47xxsflash_write_at(struct mtd_info *mtd, u32 offset, size_t len,
+				  const u_char *buf)
+{
+	struct bcm47xxsflash *b47s = mtd->priv;
+	u32 mask = b47s->blocksize - 1;
+	u32 page = (offset & ~mask) << 1;
+	u32 byte = offset & mask;
+	int written = 0;
+
+	/* If we don't overwrite whole page, read it to the buffer first */
+	if (byte || (len < b47s->blocksize)) {
+		int err;
+
+		b47s->cc_write(b47s, BCMA_CC_FLASHADDR, page);
+		bcm47xxsflash_cmd(b47s, OPCODE_AT_BUF1_LOAD);
+		/* 250 us for AT45DB321B */
+		err = bcm47xxsflash_poll(b47s, HZ / 1000);
+		if (err) {
+			pr_err("Timeout reading page 0x%X info buffer\n", page);
+			return err;
+		}
+	}
+
+	/* Change buffer content with our data */
+	while (len > 0) {
+		/* Page boundary, another function call is needed */
+		if (byte == b47s->blocksize)
+			break;
+
+		b47s->cc_write(b47s, BCMA_CC_FLASHADDR, byte++);
+		b47s->cc_write(b47s, BCMA_CC_FLASHDATA, *buf++);
+		bcm47xxsflash_cmd(b47s, OPCODE_AT_BUF1_WRITE);
+		len--;
+		written++;
+	}
+
+	/* Program page with the buffer content */
+	b47s->cc_write(b47s, BCMA_CC_FLASHADDR, page);
+	bcm47xxsflash_cmd(b47s, OPCODE_AT_BUF1_PROGRAM);
+
+	return written;
+}
+
+static int bcm47xxsflash_write(struct mtd_info *mtd, loff_t to, size_t len,
+			       size_t *retlen, const u_char *buf)
+{
+	struct bcm47xxsflash *b47s = mtd->priv;
+	int written;
+
+	/* Writing functions can return without writing all passed data, for
+	 * example when the hardware is too old or when we git page boundary.
+	 */
+	while (len > 0) {
+		switch (b47s->type) {
+		case BCM47XXSFLASH_TYPE_ST:
+			written = bcm47xxsflash_write_st(mtd, to, len, buf);
+			break;
+		case BCM47XXSFLASH_TYPE_ATMEL:
+			written = bcm47xxsflash_write_at(mtd, to, len, buf);
+			break;
+		default:
+			BUG_ON(1);
+		}
+		if (written < 0) {
+			pr_err("Error writing at offset 0x%llX\n", to);
+			return written;
+		}
+		to += (loff_t)written;
+		len -= written;
+		*retlen += written;
+		buf += written;
+	}
+
+	return 0;
+}
+
 static void bcm47xxsflash_fill_mtd(struct bcm47xxsflash *b47s)
 {
 	struct mtd_info *mtd = &b47s->mtd;
@@ -35,33 +244,48 @@
 	mtd->priv = b47s;
 	mtd->name = "bcm47xxsflash";
 	mtd->owner = THIS_MODULE;
-	mtd->type = MTD_ROM;
-	mtd->size = b47s->size;
-	mtd->_read = bcm47xxsflash_read;
 
-	/* TODO: implement writing support and verify/change following code */
-	mtd->flags = MTD_CAP_ROM;
-	mtd->writebufsize = mtd->writesize = 1;
+	mtd->type = MTD_NORFLASH;
+	mtd->flags = MTD_CAP_NORFLASH;
+	mtd->size = b47s->size;
+	mtd->erasesize = b47s->blocksize;
+	mtd->writesize = 1;
+	mtd->writebufsize = 1;
+
+	mtd->_erase = bcm47xxsflash_erase;
+	mtd->_read = bcm47xxsflash_read;
+	mtd->_write = bcm47xxsflash_write;
 }
 
 /**************************************************
  * BCMA
  **************************************************/
 
+static int bcm47xxsflash_bcma_cc_read(struct bcm47xxsflash *b47s, u16 offset)
+{
+	return bcma_cc_read32(b47s->bcma_cc, offset);
+}
+
+static void bcm47xxsflash_bcma_cc_write(struct bcm47xxsflash *b47s, u16 offset,
+					u32 value)
+{
+	bcma_cc_write32(b47s->bcma_cc, offset, value);
+}
+
 static int bcm47xxsflash_bcma_probe(struct platform_device *pdev)
 {
 	struct bcma_sflash *sflash = dev_get_platdata(&pdev->dev);
 	struct bcm47xxsflash *b47s;
 	int err;
 
-	b47s = kzalloc(sizeof(*b47s), GFP_KERNEL);
-	if (!b47s) {
-		err = -ENOMEM;
-		goto out;
-	}
+	b47s = devm_kzalloc(&pdev->dev, sizeof(*b47s), GFP_KERNEL);
+	if (!b47s)
+		return -ENOMEM;
 	sflash->priv = b47s;
 
 	b47s->bcma_cc = container_of(sflash, struct bcma_drv_cc, sflash);
+	b47s->cc_read = bcm47xxsflash_bcma_cc_read;
+	b47s->cc_write = bcm47xxsflash_bcma_cc_write;
 
 	switch (b47s->bcma_cc->capabilities & BCMA_CC_CAP_FLASHT) {
 	case BCMA_CC_FLASHT_STSER:
@@ -81,15 +305,13 @@
 	err = mtd_device_parse_register(&b47s->mtd, probes, NULL, NULL, 0);
 	if (err) {
 		pr_err("Failed to register MTD device: %d\n", err);
-		goto err_dev_reg;
+		return err;
 	}
 
-	return 0;
+	if (bcm47xxsflash_poll(b47s, HZ / 10))
+		pr_warn("Serial flash busy\n");
 
-err_dev_reg:
-	kfree(&b47s->mtd);
-out:
-	return err;
+	return 0;
 }
 
 static int bcm47xxsflash_bcma_remove(struct platform_device *pdev)
@@ -98,7 +320,6 @@
 	struct bcm47xxsflash *b47s = sflash->priv;
 
 	mtd_device_unregister(&b47s->mtd);
-	kfree(b47s);
 
 	return 0;
 }
@@ -116,22 +337,4 @@
  * Init
  **************************************************/
 
-static int __init bcm47xxsflash_init(void)
-{
-	int err;
-
-	err = platform_driver_register(&bcma_sflash_driver);
-	if (err)
-		pr_err("Failed to register BCMA serial flash driver: %d\n",
-		       err);
-
-	return err;
-}
-
-static void __exit bcm47xxsflash_exit(void)
-{
-	platform_driver_unregister(&bcma_sflash_driver);
-}
-
-module_init(bcm47xxsflash_init);
-module_exit(bcm47xxsflash_exit);
+module_platform_driver(bcma_sflash_driver);
diff --git a/drivers/mtd/devices/bcm47xxsflash.h b/drivers/mtd/devices/bcm47xxsflash.h
index f22f8c4..fe93daf 100644
--- a/drivers/mtd/devices/bcm47xxsflash.h
+++ b/drivers/mtd/devices/bcm47xxsflash.h
@@ -60,6 +60,8 @@
 
 struct bcm47xxsflash {
 	struct bcma_drv_cc *bcma_cc;
+	int (*cc_read)(struct bcm47xxsflash *b47s, u16 offset);
+	void (*cc_write)(struct bcm47xxsflash *b47s, u16 offset, u32 value);
 
 	enum bcm47xxsflash_type type;
 
diff --git a/drivers/mtd/devices/block2mtd.c b/drivers/mtd/devices/block2mtd.c
index e081bfe..5cb4c04 100644
--- a/drivers/mtd/devices/block2mtd.c
+++ b/drivers/mtd/devices/block2mtd.c
@@ -6,6 +6,9 @@
  *
  * Licence: GPL
  */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/module.h>
 #include <linux/fs.h>
 #include <linux/blkdev.h>
@@ -18,10 +21,6 @@
 #include <linux/mount.h>
 #include <linux/slab.h>
 
-#define ERROR(fmt, args...) printk(KERN_ERR "block2mtd: " fmt "\n" , ## args)
-#define INFO(fmt, args...) printk(KERN_INFO "block2mtd: " fmt "\n" , ## args)
-
-
 /* Info for the block device */
 struct block2mtd_dev {
 	struct list_head list;
@@ -84,7 +83,7 @@
 	err = _block2mtd_erase(dev, from, len);
 	mutex_unlock(&dev->write_mutex);
 	if (err) {
-		ERROR("erase failed err = %d", err);
+		pr_err("erase failed err = %d\n", err);
 		instr->state = MTD_ERASE_FAILED;
 	} else
 		instr->state = MTD_ERASE_DONE;
@@ -239,13 +238,13 @@
 #endif
 
 	if (IS_ERR(bdev)) {
-		ERROR("error: cannot open device %s", devname);
+		pr_err("error: cannot open device %s\n", devname);
 		goto devinit_err;
 	}
 	dev->blkdev = bdev;
 
 	if (MAJOR(bdev->bd_dev) == MTD_BLOCK_MAJOR) {
-		ERROR("attempting to use an MTD device as a block device");
+		pr_err("attempting to use an MTD device as a block device\n");
 		goto devinit_err;
 	}
 
@@ -277,9 +276,10 @@
 		goto devinit_err;
 	}
 	list_add(&dev->list, &blkmtd_device_list);
-	INFO("mtd%d: [%s] erase_size = %dKiB [%d]", dev->mtd.index,
-			dev->mtd.name + strlen("block2mtd: "),
-			dev->mtd.erasesize >> 10, dev->mtd.erasesize);
+	pr_info("mtd%d: [%s] erase_size = %dKiB [%d]\n",
+		dev->mtd.index,
+		dev->mtd.name + strlen("block2mtd: "),
+		dev->mtd.erasesize >> 10, dev->mtd.erasesize);
 	return dev;
 
 devinit_err:
@@ -339,17 +339,11 @@
 }
 
 
-#define parse_err(fmt, args...) do {	\
-	ERROR(fmt, ## args);		\
-	return 0;			\
-} while (0)
-
 #ifndef MODULE
 static int block2mtd_init_called = 0;
 static char block2mtd_paramline[80 + 12]; /* 80 for device, 12 for erase size */
 #endif
 
-
 static int block2mtd_setup2(const char *val)
 {
 	char buf[80 + 12]; /* 80 for device, 12 for erase size */
@@ -359,8 +353,10 @@
 	size_t erase_size = PAGE_SIZE;
 	int i, ret;
 
-	if (strnlen(val, sizeof(buf)) >= sizeof(buf))
-		parse_err("parameter too long");
+	if (strnlen(val, sizeof(buf)) >= sizeof(buf)) {
+		pr_err("parameter too long\n");
+		return 0;
+	}
 
 	strcpy(str, val);
 	kill_final_newline(str);
@@ -368,20 +364,27 @@
 	for (i = 0; i < 2; i++)
 		token[i] = strsep(&str, ",");
 
-	if (str)
-		parse_err("too many arguments");
+	if (str) {
+		pr_err("too many arguments\n");
+		return 0;
+	}
 
-	if (!token[0])
-		parse_err("no argument");
+	if (!token[0]) {
+		pr_err("no argument\n");
+		return 0;
+	}
 
 	name = token[0];
-	if (strlen(name) + 1 > 80)
-		parse_err("device name too long");
+	if (strlen(name) + 1 > 80) {
+		pr_err("device name too long\n");
+		return 0;
+	}
 
 	if (token[1]) {
 		ret = parse_num(&erase_size, token[1]);
 		if (ret) {
-			parse_err("illegal erase size");
+			pr_err("illegal erase size\n");
+			return 0;
 		}
 	}
 
@@ -444,8 +447,9 @@
 		struct block2mtd_dev *dev = list_entry(pos, typeof(*dev), list);
 		block2mtd_sync(&dev->mtd);
 		mtd_device_unregister(&dev->mtd);
-		INFO("mtd%d: [%s] removed", dev->mtd.index,
-				dev->mtd.name + strlen("block2mtd: "));
+		pr_info("mtd%d: [%s] removed\n",
+			dev->mtd.index,
+			dev->mtd.name + strlen("block2mtd: "));
 		list_del(&dev->list);
 		block2mtd_free_device(dev);
 	}
diff --git a/drivers/mtd/devices/elm.c b/drivers/mtd/devices/elm.c
index dccef9f..d1dd6a3 100644
--- a/drivers/mtd/devices/elm.c
+++ b/drivers/mtd/devices/elm.c
@@ -20,14 +20,21 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/of.h>
+#include <linux/sched.h>
 #include <linux/pm_runtime.h>
 #include <linux/platform_data/elm.h>
 
+#define ELM_SYSCONFIG			0x010
 #define ELM_IRQSTATUS			0x018
 #define ELM_IRQENABLE			0x01c
 #define ELM_LOCATION_CONFIG		0x020
 #define ELM_PAGE_CTRL			0x080
 #define ELM_SYNDROME_FRAGMENT_0		0x400
+#define ELM_SYNDROME_FRAGMENT_1		0x404
+#define ELM_SYNDROME_FRAGMENT_2		0x408
+#define ELM_SYNDROME_FRAGMENT_3		0x40c
+#define ELM_SYNDROME_FRAGMENT_4		0x410
+#define ELM_SYNDROME_FRAGMENT_5		0x414
 #define ELM_SYNDROME_FRAGMENT_6		0x418
 #define ELM_LOCATION_STATUS		0x800
 #define ELM_ERROR_LOCATION_0		0x880
@@ -56,12 +63,27 @@
 #define SYNDROME_FRAGMENT_REG_SIZE	0x40
 #define ERROR_LOCATION_SIZE		0x100
 
+struct elm_registers {
+	u32 elm_irqenable;
+	u32 elm_sysconfig;
+	u32 elm_location_config;
+	u32 elm_page_ctrl;
+	u32 elm_syndrome_fragment_6[ERROR_VECTOR_MAX];
+	u32 elm_syndrome_fragment_5[ERROR_VECTOR_MAX];
+	u32 elm_syndrome_fragment_4[ERROR_VECTOR_MAX];
+	u32 elm_syndrome_fragment_3[ERROR_VECTOR_MAX];
+	u32 elm_syndrome_fragment_2[ERROR_VECTOR_MAX];
+	u32 elm_syndrome_fragment_1[ERROR_VECTOR_MAX];
+	u32 elm_syndrome_fragment_0[ERROR_VECTOR_MAX];
+};
+
 struct elm_info {
 	struct device *dev;
 	void __iomem *elm_base;
 	struct completion elm_completion;
 	struct list_head list;
 	enum bch_ecc bch_type;
+	struct elm_registers elm_regs;
 };
 
 static LIST_HEAD(elm_devices);
@@ -346,14 +368,9 @@
 	}
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res) {
-		dev_err(&pdev->dev, "no memory resource defined\n");
-		return -ENODEV;
-	}
-
-	info->elm_base = devm_request_and_ioremap(&pdev->dev, res);
-	if (!info->elm_base)
-		return -EADDRNOTAVAIL;
+	info->elm_base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(info->elm_base))
+		return PTR_ERR(info->elm_base);
 
 	ret = devm_request_irq(&pdev->dev, irq->start, elm_isr, 0,
 			pdev->name, info);
@@ -381,10 +398,103 @@
 {
 	pm_runtime_put_sync(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
-	platform_set_drvdata(pdev, NULL);
 	return 0;
 }
 
+/**
+ * elm_context_save
+ * saves ELM configurations to preserve them across Hardware powered-down
+ */
+static int elm_context_save(struct elm_info *info)
+{
+	struct elm_registers *regs = &info->elm_regs;
+	enum bch_ecc bch_type = info->bch_type;
+	u32 offset = 0, i;
+
+	regs->elm_irqenable       = elm_read_reg(info, ELM_IRQENABLE);
+	regs->elm_sysconfig       = elm_read_reg(info, ELM_SYSCONFIG);
+	regs->elm_location_config = elm_read_reg(info, ELM_LOCATION_CONFIG);
+	regs->elm_page_ctrl       = elm_read_reg(info, ELM_PAGE_CTRL);
+	for (i = 0; i < ERROR_VECTOR_MAX; i++) {
+		offset = i * SYNDROME_FRAGMENT_REG_SIZE;
+		switch (bch_type) {
+		case BCH8_ECC:
+			regs->elm_syndrome_fragment_3[i] = elm_read_reg(info,
+					ELM_SYNDROME_FRAGMENT_3 + offset);
+			regs->elm_syndrome_fragment_2[i] = elm_read_reg(info,
+					ELM_SYNDROME_FRAGMENT_2 + offset);
+		case BCH4_ECC:
+			regs->elm_syndrome_fragment_1[i] = elm_read_reg(info,
+					ELM_SYNDROME_FRAGMENT_1 + offset);
+			regs->elm_syndrome_fragment_0[i] = elm_read_reg(info,
+					ELM_SYNDROME_FRAGMENT_0 + offset);
+		default:
+			return -EINVAL;
+		}
+		/* ELM SYNDROME_VALID bit in SYNDROME_FRAGMENT_6[] needs
+		 * to be saved for all BCH schemes*/
+		regs->elm_syndrome_fragment_6[i] = elm_read_reg(info,
+					ELM_SYNDROME_FRAGMENT_6 + offset);
+	}
+	return 0;
+}
+
+/**
+ * elm_context_restore
+ * writes configurations saved duing power-down back into ELM registers
+ */
+static int elm_context_restore(struct elm_info *info)
+{
+	struct elm_registers *regs = &info->elm_regs;
+	enum bch_ecc bch_type = info->bch_type;
+	u32 offset = 0, i;
+
+	elm_write_reg(info, ELM_IRQENABLE,	 regs->elm_irqenable);
+	elm_write_reg(info, ELM_SYSCONFIG,	 regs->elm_sysconfig);
+	elm_write_reg(info, ELM_LOCATION_CONFIG, regs->elm_location_config);
+	elm_write_reg(info, ELM_PAGE_CTRL,	 regs->elm_page_ctrl);
+	for (i = 0; i < ERROR_VECTOR_MAX; i++) {
+		offset = i * SYNDROME_FRAGMENT_REG_SIZE;
+		switch (bch_type) {
+		case BCH8_ECC:
+			elm_write_reg(info, ELM_SYNDROME_FRAGMENT_3 + offset,
+					regs->elm_syndrome_fragment_3[i]);
+			elm_write_reg(info, ELM_SYNDROME_FRAGMENT_2 + offset,
+					regs->elm_syndrome_fragment_2[i]);
+		case BCH4_ECC:
+			elm_write_reg(info, ELM_SYNDROME_FRAGMENT_1 + offset,
+					regs->elm_syndrome_fragment_1[i]);
+			elm_write_reg(info, ELM_SYNDROME_FRAGMENT_0 + offset,
+					regs->elm_syndrome_fragment_0[i]);
+		default:
+			return -EINVAL;
+		}
+		/* ELM_SYNDROME_VALID bit to be set in last to trigger FSM */
+		elm_write_reg(info, ELM_SYNDROME_FRAGMENT_6 + offset,
+					regs->elm_syndrome_fragment_6[i] &
+							 ELM_SYNDROME_VALID);
+	}
+	return 0;
+}
+
+static int elm_suspend(struct device *dev)
+{
+	struct elm_info *info = dev_get_drvdata(dev);
+	elm_context_save(info);
+	pm_runtime_put_sync(dev);
+	return 0;
+}
+
+static int elm_resume(struct device *dev)
+{
+	struct elm_info *info = dev_get_drvdata(dev);
+	pm_runtime_get_sync(dev);
+	elm_context_restore(info);
+	return 0;
+}
+
+static SIMPLE_DEV_PM_OPS(elm_pm_ops, elm_suspend, elm_resume);
+
 #ifdef CONFIG_OF
 static const struct of_device_id elm_of_match[] = {
 	{ .compatible = "ti,am3352-elm" },
@@ -398,6 +508,7 @@
 		.name	= "elm",
 		.owner	= THIS_MODULE,
 		.of_match_table = of_match_ptr(elm_of_match),
+		.pm	= &elm_pm_ops,
 	},
 	.probe	= elm_probe,
 	.remove	= elm_remove,
diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c
index 2f3d2a5..26b14f9 100644
--- a/drivers/mtd/devices/m25p80.c
+++ b/drivers/mtd/devices/m25p80.c
@@ -43,17 +43,24 @@
 #define	OPCODE_FAST_READ	0x0b	/* Read data bytes (high frequency) */
 #define	OPCODE_PP		0x02	/* Page program (up to 256 bytes) */
 #define	OPCODE_BE_4K		0x20	/* Erase 4KiB block */
+#define	OPCODE_BE_4K_PMC	0xd7	/* Erase 4KiB block on PMC chips */
 #define	OPCODE_BE_32K		0x52	/* Erase 32KiB block */
 #define	OPCODE_CHIP_ERASE	0xc7	/* Erase whole flash chip */
 #define	OPCODE_SE		0xd8	/* Sector erase (usually 64KiB) */
 #define	OPCODE_RDID		0x9f	/* Read JEDEC ID */
 
+/* 4-byte address opcodes - used on Spansion and some Macronix flashes. */
+#define	OPCODE_NORM_READ_4B	0x13	/* Read data bytes (low frequency) */
+#define	OPCODE_FAST_READ_4B	0x0c	/* Read data bytes (high frequency) */
+#define	OPCODE_PP_4B		0x12	/* Page program (up to 256 bytes) */
+#define	OPCODE_SE_4B		0xdc	/* Sector erase (usually 64KiB) */
+
 /* Used for SST flashes only. */
 #define	OPCODE_BP		0x02	/* Byte program */
 #define	OPCODE_WRDI		0x04	/* Write disable */
 #define	OPCODE_AAI_WP		0xad	/* Auto address increment word program */
 
-/* Used for Macronix flashes only. */
+/* Used for Macronix and Winbond flashes. */
 #define	OPCODE_EN4B		0xb7	/* Enter 4-byte mode */
 #define	OPCODE_EX4B		0xe9	/* Exit 4-byte mode */
 
@@ -84,6 +91,8 @@
 	u16			page_size;
 	u16			addr_width;
 	u8			erase_opcode;
+	u8			read_opcode;
+	u8			program_opcode;
 	u8			*command;
 	bool			fast_read;
 };
@@ -161,6 +170,7 @@
 {
 	switch (JEDEC_MFR(jedec_id)) {
 	case CFI_MFR_MACRONIX:
+	case CFI_MFR_ST: /* Micron, actually */
 	case 0xEF /* winbond */:
 		flash->command[0] = enable ? OPCODE_EN4B : OPCODE_EX4B;
 		return spi_write(flash->spi, flash->command, 1);
@@ -371,7 +381,7 @@
 	 */
 
 	/* Set up the write data buffer. */
-	opcode = flash->fast_read ? OPCODE_FAST_READ : OPCODE_NORM_READ;
+	opcode = flash->read_opcode;
 	flash->command[0] = opcode;
 	m25p_addr2cmd(flash, from, flash->command);
 
@@ -422,7 +432,7 @@
 	write_enable(flash);
 
 	/* Set up the opcode in the write buffer. */
-	flash->command[0] = OPCODE_PP;
+	flash->command[0] = flash->program_opcode;
 	m25p_addr2cmd(flash, to, flash->command);
 
 	page_offset = to & (flash->page_size - 1);
@@ -682,6 +692,8 @@
 #define	SECT_4K		0x01		/* OPCODE_BE_4K works uniformly */
 #define	M25P_NO_ERASE	0x02		/* No erase command needed */
 #define	SST_WRITE	0x04		/* use SST byte programming */
+#define	M25P_NO_FR	0x08		/* Can't do fastread */
+#define	SECT_4K_PMC	0x10		/* OPCODE_BE_4K_PMC works uniformly */
 };
 
 #define INFO(_jedec_id, _ext_id, _sector_size, _n_sectors, _flags)	\
@@ -694,13 +706,13 @@
 		.flags = (_flags),					\
 	})
 
-#define CAT25_INFO(_sector_size, _n_sectors, _page_size, _addr_width)	\
+#define CAT25_INFO(_sector_size, _n_sectors, _page_size, _addr_width, _flags)	\
 	((kernel_ulong_t)&(struct flash_info) {				\
 		.sector_size = (_sector_size),				\
 		.n_sectors = (_n_sectors),				\
 		.page_size = (_page_size),				\
 		.addr_width = (_addr_width),				\
-		.flags = M25P_NO_ERASE,					\
+		.flags = (_flags),					\
 	})
 
 /* NOTE: double check command sets and memory organization when you add
@@ -732,7 +744,8 @@
 	{ "en25qh256", INFO(0x1c7019, 0, 64 * 1024, 512, 0) },
 
 	/* Everspin */
-	{ "mr25h256", CAT25_INFO(  32 * 1024, 1, 256, 2) },
+	{ "mr25h256", CAT25_INFO(  32 * 1024, 1, 256, 2, M25P_NO_ERASE | M25P_NO_FR) },
+	{ "mr25h10", CAT25_INFO(128 * 1024, 1, 256, 3, M25P_NO_ERASE | M25P_NO_FR) },
 
 	/* GigaDevice */
 	{ "gd25q32", INFO(0xc84016, 0, 64 * 1024,  64, SECT_4K) },
@@ -762,6 +775,11 @@
 	{ "n25q128a13",  INFO(0x20ba18, 0, 64 * 1024, 256, 0) },
 	{ "n25q256a", INFO(0x20ba19, 0, 64 * 1024, 512, SECT_4K) },
 
+	/* PMC */
+	{ "pm25lv512", INFO(0, 0, 32 * 1024, 2, SECT_4K_PMC) },
+	{ "pm25lv010", INFO(0, 0, 32 * 1024, 4, SECT_4K_PMC) },
+	{ "pm25lq032", INFO(0x7f9d46, 0, 64 * 1024,  64, SECT_4K) },
+
 	/* Spansion -- single (large) sector size only, at least
 	 * for the chips listed here (without boot sectors).
 	 */
@@ -840,17 +858,18 @@
 	{ "w25q32dw", INFO(0xef6016, 0, 64 * 1024,  64, SECT_4K) },
 	{ "w25x64", INFO(0xef3017, 0, 64 * 1024, 128, SECT_4K) },
 	{ "w25q64", INFO(0xef4017, 0, 64 * 1024, 128, SECT_4K) },
+	{ "w25q128", INFO(0xef4018, 0, 64 * 1024, 256, SECT_4K) },
 	{ "w25q80", INFO(0xef5014, 0, 64 * 1024,  16, SECT_4K) },
 	{ "w25q80bl", INFO(0xef4014, 0, 64 * 1024,  16, SECT_4K) },
 	{ "w25q128", INFO(0xef4018, 0, 64 * 1024, 256, SECT_4K) },
 	{ "w25q256", INFO(0xef4019, 0, 64 * 1024, 512, SECT_4K) },
 
 	/* Catalyst / On Semiconductor -- non-JEDEC */
-	{ "cat25c11", CAT25_INFO(  16, 8, 16, 1) },
-	{ "cat25c03", CAT25_INFO(  32, 8, 16, 2) },
-	{ "cat25c09", CAT25_INFO( 128, 8, 32, 2) },
-	{ "cat25c17", CAT25_INFO( 256, 8, 32, 2) },
-	{ "cat25128", CAT25_INFO(2048, 8, 64, 2) },
+	{ "cat25c11", CAT25_INFO(  16, 8, 16, 1, M25P_NO_ERASE | M25P_NO_FR) },
+	{ "cat25c03", CAT25_INFO(  32, 8, 16, 2, M25P_NO_ERASE | M25P_NO_FR) },
+	{ "cat25c09", CAT25_INFO( 128, 8, 32, 2, M25P_NO_ERASE | M25P_NO_FR) },
+	{ "cat25c17", CAT25_INFO( 256, 8, 32, 2, M25P_NO_ERASE | M25P_NO_FR) },
+	{ "cat25128", CAT25_INFO(2048, 8, 64, 2, M25P_NO_ERASE | M25P_NO_FR) },
 	{ },
 };
 MODULE_DEVICE_TABLE(spi, m25p_ids);
@@ -920,7 +939,7 @@
 	 * a chip ID, try the JEDEC id commands; they'll work for most
 	 * newer chips, even if we don't recognize the particular chip.
 	 */
-	data = spi->dev.platform_data;
+	data = dev_get_platdata(&spi->dev);
 	if (data && data->type) {
 		const struct spi_device_id *plat_id;
 
@@ -972,7 +991,7 @@
 
 	flash->spi = spi;
 	mutex_init(&flash->lock);
-	dev_set_drvdata(&spi->dev, flash);
+	spi_set_drvdata(spi, flash);
 
 	/*
 	 * Atmel, SST and Intel/Numonyx serial flash tend to power
@@ -1014,6 +1033,9 @@
 	if (info->flags & SECT_4K) {
 		flash->erase_opcode = OPCODE_BE_4K;
 		flash->mtd.erasesize = 4096;
+	} else if (info->flags & SECT_4K_PMC) {
+		flash->erase_opcode = OPCODE_BE_4K_PMC;
+		flash->mtd.erasesize = 4096;
 	} else {
 		flash->erase_opcode = OPCODE_SE;
 		flash->mtd.erasesize = info->sector_size;
@@ -1028,24 +1050,41 @@
 	flash->mtd.writebufsize = flash->page_size;
 
 	flash->fast_read = false;
-#ifdef CONFIG_OF
 	if (np && of_property_read_bool(np, "m25p,fast-read"))
 		flash->fast_read = true;
-#endif
 
 #ifdef CONFIG_M25PXX_USE_FAST_READ
 	flash->fast_read = true;
 #endif
+	if (info->flags & M25P_NO_FR)
+		flash->fast_read = false;
+
+	/* Default commands */
+	if (flash->fast_read)
+		flash->read_opcode = OPCODE_FAST_READ;
+	else
+		flash->read_opcode = OPCODE_NORM_READ;
+
+	flash->program_opcode = OPCODE_PP;
 
 	if (info->addr_width)
 		flash->addr_width = info->addr_width;
-	else {
+	else if (flash->mtd.size > 0x1000000) {
 		/* enable 4-byte addressing if the device exceeds 16MiB */
-		if (flash->mtd.size > 0x1000000) {
-			flash->addr_width = 4;
-			set_4byte(flash, info->jedec_id, 1);
+		flash->addr_width = 4;
+		if (JEDEC_MFR(info->jedec_id) == CFI_MFR_AMD) {
+			/* Dedicated 4-byte command set */
+			flash->read_opcode = flash->fast_read ?
+				OPCODE_FAST_READ_4B :
+				OPCODE_NORM_READ_4B;
+			flash->program_opcode = OPCODE_PP_4B;
+			/* No small sector erase for 4-byte command set */
+			flash->erase_opcode = OPCODE_SE_4B;
+			flash->mtd.erasesize = info->sector_size;
 		} else
-			flash->addr_width = 3;
+			set_4byte(flash, info->jedec_id, 1);
+	} else {
+		flash->addr_width = 3;
 	}
 
 	dev_info(&spi->dev, "%s (%lld Kbytes)\n", id->name,
@@ -1080,7 +1119,7 @@
 
 static int m25p_remove(struct spi_device *spi)
 {
-	struct m25p	*flash = dev_get_drvdata(&spi->dev);
+	struct m25p	*flash = spi_get_drvdata(spi);
 	int		status;
 
 	/* Clean up MTD stuff. */
diff --git a/drivers/mtd/devices/mtd_dataflash.c b/drivers/mtd/devices/mtd_dataflash.c
index 28779b6..0e8cbfe 100644
--- a/drivers/mtd/devices/mtd_dataflash.c
+++ b/drivers/mtd/devices/mtd_dataflash.c
@@ -622,7 +622,7 @@
 	struct dataflash		*priv;
 	struct mtd_info			*device;
 	struct mtd_part_parser_data	ppdata;
-	struct flash_platform_data	*pdata = spi->dev.platform_data;
+	struct flash_platform_data	*pdata = dev_get_platdata(&spi->dev);
 	char				*otp_tag = "";
 	int				err = 0;
 
@@ -661,7 +661,7 @@
 	dev_info(&spi->dev, "%s (%lld KBytes) pagesize %d bytes%s\n",
 			name, (long long)((device->size + 1023) >> 10),
 			pagesize, otp_tag);
-	dev_set_drvdata(&spi->dev, priv);
+	spi_set_drvdata(spi, priv);
 
 	ppdata.of_node = spi->dev.of_node;
 	err = mtd_device_parse_register(device, NULL, &ppdata,
@@ -671,7 +671,7 @@
 	if (!err)
 		return 0;
 
-	dev_set_drvdata(&spi->dev, NULL);
+	spi_set_drvdata(spi, NULL);
 	kfree(priv);
 	return err;
 }
@@ -895,14 +895,14 @@
 
 static int dataflash_remove(struct spi_device *spi)
 {
-	struct dataflash	*flash = dev_get_drvdata(&spi->dev);
+	struct dataflash	*flash = spi_get_drvdata(spi);
 	int			status;
 
 	pr_debug("%s: remove\n", dev_name(&spi->dev));
 
 	status = mtd_device_unregister(&flash->mtd);
 	if (status == 0) {
-		dev_set_drvdata(&spi->dev, NULL);
+		spi_set_drvdata(spi, NULL);
 		kfree(flash);
 	}
 	return status;
diff --git a/drivers/mtd/devices/spear_smi.c b/drivers/mtd/devices/spear_smi.c
index 8a82b8b..4238214 100644
--- a/drivers/mtd/devices/spear_smi.c
+++ b/drivers/mtd/devices/spear_smi.c
@@ -550,7 +550,7 @@
 {
 	struct spear_snor_flash *flash = get_flash_data(mtd);
 	struct spear_smi *dev = mtd->priv;
-	void *src;
+	void __iomem *src;
 	u32 ctrlreg1, val;
 	int ret;
 
@@ -583,7 +583,7 @@
 
 	writel(val, dev->io_base + SMI_CR1);
 
-	memcpy_fromio(buf, (u8 *)src, len);
+	memcpy_fromio(buf, src, len);
 
 	/* restore ctrl reg1 */
 	writel(ctrlreg1, dev->io_base + SMI_CR1);
@@ -596,7 +596,7 @@
 }
 
 static inline int spear_smi_cpy_toio(struct spear_smi *dev, u32 bank,
-		void *dest, const void *src, size_t len)
+		void __iomem *dest, const void *src, size_t len)
 {
 	int ret;
 	u32 ctrlreg1;
@@ -643,7 +643,7 @@
 {
 	struct spear_snor_flash *flash = get_flash_data(mtd);
 	struct spear_smi *dev = mtd->priv;
-	void *dest;
+	void __iomem *dest;
 	u32 page_offset, page_size;
 	int ret;
 
@@ -995,14 +995,12 @@
 		ret = spear_smi_setup_banks(pdev, i, pdata->np[i]);
 		if (ret) {
 			dev_err(&dev->pdev->dev, "bank setup failed\n");
-			goto err_bank_setup;
+			goto err_irq;
 		}
 	}
 
 	return 0;
 
-err_bank_setup:
-	platform_set_drvdata(pdev, NULL);
 err_irq:
 	clk_disable_unprepare(dev->clk);
 err:
@@ -1040,12 +1038,11 @@
 	}
 
 	clk_disable_unprepare(dev->clk);
-	platform_set_drvdata(pdev, NULL);
 
 	return 0;
 }
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 static int spear_smi_suspend(struct device *dev)
 {
 	struct spear_smi *sdev = dev_get_drvdata(dev);
@@ -1068,9 +1065,9 @@
 		spear_smi_hw_init(sdev);
 	return ret;
 }
+#endif
 
 static SIMPLE_DEV_PM_OPS(spear_smi_pm_ops, spear_smi_suspend, spear_smi_resume);
-#endif
 
 #ifdef CONFIG_OF
 static const struct of_device_id spear_smi_id_table[] = {
@@ -1086,9 +1083,7 @@
 		.bus = &platform_bus_type,
 		.owner = THIS_MODULE,
 		.of_match_table = of_match_ptr(spear_smi_id_table),
-#ifdef CONFIG_PM
 		.pm = &spear_smi_pm_ops,
-#endif
 	},
 	.probe = spear_smi_probe,
 	.remove = spear_smi_remove,
diff --git a/drivers/mtd/devices/sst25l.c b/drivers/mtd/devices/sst25l.c
index 8091b01..a42f1f0 100644
--- a/drivers/mtd/devices/sst25l.c
+++ b/drivers/mtd/devices/sst25l.c
@@ -370,9 +370,9 @@
 
 	flash->spi = spi;
 	mutex_init(&flash->lock);
-	dev_set_drvdata(&spi->dev, flash);
+	spi_set_drvdata(spi, flash);
 
-	data = spi->dev.platform_data;
+	data = dev_get_platdata(&spi->dev);
 	if (data && data->name)
 		flash->mtd.name = data->name;
 	else
@@ -404,7 +404,7 @@
 					data ? data->nr_parts : 0);
 	if (ret) {
 		kfree(flash);
-		dev_set_drvdata(&spi->dev, NULL);
+		spi_set_drvdata(spi, NULL);
 		return -ENODEV;
 	}
 
@@ -413,7 +413,7 @@
 
 static int sst25l_remove(struct spi_device *spi)
 {
-	struct sst25l_flash *flash = dev_get_drvdata(&spi->dev);
+	struct sst25l_flash *flash = spi_get_drvdata(spi);
 	int ret;
 
 	ret = mtd_device_unregister(&flash->mtd);
diff --git a/drivers/mtd/maps/Kconfig b/drivers/mtd/maps/Kconfig
index 8b27ca0..310dc7c 100644
--- a/drivers/mtd/maps/Kconfig
+++ b/drivers/mtd/maps/Kconfig
@@ -157,24 +157,6 @@
 	help
 	  This provides a driver for the NOR flash attached to a PXA2xx chip.
 
-config MTD_OCTAGON
-	tristate "JEDEC Flash device mapped on Octagon 5066 SBC"
-	depends on X86 && MTD_JEDEC && MTD_COMPLEX_MAPPINGS
-	help
-	  This provides a 'mapping' driver which supports the way in which
-	  the flash chips are connected in the Octagon-5066 Single Board
-	  Computer. More information on the board is available at
-	  <http://www.octagonsystems.com/products/5066.aspx>.
-
-config MTD_VMAX
-	tristate "JEDEC Flash device mapped on Tempustech VMAX SBC301"
-	depends on X86 && MTD_JEDEC && MTD_COMPLEX_MAPPINGS
-	help
-	  This provides a 'mapping' driver which supports the way in which
-	  the flash chips are connected in the Tempustech VMAX SBC301 Single
-	  Board Computer. More information on the board is available at
-	  <http://www.tempustech.com/>.
-
 config MTD_SCx200_DOCFLASH
 	tristate "Flash device mapped with DOCCS on NatSemi SCx200"
 	depends on SCx200 && MTD_CFI
diff --git a/drivers/mtd/maps/Makefile b/drivers/mtd/maps/Makefile
index 9fdbd4b..141c91a 100644
--- a/drivers/mtd/maps/Makefile
+++ b/drivers/mtd/maps/Makefile
@@ -16,7 +16,6 @@
 obj-$(CONFIG_MTD_CK804XROM)	+= ck804xrom.o
 obj-$(CONFIG_MTD_TSUNAMI)	+= tsunami_flash.o
 obj-$(CONFIG_MTD_PXA2XX)	+= pxa2xx-flash.o
-obj-$(CONFIG_MTD_OCTAGON)	+= octagon-5066.o
 obj-$(CONFIG_MTD_PHYSMAP)	+= physmap.o
 obj-$(CONFIG_MTD_PHYSMAP_OF)	+= physmap_of.o
 obj-$(CONFIG_MTD_PISMO)		+= pismo.o
@@ -28,7 +27,6 @@
 obj-$(CONFIG_MTD_NETSC520)	+= netsc520.o
 obj-$(CONFIG_MTD_TS5500)	+= ts5500_flash.o
 obj-$(CONFIG_MTD_SUN_UFLASH)	+= sun_uflash.o
-obj-$(CONFIG_MTD_VMAX)		+= vmax301.o
 obj-$(CONFIG_MTD_SCx200_DOCFLASH)+= scx200_docflash.o
 obj-$(CONFIG_MTD_SOLUTIONENGINE)+= solutionengine.o
 obj-$(CONFIG_MTD_PCI)		+= pci.o
diff --git a/drivers/mtd/maps/bfin-async-flash.c b/drivers/mtd/maps/bfin-async-flash.c
index 319b04a..5434d8d 100644
--- a/drivers/mtd/maps/bfin-async-flash.c
+++ b/drivers/mtd/maps/bfin-async-flash.c
@@ -128,7 +128,7 @@
 static int bfin_flash_probe(struct platform_device *pdev)
 {
 	int ret;
-	struct physmap_flash_data *pdata = pdev->dev.platform_data;
+	struct physmap_flash_data *pdata = dev_get_platdata(&pdev->dev);
 	struct resource *memory = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	struct resource *flash_ambctl = platform_get_resource(pdev, IORESOURCE_MEM, 1);
 	struct async_state *state;
diff --git a/drivers/mtd/maps/cfi_flagadm.c b/drivers/mtd/maps/cfi_flagadm.c
index d16fc9d..d504b3d 100644
--- a/drivers/mtd/maps/cfi_flagadm.c
+++ b/drivers/mtd/maps/cfi_flagadm.c
@@ -55,13 +55,13 @@
 #define FLASH_PARTITION3_SIZE 0x001C0000
 
 
-struct map_info flagadm_map = {
+static struct map_info flagadm_map = {
 		.name =		"FlagaDM flash device",
 		.size =		FLASH_SIZE,
 		.bankwidth =	2,
 };
 
-struct mtd_partition flagadm_parts[] = {
+static struct mtd_partition flagadm_parts[] = {
 	{
 		.name =		"Bootloader",
 		.offset	=	FLASH_PARTITION0_ADDR,
@@ -112,7 +112,7 @@
 		return 0;
 	}
 
-	iounmap((void *)flagadm_map.virt);
+	iounmap((void __iomem *)flagadm_map.virt);
 	return -ENXIO;
 }
 
@@ -123,8 +123,8 @@
 		map_destroy(mymtd);
 	}
 	if (flagadm_map.virt) {
-		iounmap((void *)flagadm_map.virt);
-		flagadm_map.virt = 0;
+		iounmap((void __iomem *)flagadm_map.virt);
+		flagadm_map.virt = NULL;
 	}
 }
 
diff --git a/drivers/mtd/maps/gpio-addr-flash.c b/drivers/mtd/maps/gpio-addr-flash.c
index 5ede282..1adba86 100644
--- a/drivers/mtd/maps/gpio-addr-flash.c
+++ b/drivers/mtd/maps/gpio-addr-flash.c
@@ -196,7 +196,7 @@
 	struct resource *gpios;
 	struct async_state *state;
 
-	pdata = pdev->dev.platform_data;
+	pdata = dev_get_platdata(&pdev->dev);
 	memory = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	gpios = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
 
diff --git a/drivers/mtd/maps/impa7.c b/drivers/mtd/maps/impa7.c
index 4968674..15bbda0 100644
--- a/drivers/mtd/maps/impa7.c
+++ b/drivers/mtd/maps/impa7.c
@@ -79,7 +79,7 @@
 		}
 		simple_map_init(&impa7_map[i]);
 
-		impa7_mtd[i] = 0;
+		impa7_mtd[i] = NULL;
 		type = rom_probe_types;
 		for(; !impa7_mtd[i] && *type; type++) {
 			impa7_mtd[i] = do_map_probe(*type, &impa7_map[i]);
@@ -91,9 +91,9 @@
 			mtd_device_parse_register(impa7_mtd[i], NULL, NULL,
 						  partitions,
 						  ARRAY_SIZE(partitions));
+		} else {
+			iounmap((void __iomem *)impa7_map[i].virt);
 		}
-		else
-			iounmap((void *)impa7_map[i].virt);
 	}
 	return devicesfound == 0 ? -ENXIO : 0;
 }
@@ -105,8 +105,8 @@
 		if (impa7_mtd[i]) {
 			mtd_device_unregister(impa7_mtd[i]);
 			map_destroy(impa7_mtd[i]);
-			iounmap((void *)impa7_map[i].virt);
-			impa7_map[i].virt = 0;
+			iounmap((void __iomem *)impa7_map[i].virt);
+			impa7_map[i].virt = NULL;
 		}
 	}
 }
diff --git a/drivers/mtd/maps/ixp4xx.c b/drivers/mtd/maps/ixp4xx.c
index 52b3410..10debfe 100644
--- a/drivers/mtd/maps/ixp4xx.c
+++ b/drivers/mtd/maps/ixp4xx.c
@@ -152,11 +152,9 @@
 
 static int ixp4xx_flash_remove(struct platform_device *dev)
 {
-	struct flash_platform_data *plat = dev->dev.platform_data;
+	struct flash_platform_data *plat = dev_get_platdata(&dev->dev);
 	struct ixp4xx_flash_info *info = platform_get_drvdata(dev);
 
-	platform_set_drvdata(dev, NULL);
-
 	if(!info)
 		return 0;
 
@@ -180,7 +178,7 @@
 
 static int ixp4xx_flash_probe(struct platform_device *dev)
 {
-	struct flash_platform_data *plat = dev->dev.platform_data;
+	struct flash_platform_data *plat = dev_get_platdata(&dev->dev);
 	struct ixp4xx_flash_info *info;
 	struct mtd_part_parser_data ppdata = {
 		.origin = dev->resource->start,
diff --git a/drivers/mtd/maps/latch-addr-flash.c b/drivers/mtd/maps/latch-addr-flash.c
index ab0fead..98bb5d5 100644
--- a/drivers/mtd/maps/latch-addr-flash.c
+++ b/drivers/mtd/maps/latch-addr-flash.c
@@ -102,9 +102,8 @@
 	info = platform_get_drvdata(dev);
 	if (info == NULL)
 		return 0;
-	platform_set_drvdata(dev, NULL);
 
-	latch_addr_data = dev->dev.platform_data;
+	latch_addr_data = dev_get_platdata(&dev->dev);
 
 	if (info->mtd != NULL) {
 		mtd_device_unregister(info->mtd);
@@ -135,7 +134,7 @@
 	int chipsel;
 	int err;
 
-	latch_addr_data = dev->dev.platform_data;
+	latch_addr_data = dev_get_platdata(&dev->dev);
 	if (latch_addr_data == NULL)
 		return -ENODEV;
 
diff --git a/drivers/mtd/maps/octagon-5066.c b/drivers/mtd/maps/octagon-5066.c
deleted file mode 100644
index 807ac2a..0000000
--- a/drivers/mtd/maps/octagon-5066.c
+++ /dev/null
@@ -1,246 +0,0 @@
-/* ######################################################################
-
-   Octagon 5066 MTD Driver.
-
-   The Octagon 5066 is a SBC based on AMD's 586-WB running at 133 MHZ. It
-   comes with a builtin AMD 29F016 flash chip and a socketed EEPROM that
-   is replacable by flash. Both units are mapped through a multiplexer
-   into a 32k memory window at 0xe8000. The control register for the
-   multiplexing unit is located at IO 0x208 with a bit map of
-     0-5 Page Selection in 32k increments
-     6-7 Device selection:
-        00 SSD off
-        01 SSD 0 (Socket)
-        10 SSD 1 (Flash chip)
-        11 undefined
-
-   On each SSD, the first 128k is reserved for use by the bios
-   (actually it IS the bios..) This only matters if you are booting off the
-   flash, you must not put a file system starting there.
-
-   The driver tries to do a detection algorithm to guess what sort of devices
-   are plugged into the sockets.
-
-   ##################################################################### */
-
-#include <linux/module.h>
-#include <linux/ioport.h>
-#include <linux/init.h>
-#include <asm/io.h>
-
-#include <linux/mtd/map.h>
-#include <linux/mtd/mtd.h>
-
-#define WINDOW_START 0xe8000
-#define WINDOW_LENGTH 0x8000
-#define WINDOW_SHIFT 27
-#define WINDOW_MASK 0x7FFF
-#define PAGE_IO 0x208
-
-static volatile char page_n_dev = 0;
-static unsigned long iomapadr;
-static DEFINE_SPINLOCK(oct5066_spin);
-
-/*
- * We use map_priv_1 to identify which device we are.
- */
-
-static void __oct5066_page(struct map_info *map, __u8 byte)
-{
-	outb(byte,PAGE_IO);
-	page_n_dev = byte;
-}
-
-static inline void oct5066_page(struct map_info *map, unsigned long ofs)
-{
-	__u8 byte = map->map_priv_1 | (ofs >> WINDOW_SHIFT);
-
-	if (page_n_dev != byte)
-		__oct5066_page(map, byte);
-}
-
-
-static map_word oct5066_read8(struct map_info *map, unsigned long ofs)
-{
-	map_word ret;
-	spin_lock(&oct5066_spin);
-	oct5066_page(map, ofs);
-	ret.x[0] = readb(iomapadr + (ofs & WINDOW_MASK));
-	spin_unlock(&oct5066_spin);
-	return ret;
-}
-
-static void oct5066_copy_from(struct map_info *map, void *to, unsigned long from, ssize_t len)
-{
-	while(len) {
-		unsigned long thislen = len;
-		if (len > (WINDOW_LENGTH - (from & WINDOW_MASK)))
-			thislen = WINDOW_LENGTH-(from & WINDOW_MASK);
-
-		spin_lock(&oct5066_spin);
-		oct5066_page(map, from);
-		memcpy_fromio(to, iomapadr + from, thislen);
-		spin_unlock(&oct5066_spin);
-		to += thislen;
-		from += thislen;
-		len -= thislen;
-	}
-}
-
-static void oct5066_write8(struct map_info *map, map_word d, unsigned long adr)
-{
-	spin_lock(&oct5066_spin);
-	oct5066_page(map, adr);
-	writeb(d.x[0], iomapadr + (adr & WINDOW_MASK));
-	spin_unlock(&oct5066_spin);
-}
-
-static void oct5066_copy_to(struct map_info *map, unsigned long to, const void *from, ssize_t len)
-{
-	while(len) {
-		unsigned long thislen = len;
-		if (len > (WINDOW_LENGTH - (to & WINDOW_MASK)))
-			thislen = WINDOW_LENGTH-(to & WINDOW_MASK);
-
-		spin_lock(&oct5066_spin);
-		oct5066_page(map, to);
-		memcpy_toio(iomapadr + to, from, thislen);
-		spin_unlock(&oct5066_spin);
-		to += thislen;
-		from += thislen;
-		len -= thislen;
-	}
-}
-
-static struct map_info oct5066_map[2] = {
-	{
-		.name = "Octagon 5066 Socket",
-		.phys = NO_XIP,
-		.size = 512 * 1024,
-		.bankwidth = 1,
-		.read = oct5066_read8,
-		.copy_from = oct5066_copy_from,
-		.write = oct5066_write8,
-		.copy_to = oct5066_copy_to,
-		.map_priv_1 = 1<<6
-	},
-	{
-		.name = "Octagon 5066 Internal Flash",
-		.phys = NO_XIP,
-		.size = 2 * 1024 * 1024,
-		.bankwidth = 1,
-		.read = oct5066_read8,
-		.copy_from = oct5066_copy_from,
-		.write = oct5066_write8,
-		.copy_to = oct5066_copy_to,
-		.map_priv_1 = 2<<6
-	}
-};
-
-static struct mtd_info *oct5066_mtd[2] = {NULL, NULL};
-
-// OctProbe - Sense if this is an octagon card
-// ---------------------------------------------------------------------
-/* Perform a simple validity test, we map the window select SSD0 and
-   change pages while monitoring the window. A change in the window,
-   controlled by the PAGE_IO port is a functioning 5066 board. This will
-   fail if the thing in the socket is set to a uniform value. */
-static int __init OctProbe(void)
-{
-   unsigned int Base = (1 << 6);
-   unsigned long I;
-   unsigned long Values[10];
-   for (I = 0; I != 20; I++)
-   {
-      outb(Base + (I%10),PAGE_IO);
-      if (I < 10)
-      {
-	 // Record the value and check for uniqueness
-	 Values[I%10] = readl(iomapadr);
-	 if (I > 0 && Values[I%10] == Values[0])
-	    return -EAGAIN;
-      }
-      else
-      {
-	 // Make sure we get the same values on the second pass
-	 if (Values[I%10] != readl(iomapadr))
-	    return -EAGAIN;
-      }
-   }
-   return 0;
-}
-
-void cleanup_oct5066(void)
-{
-	int i;
-	for (i=0; i<2; i++) {
-		if (oct5066_mtd[i]) {
-			mtd_device_unregister(oct5066_mtd[i]);
-			map_destroy(oct5066_mtd[i]);
-		}
-	}
-	iounmap((void *)iomapadr);
-	release_region(PAGE_IO, 1);
-}
-
-static int __init init_oct5066(void)
-{
-	int i;
-	int ret = 0;
-
-	// Do an autoprobe sequence
-	if (!request_region(PAGE_IO,1,"Octagon SSD")) {
-		printk(KERN_NOTICE "5066: Page Register in Use\n");
-		return -EAGAIN;
-	}
-	iomapadr = (unsigned long)ioremap(WINDOW_START, WINDOW_LENGTH);
-	if (!iomapadr) {
-		printk(KERN_NOTICE "Failed to ioremap memory region\n");
-		ret = -EIO;
-		goto out_rel;
-	}
-	if (OctProbe() != 0) {
-		printk(KERN_NOTICE "5066: Octagon Probe Failed, is this an Octagon 5066 SBC?\n");
-		iounmap((void *)iomapadr);
-		ret = -EAGAIN;
-		goto out_unmap;
-	}
-
-	// Print out our little header..
-	printk("Octagon 5066 SSD IO:0x%x MEM:0x%x-0x%x\n",PAGE_IO,WINDOW_START,
-	       WINDOW_START+WINDOW_LENGTH);
-
-	for (i=0; i<2; i++) {
-		oct5066_mtd[i] = do_map_probe("cfi_probe", &oct5066_map[i]);
-		if (!oct5066_mtd[i])
-			oct5066_mtd[i] = do_map_probe("jedec", &oct5066_map[i]);
-		if (!oct5066_mtd[i])
-			oct5066_mtd[i] = do_map_probe("map_ram", &oct5066_map[i]);
-		if (!oct5066_mtd[i])
-			oct5066_mtd[i] = do_map_probe("map_rom", &oct5066_map[i]);
-		if (oct5066_mtd[i]) {
-			oct5066_mtd[i]->owner = THIS_MODULE;
-			mtd_device_register(oct5066_mtd[i], NULL, 0);
-		}
-	}
-
-	if (!oct5066_mtd[0] && !oct5066_mtd[1]) {
-		cleanup_oct5066();
-		return -ENXIO;
-	}
-
-	return 0;
-
- out_unmap:
-	iounmap((void *)iomapadr);
- out_rel:
-	release_region(PAGE_IO, 1);
-	return ret;
-}
-
-module_init(init_oct5066);
-module_exit(cleanup_oct5066);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Jason Gunthorpe <jgg@deltatee.com>, David Woodhouse <dwmw2@infradead.org>");
-MODULE_DESCRIPTION("MTD map driver for Octagon 5066 Single Board Computer");
diff --git a/drivers/mtd/maps/physmap.c b/drivers/mtd/maps/physmap.c
index e7a592c..f73cd46 100644
--- a/drivers/mtd/maps/physmap.c
+++ b/drivers/mtd/maps/physmap.c
@@ -40,9 +40,8 @@
 	info = platform_get_drvdata(dev);
 	if (info == NULL)
 		return 0;
-	platform_set_drvdata(dev, NULL);
 
-	physmap_data = dev->dev.platform_data;
+	physmap_data = dev_get_platdata(&dev->dev);
 
 	if (info->cmtd) {
 		mtd_device_unregister(info->cmtd);
@@ -69,7 +68,7 @@
 	unsigned long flags;
 
 	pdev = (struct platform_device *)map->map_priv_1;
-	physmap_data = pdev->dev.platform_data;
+	physmap_data = dev_get_platdata(&pdev->dev);
 
 	if (!physmap_data->set_vpp)
 		return;
@@ -103,7 +102,7 @@
 	int i;
 	int devices_found = 0;
 
-	physmap_data = dev->dev.platform_data;
+	physmap_data = dev_get_platdata(&dev->dev);
 	if (physmap_data == NULL)
 		return -ENODEV;
 
diff --git a/drivers/mtd/maps/plat-ram.c b/drivers/mtd/maps/plat-ram.c
index 71fdda2..6762716 100644
--- a/drivers/mtd/maps/plat-ram.c
+++ b/drivers/mtd/maps/plat-ram.c
@@ -84,8 +84,6 @@
 {
 	struct platram_info *info = to_platram_info(pdev);
 
-	platform_set_drvdata(pdev, NULL);
-
 	dev_dbg(&pdev->dev, "removing device\n");
 
 	if (info == NULL)
@@ -130,13 +128,13 @@
 
 	dev_dbg(&pdev->dev, "probe entered\n");
 
-	if (pdev->dev.platform_data == NULL) {
+	if (dev_get_platdata(&pdev->dev) == NULL) {
 		dev_err(&pdev->dev, "no platform data supplied\n");
 		err = -ENOENT;
 		goto exit_error;
 	}
 
-	pdata = pdev->dev.platform_data;
+	pdata = dev_get_platdata(&pdev->dev);
 
 	info = kzalloc(sizeof(*info), GFP_KERNEL);
 	if (info == NULL) {
diff --git a/drivers/mtd/maps/pxa2xx-flash.c b/drivers/mtd/maps/pxa2xx-flash.c
index acb1dbc..d210d13 100644
--- a/drivers/mtd/maps/pxa2xx-flash.c
+++ b/drivers/mtd/maps/pxa2xx-flash.c
@@ -49,7 +49,7 @@
 
 static int pxa2xx_flash_probe(struct platform_device *pdev)
 {
-	struct flash_platform_data *flash = pdev->dev.platform_data;
+	struct flash_platform_data *flash = dev_get_platdata(&pdev->dev);
 	struct pxa2xx_flash_info *info;
 	struct resource *res;
 
@@ -107,8 +107,6 @@
 {
 	struct pxa2xx_flash_info *info = platform_get_drvdata(dev);
 
-	platform_set_drvdata(dev, NULL);
-
 	mtd_device_unregister(info->mtd);
 
 	map_destroy(info->mtd);
diff --git a/drivers/mtd/maps/rbtx4939-flash.c b/drivers/mtd/maps/rbtx4939-flash.c
index ac02fbf..9352512 100644
--- a/drivers/mtd/maps/rbtx4939-flash.c
+++ b/drivers/mtd/maps/rbtx4939-flash.c
@@ -34,10 +34,9 @@
 	info = platform_get_drvdata(dev);
 	if (!info)
 		return 0;
-	platform_set_drvdata(dev, NULL);
 
 	if (info->mtd) {
-		struct rbtx4939_flash_data *pdata = dev->dev.platform_data;
+		struct rbtx4939_flash_data *pdata = dev_get_platdata(&dev->dev);
 
 		mtd_device_unregister(info->mtd);
 		map_destroy(info->mtd);
@@ -57,7 +56,7 @@
 	int err = 0;
 	unsigned long size;
 
-	pdata = dev->dev.platform_data;
+	pdata = dev_get_platdata(&dev->dev);
 	if (!pdata)
 		return -ENODEV;
 
diff --git a/drivers/mtd/maps/sa1100-flash.c b/drivers/mtd/maps/sa1100-flash.c
index 29e3dca..8fc06bf 100644
--- a/drivers/mtd/maps/sa1100-flash.c
+++ b/drivers/mtd/maps/sa1100-flash.c
@@ -248,7 +248,7 @@
 
 static int sa1100_mtd_probe(struct platform_device *pdev)
 {
-	struct flash_platform_data *plat = pdev->dev.platform_data;
+	struct flash_platform_data *plat = dev_get_platdata(&pdev->dev);
 	struct sa_info *info;
 	int err;
 
@@ -277,9 +277,8 @@
 static int __exit sa1100_mtd_remove(struct platform_device *pdev)
 {
 	struct sa_info *info = platform_get_drvdata(pdev);
-	struct flash_platform_data *plat = pdev->dev.platform_data;
+	struct flash_platform_data *plat = dev_get_platdata(&pdev->dev);
 
-	platform_set_drvdata(pdev, NULL);
 	sa1100_destroy(info, plat);
 
 	return 0;
diff --git a/drivers/mtd/maps/vmax301.c b/drivers/mtd/maps/vmax301.c
deleted file mode 100644
index 5e68de7..0000000
--- a/drivers/mtd/maps/vmax301.c
+++ /dev/null
@@ -1,196 +0,0 @@
-/* ######################################################################
-
-   Tempustech VMAX SBC301 MTD Driver.
-
-   The VMAx 301 is a SBC based on . It
-   comes with three builtin AMD 29F016B flash chips and a socket for SRAM or
-   more flash. Each unit has it's own 8k mapping into a settable region
-   (0xD8000). There are two 8k mappings for each MTD, the first is always set
-   to the lower 8k of the device the second is paged. Writing a 16 bit page
-   value to anywhere in the first 8k will cause the second 8k to page around.
-
-   To boot the device a bios extension must be installed into the first 8k
-   of flash that is smart enough to copy itself down, page in the rest of
-   itself and begin executing.
-
-   ##################################################################### */
-
-#include <linux/module.h>
-#include <linux/ioport.h>
-#include <linux/init.h>
-#include <linux/spinlock.h>
-#include <asm/io.h>
-
-#include <linux/mtd/map.h>
-#include <linux/mtd/mtd.h>
-
-
-#define WINDOW_START 0xd8000
-#define WINDOW_LENGTH 0x2000
-#define WINDOW_SHIFT 25
-#define WINDOW_MASK 0x1FFF
-
-/* Actually we could use two spinlocks, but we'd have to have
-   more private space in the struct map_info. We lose a little
-   performance like this, but we'd probably lose more by having
-   the extra indirection from having one of the map->map_priv
-   fields pointing to yet another private struct.
-*/
-static DEFINE_SPINLOCK(vmax301_spin);
-
-static void __vmax301_page(struct map_info *map, unsigned long page)
-{
-	writew(page, map->map_priv_2 - WINDOW_LENGTH);
-	map->map_priv_1 = page;
-}
-
-static inline void vmax301_page(struct map_info *map,
-				  unsigned long ofs)
-{
-	unsigned long page = (ofs >> WINDOW_SHIFT);
-	if (map->map_priv_1 != page)
-		__vmax301_page(map, page);
-}
-
-static map_word vmax301_read8(struct map_info *map, unsigned long ofs)
-{
-	map_word ret;
-	spin_lock(&vmax301_spin);
-	vmax301_page(map, ofs);
-	ret.x[0] = readb(map->map_priv_2 + (ofs & WINDOW_MASK));
-	spin_unlock(&vmax301_spin);
-	return ret;
-}
-
-static void vmax301_copy_from(struct map_info *map, void *to, unsigned long from, ssize_t len)
-{
-	while(len) {
-		unsigned long thislen = len;
-		if (len > (WINDOW_LENGTH - (from & WINDOW_MASK)))
-			thislen = WINDOW_LENGTH-(from & WINDOW_MASK);
-		spin_lock(&vmax301_spin);
-		vmax301_page(map, from);
-		memcpy_fromio(to, map->map_priv_2 + from, thislen);
-		spin_unlock(&vmax301_spin);
-		to += thislen;
-		from += thislen;
-		len -= thislen;
-	}
-}
-
-static void vmax301_write8(struct map_info *map, map_word d, unsigned long adr)
-{
-	spin_lock(&vmax301_spin);
-	vmax301_page(map, adr);
-	writeb(d.x[0], map->map_priv_2 + (adr & WINDOW_MASK));
-	spin_unlock(&vmax301_spin);
-}
-
-static void vmax301_copy_to(struct map_info *map, unsigned long to, const void *from, ssize_t len)
-{
-	while(len) {
-		unsigned long thislen = len;
-		if (len > (WINDOW_LENGTH - (to & WINDOW_MASK)))
-			thislen = WINDOW_LENGTH-(to & WINDOW_MASK);
-
-		spin_lock(&vmax301_spin);
-		vmax301_page(map, to);
-		memcpy_toio(map->map_priv_2 + to, from, thislen);
-		spin_unlock(&vmax301_spin);
-		to += thislen;
-		from += thislen;
-		len -= thislen;
-	}
-}
-
-static struct map_info vmax_map[2] = {
-	{
-		.name = "VMAX301 Internal Flash",
-		.phys = NO_XIP,
-		.size = 3*2*1024*1024,
-		.bankwidth = 1,
-		.read = vmax301_read8,
-		.copy_from = vmax301_copy_from,
-		.write = vmax301_write8,
-		.copy_to = vmax301_copy_to,
-		.map_priv_1 = WINDOW_START + WINDOW_LENGTH,
-		.map_priv_2 = 0xFFFFFFFF
-	},
-	{
-		.name = "VMAX301 Socket",
-		.phys = NO_XIP,
-		.size = 0,
-		.bankwidth = 1,
-		.read = vmax301_read8,
-		.copy_from = vmax301_copy_from,
-		.write = vmax301_write8,
-		.copy_to = vmax301_copy_to,
-		.map_priv_1 = WINDOW_START + (3*WINDOW_LENGTH),
-		.map_priv_2 = 0xFFFFFFFF
-	}
-};
-
-static struct mtd_info *vmax_mtd[2] = {NULL, NULL};
-
-static void __exit cleanup_vmax301(void)
-{
-	int i;
-
-	for (i=0; i<2; i++) {
-		if (vmax_mtd[i]) {
-			mtd_device_unregister(vmax_mtd[i]);
-			map_destroy(vmax_mtd[i]);
-		}
-	}
-	iounmap((void *)vmax_map[0].map_priv_1 - WINDOW_START);
-}
-
-static int __init init_vmax301(void)
-{
-	int i;
-	unsigned long iomapadr;
-	// Print out our little header..
-	printk("Tempustech VMAX 301 MEM:0x%x-0x%x\n",WINDOW_START,
-	       WINDOW_START+4*WINDOW_LENGTH);
-
-	iomapadr = (unsigned long)ioremap(WINDOW_START, WINDOW_LENGTH*4);
-	if (!iomapadr) {
-		printk("Failed to ioremap memory region\n");
-		return -EIO;
-	}
-	/* Put the address in the map's private data area.
-	   We store the actual MTD IO address rather than the
-	   address of the first half, because it's used more
-	   often.
-	*/
-	vmax_map[0].map_priv_2 = iomapadr + WINDOW_START;
-	vmax_map[1].map_priv_2 = iomapadr + (3*WINDOW_START);
-
-	for (i=0; i<2; i++) {
-		vmax_mtd[i] = do_map_probe("cfi_probe", &vmax_map[i]);
-		if (!vmax_mtd[i])
-			vmax_mtd[i] = do_map_probe("jedec", &vmax_map[i]);
-		if (!vmax_mtd[i])
-			vmax_mtd[i] = do_map_probe("map_ram", &vmax_map[i]);
-		if (!vmax_mtd[i])
-			vmax_mtd[i] = do_map_probe("map_rom", &vmax_map[i]);
-		if (vmax_mtd[i]) {
-			vmax_mtd[i]->owner = THIS_MODULE;
-			mtd_device_register(vmax_mtd[i], NULL, 0);
-		}
-	}
-
-	if (!vmax_mtd[0] && !vmax_mtd[1]) {
-		iounmap((void *)iomapadr);
-		return -ENXIO;
-	}
-
-	return 0;
-}
-
-module_init(init_vmax301);
-module_exit(cleanup_vmax301);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("David Woodhouse <dwmw2@infradead.org>");
-MODULE_DESCRIPTION("MTD map driver for Tempustech VMAX SBC301 board");
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index 048c823..5e14d54 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -285,6 +285,16 @@
 		   mtd_bitflip_threshold_show,
 		   mtd_bitflip_threshold_store);
 
+static ssize_t mtd_ecc_step_size_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct mtd_info *mtd = dev_get_drvdata(dev);
+
+	return snprintf(buf, PAGE_SIZE, "%u\n", mtd->ecc_step_size);
+
+}
+static DEVICE_ATTR(ecc_step_size, S_IRUGO, mtd_ecc_step_size_show, NULL);
+
 static struct attribute *mtd_attrs[] = {
 	&dev_attr_type.attr,
 	&dev_attr_flags.attr,
@@ -296,6 +306,7 @@
 	&dev_attr_numeraseregions.attr,
 	&dev_attr_name.attr,
 	&dev_attr_ecc_strength.attr,
+	&dev_attr_ecc_step_size.attr,
 	&dev_attr_bitflip_threshold.attr,
 	NULL,
 };
diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index 3014933..6e732c3 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -516,6 +516,7 @@
 	}
 
 	slave->mtd.ecclayout = master->ecclayout;
+	slave->mtd.ecc_step_size = master->ecc_step_size;
 	slave->mtd.ecc_strength = master->ecc_strength;
 	slave->mtd.bitflip_threshold = master->bitflip_threshold;
 
diff --git a/drivers/mtd/mtdswap.c b/drivers/mtd/mtdswap.c
index c92f0f6..8b33b26 100644
--- a/drivers/mtd/mtdswap.c
+++ b/drivers/mtd/mtdswap.c
@@ -1425,7 +1425,7 @@
 		return;
 
 	while ((this_opt = strsep(&parts, ",")) != NULL) {
-		if (strict_strtoul(this_opt, 0, &part) < 0)
+		if (kstrtoul(this_opt, 0, &part) < 0)
 			return;
 
 		if (mtd->index == part)
diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig
index 50543f1..d885298 100644
--- a/drivers/mtd/nand/Kconfig
+++ b/drivers/mtd/nand/Kconfig
@@ -43,6 +43,7 @@
 
 config MTD_NAND_DENALI
         tristate "Support Denali NAND controller"
+        depends on HAS_DMA
         help
 	  Enable support for the Denali NAND controller.  This should be
 	  combined with either the PCI or platform drivers to provide device
@@ -75,7 +76,7 @@
 
 config MTD_NAND_GPIO
 	tristate "GPIO NAND Flash driver"
-	depends on GPIOLIB && ARM
+	depends on GPIOLIB
 	help
 	  This enables a GPIO based NAND flash driver.
 
@@ -354,7 +355,7 @@
 
 config MTD_NAND_PXA3xx
 	tristate "Support for NAND flash devices on PXA3xx"
-	depends on PXA3xx || ARCH_MMP
+	depends on PXA3xx || ARCH_MMP || PLAT_ORION
 	help
 	  This enables the driver for the NAND flash device found on
 	  PXA3xx processors
@@ -432,13 +433,6 @@
 	  devices. You will need to provide platform-specific functions
 	  via platform_data.
 
-config MTD_ALAUDA
-	tristate "MTD driver for Olympus MAUSB-10 and Fujifilm DPC-R1"
-	depends on USB
-	help
-	  These two (and possibly other) Alauda-based cardreaders for
-	  SmartMedia and xD allow raw flash access.
-
 config MTD_NAND_ORION
 	tristate "NAND Flash support for Marvell Orion SoC"
 	depends on PLAT_ORION
diff --git a/drivers/mtd/nand/Makefile b/drivers/mtd/nand/Makefile
index bb81891..542b568 100644
--- a/drivers/mtd/nand/Makefile
+++ b/drivers/mtd/nand/Makefile
@@ -31,7 +31,6 @@
 obj-$(CONFIG_MTD_NAND_PXA3xx)		+= pxa3xx_nand.o
 obj-$(CONFIG_MTD_NAND_TMIO)		+= tmio_nand.o
 obj-$(CONFIG_MTD_NAND_PLATFORM)		+= plat_nand.o
-obj-$(CONFIG_MTD_ALAUDA)		+= alauda.o
 obj-$(CONFIG_MTD_NAND_PASEMI)		+= pasemi_nand.o
 obj-$(CONFIG_MTD_NAND_ORION)		+= orion_nand.o
 obj-$(CONFIG_MTD_NAND_FSL_ELBC)		+= fsl_elbc_nand.o
diff --git a/drivers/mtd/nand/alauda.c b/drivers/mtd/nand/alauda.c
deleted file mode 100644
index 60a0dfd..0000000
--- a/drivers/mtd/nand/alauda.c
+++ /dev/null
@@ -1,723 +0,0 @@
-/*
- * MTD driver for Alauda chips
- *
- * Copyright (C) 2007 Joern Engel <joern@logfs.org>
- *
- * Based on drivers/usb/usb-skeleton.c which is:
- * Copyright (C) 2001-2004 Greg Kroah-Hartman (greg@kroah.com)
- * and on drivers/usb/storage/alauda.c, which is:
- *   (c) 2005 Daniel Drake <dsd@gentoo.org>
- *
- * Idea and initial work by Arnd Bergmann <arnd@arndb.de>
- */
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/kref.h>
-#include <linux/usb.h>
-#include <linux/mutex.h>
-#include <linux/mtd/mtd.h>
-#include <linux/mtd/nand_ecc.h>
-
-/* Control commands */
-#define ALAUDA_GET_XD_MEDIA_STATUS	0x08
-#define ALAUDA_ACK_XD_MEDIA_CHANGE	0x0a
-#define ALAUDA_GET_XD_MEDIA_SIG		0x86
-
-/* Common prefix */
-#define ALAUDA_BULK_CMD			0x40
-
-/* The two ports */
-#define ALAUDA_PORT_XD			0x00
-#define ALAUDA_PORT_SM			0x01
-
-/* Bulk commands */
-#define ALAUDA_BULK_READ_PAGE		0x84
-#define ALAUDA_BULK_READ_OOB		0x85 /* don't use, there's a chip bug */
-#define ALAUDA_BULK_READ_BLOCK		0x94
-#define ALAUDA_BULK_ERASE_BLOCK		0xa3
-#define ALAUDA_BULK_WRITE_PAGE		0xa4
-#define ALAUDA_BULK_WRITE_BLOCK		0xb4
-#define ALAUDA_BULK_RESET_MEDIA		0xe0
-
-/* Address shifting */
-#define PBA_LO(pba) ((pba & 0xF) << 5)
-#define PBA_HI(pba) (pba >> 3)
-#define PBA_ZONE(pba) (pba >> 11)
-
-#define TIMEOUT HZ
-
-static const struct usb_device_id alauda_table[] = {
-	{ USB_DEVICE(0x0584, 0x0008) },	/* Fujifilm DPC-R1 */
-	{ USB_DEVICE(0x07b4, 0x010a) },	/* Olympus MAUSB-10 */
-	{ }
-};
-MODULE_DEVICE_TABLE(usb, alauda_table);
-
-struct alauda_card {
-	u8	id;		/* id byte */
-	u8	chipshift;	/* 1<<chipshift total size */
-	u8	pageshift;	/* 1<<pageshift page size */
-	u8	blockshift;	/* 1<<blockshift block size */
-};
-
-struct alauda {
-	struct usb_device	*dev;
-	struct usb_interface	*interface;
-	struct mtd_info		*mtd;
-	struct alauda_card	*card;
-	struct mutex		card_mutex;
-	u32			pagemask;
-	u32			bytemask;
-	u32			blockmask;
-	unsigned int		write_out;
-	unsigned int		bulk_in;
-	unsigned int		bulk_out;
-	u8			port;
-	struct kref		kref;
-};
-
-static struct alauda_card alauda_card_ids[] = {
-	/* NAND flash */
-	{ 0x6e, 20, 8, 12},	/* 1 MB */
-	{ 0xe8, 20, 8, 12},	/* 1 MB */
-	{ 0xec, 20, 8, 12},	/* 1 MB */
-	{ 0x64, 21, 8, 12},	/* 2 MB */
-	{ 0xea, 21, 8, 12},	/* 2 MB */
-	{ 0x6b, 22, 9, 13},	/* 4 MB */
-	{ 0xe3, 22, 9, 13},	/* 4 MB */
-	{ 0xe5, 22, 9, 13},	/* 4 MB */
-	{ 0xe6, 23, 9, 13},	/* 8 MB */
-	{ 0x73, 24, 9, 14},	/* 16 MB */
-	{ 0x75, 25, 9, 14},	/* 32 MB */
-	{ 0x76, 26, 9, 14},	/* 64 MB */
-	{ 0x79, 27, 9, 14},	/* 128 MB */
-	{ 0x71, 28, 9, 14},	/* 256 MB */
-
-	/* MASK ROM */
-	{ 0x5d, 21, 9, 13},	/* 2 MB */
-	{ 0xd5, 22, 9, 13},	/* 4 MB */
-	{ 0xd6, 23, 9, 13},	/* 8 MB */
-	{ 0x57, 24, 9, 13},	/* 16 MB */
-	{ 0x58, 25, 9, 13},	/* 32 MB */
-	{ }
-};
-
-static struct alauda_card *get_card(u8 id)
-{
-	struct alauda_card *card;
-
-	for (card = alauda_card_ids; card->id; card++)
-		if (card->id == id)
-			return card;
-	return NULL;
-}
-
-static void alauda_delete(struct kref *kref)
-{
-	struct alauda *al = container_of(kref, struct alauda, kref);
-
-	if (al->mtd) {
-		mtd_device_unregister(al->mtd);
-		kfree(al->mtd);
-	}
-	usb_put_dev(al->dev);
-	kfree(al);
-}
-
-static int alauda_get_media_status(struct alauda *al, void *buf)
-{
-	int ret;
-
-	mutex_lock(&al->card_mutex);
-	ret = usb_control_msg(al->dev, usb_rcvctrlpipe(al->dev, 0),
-			ALAUDA_GET_XD_MEDIA_STATUS, 0xc0, 0, 1, buf, 2, HZ);
-	mutex_unlock(&al->card_mutex);
-	return ret;
-}
-
-static int alauda_ack_media(struct alauda *al)
-{
-	int ret;
-
-	mutex_lock(&al->card_mutex);
-	ret = usb_control_msg(al->dev, usb_sndctrlpipe(al->dev, 0),
-			ALAUDA_ACK_XD_MEDIA_CHANGE, 0x40, 0, 1, NULL, 0, HZ);
-	mutex_unlock(&al->card_mutex);
-	return ret;
-}
-
-static int alauda_get_media_signatures(struct alauda *al, void *buf)
-{
-	int ret;
-
-	mutex_lock(&al->card_mutex);
-	ret = usb_control_msg(al->dev, usb_rcvctrlpipe(al->dev, 0),
-			ALAUDA_GET_XD_MEDIA_SIG, 0xc0, 0, 0, buf, 4, HZ);
-	mutex_unlock(&al->card_mutex);
-	return ret;
-}
-
-static void alauda_reset(struct alauda *al)
-{
-	u8 command[] = {
-		ALAUDA_BULK_CMD, ALAUDA_BULK_RESET_MEDIA, 0, 0,
-		0, 0, 0, 0, al->port
-	};
-	mutex_lock(&al->card_mutex);
-	usb_bulk_msg(al->dev, al->bulk_out, command, 9, NULL, HZ);
-	mutex_unlock(&al->card_mutex);
-}
-
-static void correct_data(void *buf, void *read_ecc,
-		int *corrected, int *uncorrected)
-{
-	u8 calc_ecc[3];
-	int err;
-
-	nand_calculate_ecc(NULL, buf, calc_ecc);
-	err = nand_correct_data(NULL, buf, read_ecc, calc_ecc);
-	if (err) {
-		if (err > 0)
-			(*corrected)++;
-		else
-			(*uncorrected)++;
-	}
-}
-
-struct alauda_sg_request {
-	struct urb *urb[3];
-	struct completion comp;
-};
-
-static void alauda_complete(struct urb *urb)
-{
-	struct completion *comp = urb->context;
-
-	if (comp)
-		complete(comp);
-}
-
-static int __alauda_read_page(struct mtd_info *mtd, loff_t from, void *buf,
-		void *oob)
-{
-	struct alauda_sg_request sg;
-	struct alauda *al = mtd->priv;
-	u32 pba = from >> al->card->blockshift;
-	u32 page = (from >> al->card->pageshift) & al->pagemask;
-	u8 command[] = {
-		ALAUDA_BULK_CMD, ALAUDA_BULK_READ_PAGE, PBA_HI(pba),
-		PBA_ZONE(pba), 0, PBA_LO(pba) + page, 1, 0, al->port
-	};
-	int i, err;
-
-	for (i=0; i<3; i++)
-		sg.urb[i] = NULL;
-
-	err = -ENOMEM;
-	for (i=0; i<3; i++) {
-		sg.urb[i] = usb_alloc_urb(0, GFP_NOIO);
-		if (!sg.urb[i])
-			goto out;
-	}
-	init_completion(&sg.comp);
-	usb_fill_bulk_urb(sg.urb[0], al->dev, al->bulk_out, command, 9,
-			alauda_complete, NULL);
-	usb_fill_bulk_urb(sg.urb[1], al->dev, al->bulk_in, buf, mtd->writesize,
-			alauda_complete, NULL);
-	usb_fill_bulk_urb(sg.urb[2], al->dev, al->bulk_in, oob, 16,
-			alauda_complete, &sg.comp);
-
-	mutex_lock(&al->card_mutex);
-	for (i=0; i<3; i++) {
-		err = usb_submit_urb(sg.urb[i], GFP_NOIO);
-		if (err)
-			goto cancel;
-	}
-	if (!wait_for_completion_timeout(&sg.comp, TIMEOUT)) {
-		err = -ETIMEDOUT;
-cancel:
-		for (i=0; i<3; i++) {
-			usb_kill_urb(sg.urb[i]);
-		}
-	}
-	mutex_unlock(&al->card_mutex);
-
-out:
-	usb_free_urb(sg.urb[0]);
-	usb_free_urb(sg.urb[1]);
-	usb_free_urb(sg.urb[2]);
-	return err;
-}
-
-static int alauda_read_page(struct mtd_info *mtd, loff_t from,
-		void *buf, u8 *oob, int *corrected, int *uncorrected)
-{
-	int err;
-
-	err = __alauda_read_page(mtd, from, buf, oob);
-	if (err)
-		return err;
-	correct_data(buf, oob+13, corrected, uncorrected);
-	correct_data(buf+256, oob+8, corrected, uncorrected);
-	return 0;
-}
-
-static int alauda_write_page(struct mtd_info *mtd, loff_t to, void *buf,
-		void *oob)
-{
-	struct alauda_sg_request sg;
-	struct alauda *al = mtd->priv;
-	u32 pba = to >> al->card->blockshift;
-	u32 page = (to >> al->card->pageshift) & al->pagemask;
-	u8 command[] = {
-		ALAUDA_BULK_CMD, ALAUDA_BULK_WRITE_PAGE, PBA_HI(pba),
-		PBA_ZONE(pba), 0, PBA_LO(pba) + page, 32, 0, al->port
-	};
-	int i, err;
-
-	for (i=0; i<3; i++)
-		sg.urb[i] = NULL;
-
-	err = -ENOMEM;
-	for (i=0; i<3; i++) {
-		sg.urb[i] = usb_alloc_urb(0, GFP_NOIO);
-		if (!sg.urb[i])
-			goto out;
-	}
-	init_completion(&sg.comp);
-	usb_fill_bulk_urb(sg.urb[0], al->dev, al->bulk_out, command, 9,
-			alauda_complete, NULL);
-	usb_fill_bulk_urb(sg.urb[1], al->dev, al->write_out, buf,mtd->writesize,
-			alauda_complete, NULL);
-	usb_fill_bulk_urb(sg.urb[2], al->dev, al->write_out, oob, 16,
-			alauda_complete, &sg.comp);
-
-	mutex_lock(&al->card_mutex);
-	for (i=0; i<3; i++) {
-		err = usb_submit_urb(sg.urb[i], GFP_NOIO);
-		if (err)
-			goto cancel;
-	}
-	if (!wait_for_completion_timeout(&sg.comp, TIMEOUT)) {
-		err = -ETIMEDOUT;
-cancel:
-		for (i=0; i<3; i++) {
-			usb_kill_urb(sg.urb[i]);
-		}
-	}
-	mutex_unlock(&al->card_mutex);
-
-out:
-	usb_free_urb(sg.urb[0]);
-	usb_free_urb(sg.urb[1]);
-	usb_free_urb(sg.urb[2]);
-	return err;
-}
-
-static int alauda_erase_block(struct mtd_info *mtd, loff_t ofs)
-{
-	struct alauda_sg_request sg;
-	struct alauda *al = mtd->priv;
-	u32 pba = ofs >> al->card->blockshift;
-	u8 command[] = {
-		ALAUDA_BULK_CMD, ALAUDA_BULK_ERASE_BLOCK, PBA_HI(pba),
-		PBA_ZONE(pba), 0, PBA_LO(pba), 0x02, 0, al->port
-	};
-	u8 buf[2];
-	int i, err;
-
-	for (i=0; i<2; i++)
-		sg.urb[i] = NULL;
-
-	err = -ENOMEM;
-	for (i=0; i<2; i++) {
-		sg.urb[i] = usb_alloc_urb(0, GFP_NOIO);
-		if (!sg.urb[i])
-			goto out;
-	}
-	init_completion(&sg.comp);
-	usb_fill_bulk_urb(sg.urb[0], al->dev, al->bulk_out, command, 9,
-			alauda_complete, NULL);
-	usb_fill_bulk_urb(sg.urb[1], al->dev, al->bulk_in, buf, 2,
-			alauda_complete, &sg.comp);
-
-	mutex_lock(&al->card_mutex);
-	for (i=0; i<2; i++) {
-		err = usb_submit_urb(sg.urb[i], GFP_NOIO);
-		if (err)
-			goto cancel;
-	}
-	if (!wait_for_completion_timeout(&sg.comp, TIMEOUT)) {
-		err = -ETIMEDOUT;
-cancel:
-		for (i=0; i<2; i++) {
-			usb_kill_urb(sg.urb[i]);
-		}
-	}
-	mutex_unlock(&al->card_mutex);
-
-out:
-	usb_free_urb(sg.urb[0]);
-	usb_free_urb(sg.urb[1]);
-	return err;
-}
-
-static int alauda_read_oob(struct mtd_info *mtd, loff_t from, void *oob)
-{
-	static u8 ignore_buf[512]; /* write only */
-
-	return __alauda_read_page(mtd, from, ignore_buf, oob);
-}
-
-static int alauda_isbad(struct mtd_info *mtd, loff_t ofs)
-{
-	u8 oob[16];
-	int err;
-
-	err = alauda_read_oob(mtd, ofs, oob);
-	if (err)
-		return err;
-
-	/* A block is marked bad if two or more bits are zero */
-	return hweight8(oob[5]) >= 7 ? 0 : 1;
-}
-
-static int alauda_bounce_read(struct mtd_info *mtd, loff_t from, size_t len,
-		size_t *retlen, u_char *buf)
-{
-	struct alauda *al = mtd->priv;
-	void *bounce_buf;
-	int err, corrected=0, uncorrected=0;
-
-	bounce_buf = kmalloc(mtd->writesize, GFP_KERNEL);
-	if (!bounce_buf)
-		return -ENOMEM;
-
-	*retlen = len;
-	while (len) {
-		u8 oob[16];
-		size_t byte = from & al->bytemask;
-		size_t cplen = min(len, mtd->writesize - byte);
-
-		err = alauda_read_page(mtd, from, bounce_buf, oob,
-				&corrected, &uncorrected);
-		if (err)
-			goto out;
-
-		memcpy(buf, bounce_buf + byte, cplen);
-		buf += cplen;
-		from += cplen;
-		len -= cplen;
-	}
-	err = 0;
-	if (corrected)
-		err = 1;	/* return max_bitflips per ecc step */
-	if (uncorrected)
-		err = -EBADMSG;
-out:
-	kfree(bounce_buf);
-	return err;
-}
-
-static int alauda_read(struct mtd_info *mtd, loff_t from, size_t len,
-		size_t *retlen, u_char *buf)
-{
-	struct alauda *al = mtd->priv;
-	int err, corrected=0, uncorrected=0;
-
-	if ((from & al->bytemask) || (len & al->bytemask))
-		return alauda_bounce_read(mtd, from, len, retlen, buf);
-
-	*retlen = len;
-	while (len) {
-		u8 oob[16];
-
-		err = alauda_read_page(mtd, from, buf, oob,
-				&corrected, &uncorrected);
-		if (err)
-			return err;
-
-		buf += mtd->writesize;
-		from += mtd->writesize;
-		len -= mtd->writesize;
-	}
-	err = 0;
-	if (corrected)
-		err = 1;	/* return max_bitflips per ecc step */
-	if (uncorrected)
-		err = -EBADMSG;
-	return err;
-}
-
-static int alauda_write(struct mtd_info *mtd, loff_t to, size_t len,
-		size_t *retlen, const u_char *buf)
-{
-	struct alauda *al = mtd->priv;
-	int err;
-
-	if ((to & al->bytemask) || (len & al->bytemask))
-		return -EINVAL;
-
-	*retlen = len;
-	while (len) {
-		u32 page = (to >> al->card->pageshift) & al->pagemask;
-		u8 oob[16] = {	'h', 'e', 'l', 'l', 'o', 0xff, 0xff, 0xff,
-				0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
-
-		/* don't write to bad blocks */
-		if (page == 0) {
-			err = alauda_isbad(mtd, to);
-			if (err) {
-				return -EIO;
-			}
-		}
-		nand_calculate_ecc(mtd, buf, &oob[13]);
-		nand_calculate_ecc(mtd, buf+256, &oob[8]);
-
-		err = alauda_write_page(mtd, to, (void*)buf, oob);
-		if (err)
-			return err;
-
-		buf += mtd->writesize;
-		to += mtd->writesize;
-		len -= mtd->writesize;
-	}
-	return 0;
-}
-
-static int __alauda_erase(struct mtd_info *mtd, struct erase_info *instr)
-{
-	struct alauda *al = mtd->priv;
-	u32 ofs = instr->addr;
-	u32 len = instr->len;
-	int err;
-
-	if ((ofs & al->blockmask) || (len & al->blockmask))
-		return -EINVAL;
-
-	while (len) {
-		/* don't erase bad blocks */
-		err = alauda_isbad(mtd, ofs);
-		if (err > 0)
-			err = -EIO;
-		if (err < 0)
-			return err;
-
-		err = alauda_erase_block(mtd, ofs);
-		if (err < 0)
-			return err;
-
-		ofs += mtd->erasesize;
-		len -= mtd->erasesize;
-	}
-	return 0;
-}
-
-static int alauda_erase(struct mtd_info *mtd, struct erase_info *instr)
-{
-	int err;
-
-	err = __alauda_erase(mtd, instr);
-	instr->state = err ? MTD_ERASE_FAILED : MTD_ERASE_DONE;
-	mtd_erase_callback(instr);
-	return err;
-}
-
-static int alauda_init_media(struct alauda *al)
-{
-	u8 buf[4], *b0=buf, *b1=buf+1;
-	struct alauda_card *card;
-	struct mtd_info *mtd;
-	int err;
-
-	mtd = kzalloc(sizeof(*mtd), GFP_KERNEL);
-	if (!mtd)
-		return -ENOMEM;
-
-	for (;;) {
-		err = alauda_get_media_status(al, buf);
-		if (err < 0)
-			goto error;
-		if (*b0 & 0x10)
-			break;
-		msleep(20);
-	}
-
-	err = alauda_ack_media(al);
-	if (err)
-		goto error;
-
-	msleep(10);
-
-	err = alauda_get_media_status(al, buf);
-	if (err < 0)
-		goto error;
-
-	if (*b0 != 0x14) {
-		/* media not ready */
-		err = -EIO;
-		goto error;
-	}
-	err = alauda_get_media_signatures(al, buf);
-	if (err < 0)
-		goto error;
-
-	card = get_card(*b1);
-	if (!card) {
-		printk(KERN_ERR"Alauda: unknown card id %02x\n", *b1);
-		err = -EIO;
-		goto error;
-	}
-	printk(KERN_INFO"pagesize=%x\nerasesize=%x\nsize=%xMiB\n",
-			1<<card->pageshift, 1<<card->blockshift,
-			1<<(card->chipshift-20));
-	al->card = card;
-	al->pagemask = (1 << (card->blockshift - card->pageshift)) - 1;
-	al->bytemask = (1 << card->pageshift) - 1;
-	al->blockmask = (1 << card->blockshift) - 1;
-
-	mtd->name = "alauda";
-	mtd->size = 1<<card->chipshift;
-	mtd->erasesize = 1<<card->blockshift;
-	mtd->writesize = 1<<card->pageshift;
-	mtd->type = MTD_NANDFLASH;
-	mtd->flags = MTD_CAP_NANDFLASH;
-	mtd->_read = alauda_read;
-	mtd->_write = alauda_write;
-	mtd->_erase = alauda_erase;
-	mtd->_block_isbad = alauda_isbad;
-	mtd->priv = al;
-	mtd->owner = THIS_MODULE;
-	mtd->ecc_strength = 1;
-
-	err = mtd_device_register(mtd, NULL, 0);
-	if (err) {
-		err = -ENFILE;
-		goto error;
-	}
-
-	al->mtd = mtd;
-	alauda_reset(al); /* no clue whether this is necessary */
-	return 0;
-error:
-	kfree(mtd);
-	return err;
-}
-
-static int alauda_check_media(struct alauda *al)
-{
-	u8 buf[2], *b0 = buf, *b1 = buf+1;
-	int err;
-
-	err = alauda_get_media_status(al, buf);
-	if (err < 0)
-		return err;
-
-	if ((*b1 & 0x01) == 0) {
-		/* door open */
-		return -EIO;
-	}
-	if ((*b0 & 0x80) || ((*b0 & 0x1F) == 0x10)) {
-		/* no media ? */
-		return -EIO;
-	}
-	if (*b0 & 0x08) {
-		/* media change ? */
-		return alauda_init_media(al);
-	}
-	return 0;
-}
-
-static int alauda_probe(struct usb_interface *interface,
-		const struct usb_device_id *id)
-{
-	struct alauda *al;
-	struct usb_host_interface *iface;
-	struct usb_endpoint_descriptor *ep,
-			*ep_in=NULL, *ep_out=NULL, *ep_wr=NULL;
-	int i, err = -ENOMEM;
-
-	al = kzalloc(2*sizeof(*al), GFP_KERNEL);
-	if (!al)
-		goto error;
-
-	kref_init(&al->kref);
-	usb_set_intfdata(interface, al);
-
-	al->dev = usb_get_dev(interface_to_usbdev(interface));
-	al->interface = interface;
-
-	iface = interface->cur_altsetting;
-	for (i = 0; i < iface->desc.bNumEndpoints; ++i) {
-		ep = &iface->endpoint[i].desc;
-
-		if (usb_endpoint_is_bulk_in(ep)) {
-			ep_in = ep;
-		} else if (usb_endpoint_is_bulk_out(ep)) {
-			if (i==0)
-				ep_wr = ep;
-			else
-				ep_out = ep;
-		}
-	}
-	err = -EIO;
-	if (!ep_wr || !ep_in || !ep_out)
-		goto error;
-
-	al->write_out = usb_sndbulkpipe(al->dev,
-			usb_endpoint_num(ep_wr));
-	al->bulk_in = usb_rcvbulkpipe(al->dev,
-			usb_endpoint_num(ep_in));
-	al->bulk_out = usb_sndbulkpipe(al->dev,
-			usb_endpoint_num(ep_out));
-
-	/* second device is identical up to now */
-	memcpy(al+1, al, sizeof(*al));
-
-	mutex_init(&al[0].card_mutex);
-	mutex_init(&al[1].card_mutex);
-
-	al[0].port = ALAUDA_PORT_XD;
-	al[1].port = ALAUDA_PORT_SM;
-
-	dev_info(&interface->dev, "alauda probed\n");
-	alauda_check_media(al);
-	alauda_check_media(al+1);
-
-	return 0;
-
-error:
-	if (al)
-		kref_put(&al->kref, alauda_delete);
-	return err;
-}
-
-static void alauda_disconnect(struct usb_interface *interface)
-{
-	struct alauda *al;
-
-	al = usb_get_intfdata(interface);
-	usb_set_intfdata(interface, NULL);
-
-	/* FIXME: prevent more I/O from starting */
-
-	/* decrement our usage count */
-	if (al)
-		kref_put(&al->kref, alauda_delete);
-
-	dev_info(&interface->dev, "alauda gone");
-}
-
-static struct usb_driver alauda_driver = {
-	.name =		"alauda",
-	.probe =	alauda_probe,
-	.disconnect =	alauda_disconnect,
-	.id_table =	alauda_table,
-};
-
-module_usb_driver(alauda_driver);
-
-MODULE_LICENSE("GPL");
diff --git a/drivers/mtd/nand/ams-delta.c b/drivers/mtd/nand/ams-delta.c
index f1d71cd..8611eb4 100644
--- a/drivers/mtd/nand/ams-delta.c
+++ b/drivers/mtd/nand/ams-delta.c
@@ -258,7 +258,6 @@
  out_mtd:
 	gpio_free_array(_mandatory_gpio, ARRAY_SIZE(_mandatory_gpio));
 out_gpio:
-	platform_set_drvdata(pdev, NULL);
 	gpio_free(AMS_DELTA_GPIO_PIN_NAND_RB);
 	iounmap(io_base);
 out_free:
diff --git a/drivers/mtd/nand/atmel_nand.c b/drivers/mtd/nand/atmel_nand.c
index 2d23d29..060feea 100644
--- a/drivers/mtd/nand/atmel_nand.c
+++ b/drivers/mtd/nand/atmel_nand.c
@@ -18,6 +18,9 @@
  *  Add Programmable Multibit ECC support for various AT91 SoC
  *     © Copyright 2012 ATMEL, Hong Xu
  *
+ *  Add Nand Flash Controller support for SAMA5 SoC
+ *     © Copyright 2013 ATMEL, Josh Wu (josh.wu@atmel.com)
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
@@ -37,13 +40,12 @@
 #include <linux/mtd/nand.h>
 #include <linux/mtd/partitions.h>
 
+#include <linux/delay.h>
 #include <linux/dmaengine.h>
 #include <linux/gpio.h>
+#include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/platform_data/atmel.h>
-#include <linux/pinctrl/consumer.h>
-
-#include <mach/cpu.h>
 
 static int use_dma = 1;
 module_param(use_dma, int, 0);
@@ -58,6 +60,7 @@
 	__raw_writel((value), add + ATMEL_ECC_##reg)
 
 #include "atmel_nand_ecc.h"	/* Hardware ECC registers */
+#include "atmel_nand_nfc.h"	/* Nand Flash Controller definition */
 
 /* oob layout for large page size
  * bad block info is on bytes 0 and 1
@@ -85,6 +88,23 @@
 	},
 };
 
+struct atmel_nfc {
+	void __iomem		*base_cmd_regs;
+	void __iomem		*hsmc_regs;
+	void __iomem		*sram_bank0;
+	dma_addr_t		sram_bank0_phys;
+	bool			use_nfc_sram;
+	bool			write_by_sram;
+
+	bool			is_initialized;
+	struct completion	comp_nfc;
+
+	/* Point to the sram bank which include readed data via NFC */
+	void __iomem		*data_in_sram;
+	bool			will_write_sram;
+};
+static struct atmel_nfc	nand_nfc;
+
 struct atmel_nand_host {
 	struct nand_chip	nand_chip;
 	struct mtd_info		mtd;
@@ -97,6 +117,8 @@
 	struct completion	comp;
 	struct dma_chan		*dma_chan;
 
+	struct atmel_nfc	*nfc;
+
 	bool			has_pmecc;
 	u8			pmecc_corr_cap;
 	u16			pmecc_sector_size;
@@ -128,11 +150,6 @@
 
 static struct nand_ecclayout atmel_pmecc_oobinfo;
 
-static int cpu_has_dma(void)
-{
-	return cpu_is_at91sam9rl() || cpu_is_at91sam9g45();
-}
-
 /*
  * Enable NAND.
  */
@@ -186,21 +203,103 @@
                 !!host->board.rdy_pin_active_low;
 }
 
+/* Set up for hardware ready pin and enable pin. */
+static int atmel_nand_set_enable_ready_pins(struct mtd_info *mtd)
+{
+	struct nand_chip *chip = mtd->priv;
+	struct atmel_nand_host *host = chip->priv;
+	int res = 0;
+
+	if (gpio_is_valid(host->board.rdy_pin)) {
+		res = devm_gpio_request(host->dev,
+				host->board.rdy_pin, "nand_rdy");
+		if (res < 0) {
+			dev_err(host->dev,
+				"can't request rdy gpio %d\n",
+				host->board.rdy_pin);
+			return res;
+		}
+
+		res = gpio_direction_input(host->board.rdy_pin);
+		if (res < 0) {
+			dev_err(host->dev,
+				"can't request input direction rdy gpio %d\n",
+				host->board.rdy_pin);
+			return res;
+		}
+
+		chip->dev_ready = atmel_nand_device_ready;
+	}
+
+	if (gpio_is_valid(host->board.enable_pin)) {
+		res = devm_gpio_request(host->dev,
+				host->board.enable_pin, "nand_enable");
+		if (res < 0) {
+			dev_err(host->dev,
+				"can't request enable gpio %d\n",
+				host->board.enable_pin);
+			return res;
+		}
+
+		res = gpio_direction_output(host->board.enable_pin, 1);
+		if (res < 0) {
+			dev_err(host->dev,
+				"can't request output direction enable gpio %d\n",
+				host->board.enable_pin);
+			return res;
+		}
+	}
+
+	return res;
+}
+
+static void memcpy32_fromio(void *trg, const void __iomem  *src, size_t size)
+{
+	int i;
+	u32 *t = trg;
+	const __iomem u32 *s = src;
+
+	for (i = 0; i < (size >> 2); i++)
+		*t++ = readl_relaxed(s++);
+}
+
+static void memcpy32_toio(void __iomem *trg, const void *src, int size)
+{
+	int i;
+	u32 __iomem *t = trg;
+	const u32 *s = src;
+
+	for (i = 0; i < (size >> 2); i++)
+		writel_relaxed(*s++, t++);
+}
+
 /*
  * Minimal-overhead PIO for data access.
  */
 static void atmel_read_buf8(struct mtd_info *mtd, u8 *buf, int len)
 {
 	struct nand_chip	*nand_chip = mtd->priv;
+	struct atmel_nand_host *host = nand_chip->priv;
 
-	__raw_readsb(nand_chip->IO_ADDR_R, buf, len);
+	if (host->nfc && host->nfc->use_nfc_sram && host->nfc->data_in_sram) {
+		memcpy32_fromio(buf, host->nfc->data_in_sram, len);
+		host->nfc->data_in_sram += len;
+	} else {
+		__raw_readsb(nand_chip->IO_ADDR_R, buf, len);
+	}
 }
 
 static void atmel_read_buf16(struct mtd_info *mtd, u8 *buf, int len)
 {
 	struct nand_chip	*nand_chip = mtd->priv;
+	struct atmel_nand_host *host = nand_chip->priv;
 
-	__raw_readsw(nand_chip->IO_ADDR_R, buf, len / 2);
+	if (host->nfc && host->nfc->use_nfc_sram && host->nfc->data_in_sram) {
+		memcpy32_fromio(buf, host->nfc->data_in_sram, len);
+		host->nfc->data_in_sram += len;
+	} else {
+		__raw_readsw(nand_chip->IO_ADDR_R, buf, len / 2);
+	}
 }
 
 static void atmel_write_buf8(struct mtd_info *mtd, const u8 *buf, int len)
@@ -222,6 +321,40 @@
 	complete(completion);
 }
 
+static int nfc_set_sram_bank(struct atmel_nand_host *host, unsigned int bank)
+{
+	/* NFC only has two banks. Must be 0 or 1 */
+	if (bank > 1)
+		return -EINVAL;
+
+	if (bank) {
+		/* Only for a 2k-page or lower flash, NFC can handle 2 banks */
+		if (host->mtd.writesize > 2048)
+			return -EINVAL;
+		nfc_writel(host->nfc->hsmc_regs, BANK, ATMEL_HSMC_NFC_BANK1);
+	} else {
+		nfc_writel(host->nfc->hsmc_regs, BANK, ATMEL_HSMC_NFC_BANK0);
+	}
+
+	return 0;
+}
+
+static uint nfc_get_sram_off(struct atmel_nand_host *host)
+{
+	if (nfc_readl(host->nfc->hsmc_regs, BANK) & ATMEL_HSMC_NFC_BANK1)
+		return NFC_SRAM_BANK1_OFFSET;
+	else
+		return 0;
+}
+
+static dma_addr_t nfc_sram_phys(struct atmel_nand_host *host)
+{
+	if (nfc_readl(host->nfc->hsmc_regs, BANK) & ATMEL_HSMC_NFC_BANK1)
+		return host->nfc->sram_bank0_phys + NFC_SRAM_BANK1_OFFSET;
+	else
+		return host->nfc->sram_bank0_phys;
+}
+
 static int atmel_nand_dma_op(struct mtd_info *mtd, void *buf, int len,
 			       int is_read)
 {
@@ -235,6 +368,7 @@
 	void *p = buf;
 	int err = -EIO;
 	enum dma_data_direction dir = is_read ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
+	struct atmel_nfc *nfc = host->nfc;
 
 	if (buf >= high_memory)
 		goto err_buf;
@@ -251,11 +385,20 @@
 	}
 
 	if (is_read) {
-		dma_src_addr = host->io_phys;
+		if (nfc && nfc->data_in_sram)
+			dma_src_addr = nfc_sram_phys(host) + (nfc->data_in_sram
+				- (nfc->sram_bank0 + nfc_get_sram_off(host)));
+		else
+			dma_src_addr = host->io_phys;
+
 		dma_dst_addr = phys_addr;
 	} else {
 		dma_src_addr = phys_addr;
-		dma_dst_addr = host->io_phys;
+
+		if (nfc && nfc->write_by_sram)
+			dma_dst_addr = nfc_sram_phys(host);
+		else
+			dma_dst_addr = host->io_phys;
 	}
 
 	tx = dma_dev->device_prep_dma_memcpy(host->dma_chan, dma_dst_addr,
@@ -278,6 +421,10 @@
 	dma_async_issue_pending(host->dma_chan);
 	wait_for_completion(&host->comp);
 
+	if (is_read && nfc && nfc->data_in_sram)
+		/* After read data from SRAM, need to increase the position */
+		nfc->data_in_sram += len;
+
 	err = 0;
 
 err_dma:
@@ -366,43 +513,34 @@
 			table_size * sizeof(int16_t);
 }
 
-static void pmecc_data_free(struct atmel_nand_host *host)
-{
-	kfree(host->pmecc_partial_syn);
-	kfree(host->pmecc_si);
-	kfree(host->pmecc_lmu);
-	kfree(host->pmecc_smu);
-	kfree(host->pmecc_mu);
-	kfree(host->pmecc_dmu);
-	kfree(host->pmecc_delta);
-}
-
 static int pmecc_data_alloc(struct atmel_nand_host *host)
 {
 	const int cap = host->pmecc_corr_cap;
+	int size;
 
-	host->pmecc_partial_syn = kzalloc((2 * cap + 1) * sizeof(int16_t),
-					GFP_KERNEL);
-	host->pmecc_si = kzalloc((2 * cap + 1) * sizeof(int16_t), GFP_KERNEL);
-	host->pmecc_lmu = kzalloc((cap + 1) * sizeof(int16_t), GFP_KERNEL);
-	host->pmecc_smu = kzalloc((cap + 2) * (2 * cap + 1) * sizeof(int16_t),
-					GFP_KERNEL);
-	host->pmecc_mu = kzalloc((cap + 1) * sizeof(int), GFP_KERNEL);
-	host->pmecc_dmu = kzalloc((cap + 1) * sizeof(int), GFP_KERNEL);
-	host->pmecc_delta = kzalloc((cap + 1) * sizeof(int), GFP_KERNEL);
+	size = (2 * cap + 1) * sizeof(int16_t);
+	host->pmecc_partial_syn = devm_kzalloc(host->dev, size, GFP_KERNEL);
+	host->pmecc_si = devm_kzalloc(host->dev, size, GFP_KERNEL);
+	host->pmecc_lmu = devm_kzalloc(host->dev,
+			(cap + 1) * sizeof(int16_t), GFP_KERNEL);
+	host->pmecc_smu = devm_kzalloc(host->dev,
+			(cap + 2) * size, GFP_KERNEL);
 
-	if (host->pmecc_partial_syn &&
-			host->pmecc_si &&
-			host->pmecc_lmu &&
-			host->pmecc_smu &&
-			host->pmecc_mu &&
-			host->pmecc_dmu &&
-			host->pmecc_delta)
-		return 0;
+	size = (cap + 1) * sizeof(int);
+	host->pmecc_mu = devm_kzalloc(host->dev, size, GFP_KERNEL);
+	host->pmecc_dmu = devm_kzalloc(host->dev, size, GFP_KERNEL);
+	host->pmecc_delta = devm_kzalloc(host->dev, size, GFP_KERNEL);
 
-	/* error happened */
-	pmecc_data_free(host);
-	return -ENOMEM;
+	if (!host->pmecc_partial_syn ||
+		!host->pmecc_si ||
+		!host->pmecc_lmu ||
+		!host->pmecc_smu ||
+		!host->pmecc_mu ||
+		!host->pmecc_dmu ||
+		!host->pmecc_delta)
+		return -ENOMEM;
+
+	return 0;
 }
 
 static void pmecc_gen_syndrome(struct mtd_info *mtd, int sector)
@@ -763,6 +901,30 @@
 	return total_err;
 }
 
+static void pmecc_enable(struct atmel_nand_host *host, int ecc_op)
+{
+	u32 val;
+
+	if (ecc_op != NAND_ECC_READ && ecc_op != NAND_ECC_WRITE) {
+		dev_err(host->dev, "atmel_nand: wrong pmecc operation type!");
+		return;
+	}
+
+	pmecc_writel(host->ecc, CTRL, PMECC_CTRL_RST);
+	pmecc_writel(host->ecc, CTRL, PMECC_CTRL_DISABLE);
+	val = pmecc_readl_relaxed(host->ecc, CFG);
+
+	if (ecc_op == NAND_ECC_READ)
+		pmecc_writel(host->ecc, CFG, (val & ~PMECC_CFG_WRITE_OP)
+			| PMECC_CFG_AUTO_ENABLE);
+	else
+		pmecc_writel(host->ecc, CFG, (val | PMECC_CFG_WRITE_OP)
+			& ~PMECC_CFG_AUTO_ENABLE);
+
+	pmecc_writel(host->ecc, CTRL, PMECC_CTRL_ENABLE);
+	pmecc_writel(host->ecc, CTRL, PMECC_CTRL_DATA);
+}
+
 static int atmel_nand_pmecc_read_page(struct mtd_info *mtd,
 	struct nand_chip *chip, uint8_t *buf, int oob_required, int page)
 {
@@ -774,13 +936,8 @@
 	unsigned long end_time;
 	int bitflips = 0;
 
-	pmecc_writel(host->ecc, CTRL, PMECC_CTRL_RST);
-	pmecc_writel(host->ecc, CTRL, PMECC_CTRL_DISABLE);
-	pmecc_writel(host->ecc, CFG, (pmecc_readl_relaxed(host->ecc, CFG)
-		& ~PMECC_CFG_WRITE_OP) | PMECC_CFG_AUTO_ENABLE);
-
-	pmecc_writel(host->ecc, CTRL, PMECC_CTRL_ENABLE);
-	pmecc_writel(host->ecc, CTRL, PMECC_CTRL_DATA);
+	if (!host->nfc || !host->nfc->use_nfc_sram)
+		pmecc_enable(host, NAND_ECC_READ);
 
 	chip->read_buf(mtd, buf, eccsize);
 	chip->read_buf(mtd, oob, mtd->oobsize);
@@ -813,16 +970,10 @@
 	int i, j;
 	unsigned long end_time;
 
-	pmecc_writel(host->ecc, CTRL, PMECC_CTRL_RST);
-	pmecc_writel(host->ecc, CTRL, PMECC_CTRL_DISABLE);
-
-	pmecc_writel(host->ecc, CFG, (pmecc_readl_relaxed(host->ecc, CFG) |
-		PMECC_CFG_WRITE_OP) & ~PMECC_CFG_AUTO_ENABLE);
-
-	pmecc_writel(host->ecc, CTRL, PMECC_CTRL_ENABLE);
-	pmecc_writel(host->ecc, CTRL, PMECC_CTRL_DATA);
-
-	chip->write_buf(mtd, (u8 *)buf, mtd->writesize);
+	if (!host->nfc || !host->nfc->write_by_sram) {
+		pmecc_enable(host, NAND_ECC_WRITE);
+		chip->write_buf(mtd, (u8 *)buf, mtd->writesize);
+	}
 
 	end_time = jiffies + msecs_to_jiffies(PMECC_MAX_TIMEOUT_MS);
 	while ((pmecc_readl_relaxed(host->ecc, SR) & PMECC_SR_BUSY)) {
@@ -967,11 +1118,11 @@
 			host->pmecc_corr_cap = 2;
 		else if (*cap <= 4)
 			host->pmecc_corr_cap = 4;
-		else if (*cap < 8)
+		else if (*cap <= 8)
 			host->pmecc_corr_cap = 8;
-		else if (*cap < 12)
+		else if (*cap <= 12)
 			host->pmecc_corr_cap = 12;
-		else if (*cap < 24)
+		else if (*cap <= 24)
 			host->pmecc_corr_cap = 24;
 		else
 			return -EINVAL;
@@ -1002,7 +1153,7 @@
 		return err_no;
 	}
 
-	if (cap != host->pmecc_corr_cap ||
+	if (cap > host->pmecc_corr_cap ||
 			sector_size != host->pmecc_sector_size)
 		dev_info(host->dev, "WARNING: Be Caution! Using different PMECC parameters from Nand ONFI ECC reqirement.\n");
 
@@ -1023,27 +1174,28 @@
 		return 0;
 	}
 
-	host->ecc = ioremap(regs->start, resource_size(regs));
-	if (host->ecc == NULL) {
+	host->ecc = devm_ioremap_resource(&pdev->dev, regs);
+	if (IS_ERR(host->ecc)) {
 		dev_err(host->dev, "ioremap failed\n");
-		err_no = -EIO;
-		goto err_pmecc_ioremap;
+		err_no = PTR_ERR(host->ecc);
+		goto err;
 	}
 
 	regs_pmerr = platform_get_resource(pdev, IORESOURCE_MEM, 2);
-	regs_rom = platform_get_resource(pdev, IORESOURCE_MEM, 3);
-	if (regs_pmerr && regs_rom) {
-		host->pmerrloc_base = ioremap(regs_pmerr->start,
-			resource_size(regs_pmerr));
-		host->pmecc_rom_base = ioremap(regs_rom->start,
-			resource_size(regs_rom));
+	host->pmerrloc_base = devm_ioremap_resource(&pdev->dev, regs_pmerr);
+	if (IS_ERR(host->pmerrloc_base)) {
+		dev_err(host->dev,
+			"Can not get I/O resource for PMECC ERRLOC controller!\n");
+		err_no = PTR_ERR(host->pmerrloc_base);
+		goto err;
 	}
 
-	if (!host->pmerrloc_base || !host->pmecc_rom_base) {
-		dev_err(host->dev,
-			"Can not get I/O resource for PMECC ERRLOC controller or ROM!\n");
-		err_no = -EIO;
-		goto err_pmloc_ioremap;
+	regs_rom = platform_get_resource(pdev, IORESOURCE_MEM, 3);
+	host->pmecc_rom_base = devm_ioremap_resource(&pdev->dev, regs_rom);
+	if (IS_ERR(host->pmecc_rom_base)) {
+		dev_err(host->dev, "Can not get I/O resource for ROM!\n");
+		err_no = PTR_ERR(host->pmecc_rom_base);
+		goto err;
 	}
 
 	/* ECC is calculated for the whole page (1 step) */
@@ -1052,7 +1204,8 @@
 	/* set ECC page size and oob layout */
 	switch (mtd->writesize) {
 	case 2048:
-		host->pmecc_degree = PMECC_GF_DIMENSION_13;
+		host->pmecc_degree = (sector_size == 512) ?
+			PMECC_GF_DIMENSION_13 : PMECC_GF_DIMENSION_14;
 		host->pmecc_cw_len = (1 << host->pmecc_degree) - 1;
 		host->pmecc_sector_number = mtd->writesize / sector_size;
 		host->pmecc_bytes_per_sector = pmecc_get_ecc_bytes(
@@ -1068,7 +1221,7 @@
 		if (nand_chip->ecc.bytes > mtd->oobsize - 2) {
 			dev_err(host->dev, "No room for ECC bytes\n");
 			err_no = -EINVAL;
-			goto err_no_ecc_room;
+			goto err;
 		}
 		pmecc_config_ecc_layout(&atmel_pmecc_oobinfo,
 					mtd->oobsize,
@@ -1093,7 +1246,7 @@
 	if (err_no) {
 		dev_err(host->dev,
 				"Cannot allocate memory for PMECC computation!\n");
-		goto err_pmecc_data_alloc;
+		goto err;
 	}
 
 	nand_chip->ecc.read_page = atmel_nand_pmecc_read_page;
@@ -1103,15 +1256,7 @@
 
 	return 0;
 
-err_pmecc_data_alloc:
-err_no_ecc_room:
-err_pmloc_ioremap:
-	iounmap(host->ecc);
-	if (host->pmerrloc_base)
-		iounmap(host->pmerrloc_base);
-	if (host->pmecc_rom_base)
-		iounmap(host->pmecc_rom_base);
-err_pmecc_ioremap:
+err:
 	return err_no;
 }
 
@@ -1174,10 +1319,9 @@
 	 * Workaround: Reset the parity registers before reading the
 	 * actual data.
 	 */
-	if (cpu_is_at32ap7000()) {
-		struct atmel_nand_host *host = chip->priv;
+	struct atmel_nand_host *host = chip->priv;
+	if (host->board.need_reset_workaround)
 		ecc_writel(host->ecc, CR, ATMEL_ECC_RST);
-	}
 
 	/* read the page */
 	chip->read_buf(mtd, p, eccsize);
@@ -1298,11 +1442,11 @@
  */
 static void atmel_nand_hwctl(struct mtd_info *mtd, int mode)
 {
-	if (cpu_is_at32ap7000()) {
-		struct nand_chip *nand_chip = mtd->priv;
-		struct atmel_nand_host *host = nand_chip->priv;
+	struct nand_chip *nand_chip = mtd->priv;
+	struct atmel_nand_host *host = nand_chip->priv;
+
+	if (host->board.need_reset_workaround)
 		ecc_writel(host->ecc, CR, ATMEL_ECC_RST);
-	}
 }
 
 #if defined(CONFIG_OF)
@@ -1337,6 +1481,8 @@
 
 	board->on_flash_bbt = of_get_nand_on_flash_bbt(np);
 
+	board->has_dma = of_property_read_bool(np, "atmel,nand-has-dma");
+
 	if (of_get_nand_bus_width(np) == 16)
 		board->bus_width_16 = 1;
 
@@ -1348,6 +1494,9 @@
 
 	host->has_pmecc = of_property_read_bool(np, "atmel,has-pmecc");
 
+	/* load the nfc driver if there is */
+	of_platform_populate(np, NULL, NULL, host->dev);
+
 	if (!(board->ecc_mode == NAND_ECC_HW) || !host->has_pmecc)
 		return 0;	/* Not using PMECC */
 
@@ -1414,10 +1563,10 @@
 		return 0;
 	}
 
-	host->ecc = ioremap(regs->start, resource_size(regs));
-	if (host->ecc == NULL) {
+	host->ecc = devm_ioremap_resource(&pdev->dev, regs);
+	if (IS_ERR(host->ecc)) {
 		dev_err(host->dev, "ioremap failed\n");
-		return -EIO;
+		return PTR_ERR(host->ecc);
 	}
 
 	/* ECC is calculated for the whole page (1 step) */
@@ -1459,6 +1608,382 @@
 	return 0;
 }
 
+/* SMC interrupt service routine */
+static irqreturn_t hsmc_interrupt(int irq, void *dev_id)
+{
+	struct atmel_nand_host *host = dev_id;
+	u32 status, mask, pending;
+	irqreturn_t ret = IRQ_HANDLED;
+
+	status = nfc_readl(host->nfc->hsmc_regs, SR);
+	mask = nfc_readl(host->nfc->hsmc_regs, IMR);
+	pending = status & mask;
+
+	if (pending & NFC_SR_XFR_DONE) {
+		complete(&host->nfc->comp_nfc);
+		nfc_writel(host->nfc->hsmc_regs, IDR, NFC_SR_XFR_DONE);
+	} else if (pending & NFC_SR_RB_EDGE) {
+		complete(&host->nfc->comp_nfc);
+		nfc_writel(host->nfc->hsmc_regs, IDR, NFC_SR_RB_EDGE);
+	} else if (pending & NFC_SR_CMD_DONE) {
+		complete(&host->nfc->comp_nfc);
+		nfc_writel(host->nfc->hsmc_regs, IDR, NFC_SR_CMD_DONE);
+	} else {
+		ret = IRQ_NONE;
+	}
+
+	return ret;
+}
+
+/* NFC(Nand Flash Controller) related functions */
+static int nfc_wait_interrupt(struct atmel_nand_host *host, u32 flag)
+{
+	unsigned long timeout;
+	init_completion(&host->nfc->comp_nfc);
+
+	/* Enable interrupt that need to wait for */
+	nfc_writel(host->nfc->hsmc_regs, IER, flag);
+
+	timeout = wait_for_completion_timeout(&host->nfc->comp_nfc,
+			msecs_to_jiffies(NFC_TIME_OUT_MS));
+	if (timeout)
+		return 0;
+
+	/* Time out to wait for the interrupt */
+	dev_err(host->dev, "Time out to wait for interrupt: 0x%08x\n", flag);
+	return -ETIMEDOUT;
+}
+
+static int nfc_send_command(struct atmel_nand_host *host,
+	unsigned int cmd, unsigned int addr, unsigned char cycle0)
+{
+	unsigned long timeout;
+	dev_dbg(host->dev,
+		"nfc_cmd: 0x%08x, addr1234: 0x%08x, cycle0: 0x%02x\n",
+		cmd, addr, cycle0);
+
+	timeout = jiffies + msecs_to_jiffies(NFC_TIME_OUT_MS);
+	while (nfc_cmd_readl(NFCADDR_CMD_NFCBUSY, host->nfc->base_cmd_regs)
+			& NFCADDR_CMD_NFCBUSY) {
+		if (time_after(jiffies, timeout)) {
+			dev_err(host->dev,
+				"Time out to wait CMD_NFCBUSY ready!\n");
+			return -ETIMEDOUT;
+		}
+	}
+	nfc_writel(host->nfc->hsmc_regs, CYCLE0, cycle0);
+	nfc_cmd_addr1234_writel(cmd, addr, host->nfc->base_cmd_regs);
+	return nfc_wait_interrupt(host, NFC_SR_CMD_DONE);
+}
+
+static int nfc_device_ready(struct mtd_info *mtd)
+{
+	struct nand_chip *nand_chip = mtd->priv;
+	struct atmel_nand_host *host = nand_chip->priv;
+	if (!nfc_wait_interrupt(host, NFC_SR_RB_EDGE))
+		return 1;
+	return 0;
+}
+
+static void nfc_select_chip(struct mtd_info *mtd, int chip)
+{
+	struct nand_chip *nand_chip = mtd->priv;
+	struct atmel_nand_host *host = nand_chip->priv;
+
+	if (chip == -1)
+		nfc_writel(host->nfc->hsmc_regs, CTRL, NFC_CTRL_DISABLE);
+	else
+		nfc_writel(host->nfc->hsmc_regs, CTRL, NFC_CTRL_ENABLE);
+}
+
+static int nfc_make_addr(struct mtd_info *mtd, int column, int page_addr,
+		unsigned int *addr1234, unsigned int *cycle0)
+{
+	struct nand_chip *chip = mtd->priv;
+
+	int acycle = 0;
+	unsigned char addr_bytes[8];
+	int index = 0, bit_shift;
+
+	BUG_ON(addr1234 == NULL || cycle0 == NULL);
+
+	*cycle0 = 0;
+	*addr1234 = 0;
+
+	if (column != -1) {
+		if (chip->options & NAND_BUSWIDTH_16)
+			column >>= 1;
+		addr_bytes[acycle++] = column & 0xff;
+		if (mtd->writesize > 512)
+			addr_bytes[acycle++] = (column >> 8) & 0xff;
+	}
+
+	if (page_addr != -1) {
+		addr_bytes[acycle++] = page_addr & 0xff;
+		addr_bytes[acycle++] = (page_addr >> 8) & 0xff;
+		if (chip->chipsize > (128 << 20))
+			addr_bytes[acycle++] = (page_addr >> 16) & 0xff;
+	}
+
+	if (acycle > 4)
+		*cycle0 = addr_bytes[index++];
+
+	for (bit_shift = 0; index < acycle; bit_shift += 8)
+		*addr1234 += addr_bytes[index++] << bit_shift;
+
+	/* return acycle in cmd register */
+	return acycle << NFCADDR_CMD_ACYCLE_BIT_POS;
+}
+
+static void nfc_nand_command(struct mtd_info *mtd, unsigned int command,
+				int column, int page_addr)
+{
+	struct nand_chip *chip = mtd->priv;
+	struct atmel_nand_host *host = chip->priv;
+	unsigned long timeout;
+	unsigned int nfc_addr_cmd = 0;
+
+	unsigned int cmd1 = command << NFCADDR_CMD_CMD1_BIT_POS;
+
+	/* Set default settings: no cmd2, no addr cycle. read from nand */
+	unsigned int cmd2 = 0;
+	unsigned int vcmd2 = 0;
+	int acycle = NFCADDR_CMD_ACYCLE_NONE;
+	int csid = NFCADDR_CMD_CSID_3;
+	int dataen = NFCADDR_CMD_DATADIS;
+	int nfcwr = NFCADDR_CMD_NFCRD;
+	unsigned int addr1234 = 0;
+	unsigned int cycle0 = 0;
+	bool do_addr = true;
+	host->nfc->data_in_sram = NULL;
+
+	dev_dbg(host->dev, "%s: cmd = 0x%02x, col = 0x%08x, page = 0x%08x\n",
+	     __func__, command, column, page_addr);
+
+	switch (command) {
+	case NAND_CMD_RESET:
+		nfc_addr_cmd = cmd1 | acycle | csid | dataen | nfcwr;
+		nfc_send_command(host, nfc_addr_cmd, addr1234, cycle0);
+		udelay(chip->chip_delay);
+
+		nfc_nand_command(mtd, NAND_CMD_STATUS, -1, -1);
+		timeout = jiffies + msecs_to_jiffies(NFC_TIME_OUT_MS);
+		while (!(chip->read_byte(mtd) & NAND_STATUS_READY)) {
+			if (time_after(jiffies, timeout)) {
+				dev_err(host->dev,
+					"Time out to wait status ready!\n");
+				break;
+			}
+		}
+		return;
+	case NAND_CMD_STATUS:
+		do_addr = false;
+		break;
+	case NAND_CMD_PARAM:
+	case NAND_CMD_READID:
+		do_addr = false;
+		acycle = NFCADDR_CMD_ACYCLE_1;
+		if (column != -1)
+			addr1234 = column;
+		break;
+	case NAND_CMD_RNDOUT:
+		cmd2 = NAND_CMD_RNDOUTSTART << NFCADDR_CMD_CMD2_BIT_POS;
+		vcmd2 = NFCADDR_CMD_VCMD2;
+		break;
+	case NAND_CMD_READ0:
+	case NAND_CMD_READOOB:
+		if (command == NAND_CMD_READOOB) {
+			column += mtd->writesize;
+			command = NAND_CMD_READ0; /* only READ0 is valid */
+			cmd1 = command << NFCADDR_CMD_CMD1_BIT_POS;
+		}
+		if (host->nfc->use_nfc_sram) {
+			/* Enable Data transfer to sram */
+			dataen = NFCADDR_CMD_DATAEN;
+
+			/* Need enable PMECC now, since NFC will transfer
+			 * data in bus after sending nfc read command.
+			 */
+			if (chip->ecc.mode == NAND_ECC_HW && host->has_pmecc)
+				pmecc_enable(host, NAND_ECC_READ);
+		}
+
+		cmd2 = NAND_CMD_READSTART << NFCADDR_CMD_CMD2_BIT_POS;
+		vcmd2 = NFCADDR_CMD_VCMD2;
+		break;
+	/* For prgramming command, the cmd need set to write enable */
+	case NAND_CMD_PAGEPROG:
+	case NAND_CMD_SEQIN:
+	case NAND_CMD_RNDIN:
+		nfcwr = NFCADDR_CMD_NFCWR;
+		if (host->nfc->will_write_sram && command == NAND_CMD_SEQIN)
+			dataen = NFCADDR_CMD_DATAEN;
+		break;
+	default:
+		break;
+	}
+
+	if (do_addr)
+		acycle = nfc_make_addr(mtd, column, page_addr, &addr1234,
+				&cycle0);
+
+	nfc_addr_cmd = cmd1 | cmd2 | vcmd2 | acycle | csid | dataen | nfcwr;
+	nfc_send_command(host, nfc_addr_cmd, addr1234, cycle0);
+
+	if (dataen == NFCADDR_CMD_DATAEN)
+		if (nfc_wait_interrupt(host, NFC_SR_XFR_DONE))
+			dev_err(host->dev, "something wrong, No XFR_DONE interrupt comes.\n");
+
+	/*
+	 * Program and erase have their own busy handlers status, sequential
+	 * in, and deplete1 need no delay.
+	 */
+	switch (command) {
+	case NAND_CMD_CACHEDPROG:
+	case NAND_CMD_PAGEPROG:
+	case NAND_CMD_ERASE1:
+	case NAND_CMD_ERASE2:
+	case NAND_CMD_RNDIN:
+	case NAND_CMD_STATUS:
+	case NAND_CMD_RNDOUT:
+	case NAND_CMD_SEQIN:
+	case NAND_CMD_READID:
+		return;
+
+	case NAND_CMD_READ0:
+		if (dataen == NFCADDR_CMD_DATAEN) {
+			host->nfc->data_in_sram = host->nfc->sram_bank0 +
+				nfc_get_sram_off(host);
+			return;
+		}
+		/* fall through */
+	default:
+		nfc_wait_interrupt(host, NFC_SR_RB_EDGE);
+	}
+}
+
+static int nfc_sram_write_page(struct mtd_info *mtd, struct nand_chip *chip,
+			uint32_t offset, int data_len, const uint8_t *buf,
+			int oob_required, int page, int cached, int raw)
+{
+	int cfg, len;
+	int status = 0;
+	struct atmel_nand_host *host = chip->priv;
+	void __iomem *sram = host->nfc->sram_bank0 + nfc_get_sram_off(host);
+
+	/* Subpage write is not supported */
+	if (offset || (data_len < mtd->writesize))
+		return -EINVAL;
+
+	cfg = nfc_readl(host->nfc->hsmc_regs, CFG);
+	len = mtd->writesize;
+
+	if (unlikely(raw)) {
+		len += mtd->oobsize;
+		nfc_writel(host->nfc->hsmc_regs, CFG, cfg | NFC_CFG_WSPARE);
+	} else
+		nfc_writel(host->nfc->hsmc_regs, CFG, cfg & ~NFC_CFG_WSPARE);
+
+	/* Copy page data to sram that will write to nand via NFC */
+	if (use_dma) {
+		if (atmel_nand_dma_op(mtd, (void *)buf, len, 0) != 0)
+			/* Fall back to use cpu copy */
+			memcpy32_toio(sram, buf, len);
+	} else {
+		memcpy32_toio(sram, buf, len);
+	}
+
+	if (chip->ecc.mode == NAND_ECC_HW && host->has_pmecc)
+		/*
+		 * When use NFC sram, need set up PMECC before send
+		 * NAND_CMD_SEQIN command. Since when the nand command
+		 * is sent, nfc will do transfer from sram and nand.
+		 */
+		pmecc_enable(host, NAND_ECC_WRITE);
+
+	host->nfc->will_write_sram = true;
+	chip->cmdfunc(mtd, NAND_CMD_SEQIN, 0x00, page);
+	host->nfc->will_write_sram = false;
+
+	if (likely(!raw))
+		/* Need to write ecc into oob */
+		status = chip->ecc.write_page(mtd, chip, buf, oob_required);
+
+	if (status < 0)
+		return status;
+
+	chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1);
+	status = chip->waitfunc(mtd, chip);
+
+	if ((status & NAND_STATUS_FAIL) && (chip->errstat))
+		status = chip->errstat(mtd, chip, FL_WRITING, status, page);
+
+	if (status & NAND_STATUS_FAIL)
+		return -EIO;
+
+	return 0;
+}
+
+static int nfc_sram_init(struct mtd_info *mtd)
+{
+	struct nand_chip *chip = mtd->priv;
+	struct atmel_nand_host *host = chip->priv;
+	int res = 0;
+
+	/* Initialize the NFC CFG register */
+	unsigned int cfg_nfc = 0;
+
+	/* set page size and oob layout */
+	switch (mtd->writesize) {
+	case 512:
+		cfg_nfc = NFC_CFG_PAGESIZE_512;
+		break;
+	case 1024:
+		cfg_nfc = NFC_CFG_PAGESIZE_1024;
+		break;
+	case 2048:
+		cfg_nfc = NFC_CFG_PAGESIZE_2048;
+		break;
+	case 4096:
+		cfg_nfc = NFC_CFG_PAGESIZE_4096;
+		break;
+	case 8192:
+		cfg_nfc = NFC_CFG_PAGESIZE_8192;
+		break;
+	default:
+		dev_err(host->dev, "Unsupported page size for NFC.\n");
+		res = -ENXIO;
+		return res;
+	}
+
+	/* oob bytes size = (NFCSPARESIZE + 1) * 4
+	 * Max support spare size is 512 bytes. */
+	cfg_nfc |= (((mtd->oobsize / 4) - 1) << NFC_CFG_NFC_SPARESIZE_BIT_POS
+		& NFC_CFG_NFC_SPARESIZE);
+	/* default set a max timeout */
+	cfg_nfc |= NFC_CFG_RSPARE |
+			NFC_CFG_NFC_DTOCYC | NFC_CFG_NFC_DTOMUL;
+
+	nfc_writel(host->nfc->hsmc_regs, CFG, cfg_nfc);
+
+	host->nfc->will_write_sram = false;
+	nfc_set_sram_bank(host, 0);
+
+	/* Use Write page with NFC SRAM only for PMECC or ECC NONE. */
+	if (host->nfc->write_by_sram) {
+		if ((chip->ecc.mode == NAND_ECC_HW && host->has_pmecc) ||
+				chip->ecc.mode == NAND_ECC_NONE)
+			chip->write_page = nfc_sram_write_page;
+		else
+			host->nfc->write_by_sram = false;
+	}
+
+	dev_info(host->dev, "Using NFC Sram read %s\n",
+			host->nfc->write_by_sram ? "and write" : "");
+	return 0;
+}
+
+static struct platform_driver atmel_nand_nfc_driver;
 /*
  * Probe for the NAND device.
  */
@@ -1469,30 +1994,27 @@
 	struct nand_chip *nand_chip;
 	struct resource *mem;
 	struct mtd_part_parser_data ppdata = {};
-	int res;
-	struct pinctrl *pinctrl;
-
-	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!mem) {
-		printk(KERN_ERR "atmel_nand: can't get I/O resource mem\n");
-		return -ENXIO;
-	}
+	int res, irq;
 
 	/* Allocate memory for the device structure (and zero it) */
-	host = kzalloc(sizeof(struct atmel_nand_host), GFP_KERNEL);
+	host = devm_kzalloc(&pdev->dev, sizeof(*host), GFP_KERNEL);
 	if (!host) {
 		printk(KERN_ERR "atmel_nand: failed to allocate device structure.\n");
 		return -ENOMEM;
 	}
 
-	host->io_phys = (dma_addr_t)mem->start;
+	res = platform_driver_register(&atmel_nand_nfc_driver);
+	if (res)
+		dev_err(&pdev->dev, "atmel_nand: can't register NFC driver\n");
 
-	host->io_base = ioremap(mem->start, resource_size(mem));
-	if (host->io_base == NULL) {
-		printk(KERN_ERR "atmel_nand: ioremap failed\n");
-		res = -EIO;
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	host->io_base = devm_ioremap_resource(&pdev->dev, mem);
+	if (IS_ERR(host->io_base)) {
+		dev_err(&pdev->dev, "atmel_nand: ioremap resource failed\n");
+		res = PTR_ERR(host->io_base);
 		goto err_nand_ioremap;
 	}
+	host->io_phys = (dma_addr_t)mem->start;
 
 	mtd = &host->mtd;
 	nand_chip = &host->nand_chip;
@@ -1500,9 +2022,9 @@
 	if (pdev->dev.of_node) {
 		res = atmel_of_init_port(host, pdev->dev.of_node);
 		if (res)
-			goto err_ecc_ioremap;
+			goto err_nand_ioremap;
 	} else {
-		memcpy(&host->board, pdev->dev.platform_data,
+		memcpy(&host->board, dev_get_platdata(&pdev->dev),
 		       sizeof(struct atmel_nand_data));
 	}
 
@@ -1513,51 +2035,36 @@
 	/* Set address of NAND IO lines */
 	nand_chip->IO_ADDR_R = host->io_base;
 	nand_chip->IO_ADDR_W = host->io_base;
-	nand_chip->cmd_ctrl = atmel_nand_cmd_ctrl;
 
-	pinctrl = devm_pinctrl_get_select_default(&pdev->dev);
-	if (IS_ERR(pinctrl)) {
-		dev_err(host->dev, "Failed to request pinctrl\n");
-		res = PTR_ERR(pinctrl);
-		goto err_ecc_ioremap;
-	}
+	if (nand_nfc.is_initialized) {
+		/* NFC driver is probed and initialized */
+		host->nfc = &nand_nfc;
 
-	if (gpio_is_valid(host->board.rdy_pin)) {
-		res = gpio_request(host->board.rdy_pin, "nand_rdy");
-		if (res < 0) {
-			dev_err(&pdev->dev,
-				"can't request rdy gpio %d\n",
-				host->board.rdy_pin);
-			goto err_ecc_ioremap;
+		nand_chip->select_chip = nfc_select_chip;
+		nand_chip->dev_ready = nfc_device_ready;
+		nand_chip->cmdfunc = nfc_nand_command;
+
+		/* Initialize the interrupt for NFC */
+		irq = platform_get_irq(pdev, 0);
+		if (irq < 0) {
+			dev_err(host->dev, "Cannot get HSMC irq!\n");
+			res = irq;
+			goto err_nand_ioremap;
 		}
 
-		res = gpio_direction_input(host->board.rdy_pin);
-		if (res < 0) {
-			dev_err(&pdev->dev,
-				"can't request input direction rdy gpio %d\n",
-				host->board.rdy_pin);
-			goto err_ecc_ioremap;
+		res = devm_request_irq(&pdev->dev, irq, hsmc_interrupt,
+				0, "hsmc", host);
+		if (res) {
+			dev_err(&pdev->dev, "Unable to request HSMC irq %d\n",
+				irq);
+			goto err_nand_ioremap;
 		}
+	} else {
+		res = atmel_nand_set_enable_ready_pins(mtd);
+		if (res)
+			goto err_nand_ioremap;
 
-		nand_chip->dev_ready = atmel_nand_device_ready;
-	}
-
-	if (gpio_is_valid(host->board.enable_pin)) {
-		res = gpio_request(host->board.enable_pin, "nand_enable");
-		if (res < 0) {
-			dev_err(&pdev->dev,
-				"can't request enable gpio %d\n",
-				host->board.enable_pin);
-			goto err_ecc_ioremap;
-		}
-
-		res = gpio_direction_output(host->board.enable_pin, 1);
-		if (res < 0) {
-			dev_err(&pdev->dev,
-				"can't request output direction enable gpio %d\n",
-				host->board.enable_pin);
-			goto err_ecc_ioremap;
-		}
+		nand_chip->cmd_ctrl = atmel_nand_cmd_ctrl;
 	}
 
 	nand_chip->ecc.mode = host->board.ecc_mode;
@@ -1573,7 +2080,8 @@
 	atmel_nand_enable(host);
 
 	if (gpio_is_valid(host->board.det_pin)) {
-		res = gpio_request(host->board.det_pin, "nand_det");
+		res = devm_gpio_request(&pdev->dev,
+				host->board.det_pin, "nand_det");
 		if (res < 0) {
 			dev_err(&pdev->dev,
 				"can't request det gpio %d\n",
@@ -1601,7 +2109,7 @@
 		nand_chip->bbt_options |= NAND_BBT_USE_FLASH;
 	}
 
-	if (!cpu_has_dma())
+	if (!host->board.has_dma)
 		use_dma = 0;
 
 	if (use_dma) {
@@ -1637,6 +2145,15 @@
 			goto err_hw_ecc;
 	}
 
+	/* initialize the nfc configuration register */
+	if (host->nfc && host->nfc->use_nfc_sram) {
+		res = nfc_sram_init(mtd);
+		if (res) {
+			host->nfc->use_nfc_sram = false;
+			dev_err(host->dev, "Disable use nfc sram for data transfer.\n");
+		}
+	}
+
 	/* second phase scan */
 	if (nand_scan_tail(mtd)) {
 		res = -ENXIO;
@@ -1651,27 +2168,16 @@
 		return res;
 
 err_scan_tail:
-	if (host->has_pmecc && host->nand_chip.ecc.mode == NAND_ECC_HW) {
+	if (host->has_pmecc && host->nand_chip.ecc.mode == NAND_ECC_HW)
 		pmecc_writel(host->ecc, CTRL, PMECC_CTRL_DISABLE);
-		pmecc_data_free(host);
-	}
-	if (host->ecc)
-		iounmap(host->ecc);
-	if (host->pmerrloc_base)
-		iounmap(host->pmerrloc_base);
-	if (host->pmecc_rom_base)
-		iounmap(host->pmecc_rom_base);
 err_hw_ecc:
 err_scan_ident:
 err_no_card:
 	atmel_nand_disable(host);
-	platform_set_drvdata(pdev, NULL);
 	if (host->dma_chan)
 		dma_release_channel(host->dma_chan);
-err_ecc_ioremap:
-	iounmap(host->io_base);
 err_nand_ioremap:
-	kfree(host);
+	platform_driver_unregister(&atmel_nand_nfc_driver);
 	return res;
 }
 
@@ -1691,30 +2197,12 @@
 		pmecc_writel(host->ecc, CTRL, PMECC_CTRL_DISABLE);
 		pmerrloc_writel(host->pmerrloc_base, ELDIS,
 				PMERRLOC_DISABLE);
-		pmecc_data_free(host);
 	}
 
-	if (gpio_is_valid(host->board.det_pin))
-		gpio_free(host->board.det_pin);
-
-	if (gpio_is_valid(host->board.enable_pin))
-		gpio_free(host->board.enable_pin);
-
-	if (gpio_is_valid(host->board.rdy_pin))
-		gpio_free(host->board.rdy_pin);
-
-	if (host->ecc)
-		iounmap(host->ecc);
-	if (host->pmecc_rom_base)
-		iounmap(host->pmecc_rom_base);
-	if (host->pmerrloc_base)
-		iounmap(host->pmerrloc_base);
-
 	if (host->dma_chan)
 		dma_release_channel(host->dma_chan);
 
-	iounmap(host->io_base);
-	kfree(host);
+	platform_driver_unregister(&atmel_nand_nfc_driver);
 
 	return 0;
 }
@@ -1728,6 +2216,59 @@
 MODULE_DEVICE_TABLE(of, atmel_nand_dt_ids);
 #endif
 
+static int atmel_nand_nfc_probe(struct platform_device *pdev)
+{
+	struct atmel_nfc *nfc = &nand_nfc;
+	struct resource *nfc_cmd_regs, *nfc_hsmc_regs, *nfc_sram;
+
+	nfc_cmd_regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	nfc->base_cmd_regs = devm_ioremap_resource(&pdev->dev, nfc_cmd_regs);
+	if (IS_ERR(nfc->base_cmd_regs))
+		return PTR_ERR(nfc->base_cmd_regs);
+
+	nfc_hsmc_regs = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	nfc->hsmc_regs = devm_ioremap_resource(&pdev->dev, nfc_hsmc_regs);
+	if (IS_ERR(nfc->hsmc_regs))
+		return PTR_ERR(nfc->hsmc_regs);
+
+	nfc_sram = platform_get_resource(pdev, IORESOURCE_MEM, 2);
+	if (nfc_sram) {
+		nfc->sram_bank0 = devm_ioremap_resource(&pdev->dev, nfc_sram);
+		if (IS_ERR(nfc->sram_bank0)) {
+			dev_warn(&pdev->dev, "Fail to ioremap the NFC sram with error: %ld. So disable NFC sram.\n",
+					PTR_ERR(nfc->sram_bank0));
+		} else {
+			nfc->use_nfc_sram = true;
+			nfc->sram_bank0_phys = (dma_addr_t)nfc_sram->start;
+
+			if (pdev->dev.of_node)
+				nfc->write_by_sram = of_property_read_bool(
+						pdev->dev.of_node,
+						"atmel,write-by-sram");
+		}
+	}
+
+	nfc->is_initialized = true;
+	dev_info(&pdev->dev, "NFC is probed.\n");
+	return 0;
+}
+
+#if defined(CONFIG_OF)
+static struct of_device_id atmel_nand_nfc_match[] = {
+	{ .compatible = "atmel,sama5d3-nfc" },
+	{ /* sentinel */ }
+};
+#endif
+
+static struct platform_driver atmel_nand_nfc_driver = {
+	.driver = {
+		.name = "atmel_nand_nfc",
+		.owner = THIS_MODULE,
+		.of_match_table = of_match_ptr(atmel_nand_nfc_match),
+	},
+	.probe = atmel_nand_nfc_probe,
+};
+
 static struct platform_driver atmel_nand_driver = {
 	.remove		= __exit_p(atmel_nand_remove),
 	.driver		= {
diff --git a/drivers/mtd/nand/atmel_nand_nfc.h b/drivers/mtd/nand/atmel_nand_nfc.h
new file mode 100644
index 0000000..4efd117
--- /dev/null
+++ b/drivers/mtd/nand/atmel_nand_nfc.h
@@ -0,0 +1,98 @@
+/*
+ * Atmel Nand Flash Controller (NFC) - System peripherals regsters.
+ * Based on SAMA5D3 datasheet.
+ *
+ * © Copyright 2013 Atmel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+
+#ifndef ATMEL_NAND_NFC_H
+#define ATMEL_NAND_NFC_H
+
+/*
+ * HSMC NFC registers
+ */
+#define ATMEL_HSMC_NFC_CFG	0x00		/* NFC Configuration Register */
+#define		NFC_CFG_PAGESIZE	(7 << 0)
+#define			NFC_CFG_PAGESIZE_512	(0 << 0)
+#define			NFC_CFG_PAGESIZE_1024	(1 << 0)
+#define			NFC_CFG_PAGESIZE_2048	(2 << 0)
+#define			NFC_CFG_PAGESIZE_4096	(3 << 0)
+#define			NFC_CFG_PAGESIZE_8192	(4 << 0)
+#define		NFC_CFG_WSPARE		(1 << 8)
+#define		NFC_CFG_RSPARE		(1 << 9)
+#define		NFC_CFG_NFC_DTOCYC	(0xf << 16)
+#define		NFC_CFG_NFC_DTOMUL	(0x7 << 20)
+#define		NFC_CFG_NFC_SPARESIZE	(0x7f << 24)
+#define		NFC_CFG_NFC_SPARESIZE_BIT_POS	24
+
+#define ATMEL_HSMC_NFC_CTRL	0x04		/* NFC Control Register */
+#define		NFC_CTRL_ENABLE		(1 << 0)
+#define		NFC_CTRL_DISABLE	(1 << 1)
+
+#define ATMEL_HSMC_NFC_SR	0x08		/* NFC Status Register */
+#define		NFC_SR_XFR_DONE		(1 << 16)
+#define		NFC_SR_CMD_DONE		(1 << 17)
+#define		NFC_SR_RB_EDGE		(1 << 24)
+
+#define ATMEL_HSMC_NFC_IER	0x0c
+#define ATMEL_HSMC_NFC_IDR	0x10
+#define ATMEL_HSMC_NFC_IMR	0x14
+#define ATMEL_HSMC_NFC_CYCLE0	0x18		/* NFC Address Cycle Zero */
+#define		ATMEL_HSMC_NFC_ADDR_CYCLE0	(0xff)
+
+#define ATMEL_HSMC_NFC_BANK	0x1c		/* NFC Bank Register */
+#define		ATMEL_HSMC_NFC_BANK0		(0 << 0)
+#define		ATMEL_HSMC_NFC_BANK1		(1 << 0)
+
+#define nfc_writel(addr, reg, value) \
+	writel((value), (addr) + ATMEL_HSMC_NFC_##reg)
+
+#define nfc_readl(addr, reg) \
+	readl_relaxed((addr) + ATMEL_HSMC_NFC_##reg)
+
+/*
+ * NFC Address Command definitions
+ */
+#define NFCADDR_CMD_CMD1	(0xff << 2)	/* Command for Cycle 1 */
+#define NFCADDR_CMD_CMD1_BIT_POS	2
+#define NFCADDR_CMD_CMD2	(0xff << 10)	/* Command for Cycle 2 */
+#define NFCADDR_CMD_CMD2_BIT_POS	10
+#define NFCADDR_CMD_VCMD2	(0x1 << 18)	/* Valid Cycle 2 Command */
+#define NFCADDR_CMD_ACYCLE	(0x7 << 19)	/* Number of Address required */
+#define		NFCADDR_CMD_ACYCLE_NONE		(0x0 << 19)
+#define		NFCADDR_CMD_ACYCLE_1		(0x1 << 19)
+#define		NFCADDR_CMD_ACYCLE_2		(0x2 << 19)
+#define		NFCADDR_CMD_ACYCLE_3		(0x3 << 19)
+#define		NFCADDR_CMD_ACYCLE_4		(0x4 << 19)
+#define		NFCADDR_CMD_ACYCLE_5		(0x5 << 19)
+#define NFCADDR_CMD_ACYCLE_BIT_POS	19
+#define NFCADDR_CMD_CSID	(0x7 << 22)	/* Chip Select Identifier */
+#define		NFCADDR_CMD_CSID_0		(0x0 << 22)
+#define		NFCADDR_CMD_CSID_1		(0x1 << 22)
+#define		NFCADDR_CMD_CSID_2		(0x2 << 22)
+#define		NFCADDR_CMD_CSID_3		(0x3 << 22)
+#define		NFCADDR_CMD_CSID_4		(0x4 << 22)
+#define		NFCADDR_CMD_CSID_5		(0x5 << 22)
+#define		NFCADDR_CMD_CSID_6		(0x6 << 22)
+#define		NFCADDR_CMD_CSID_7		(0x7 << 22)
+#define NFCADDR_CMD_DATAEN	(0x1 << 25)	/* Data Transfer Enable */
+#define NFCADDR_CMD_DATADIS	(0x0 << 25)	/* Data Transfer Disable */
+#define NFCADDR_CMD_NFCRD	(0x0 << 26)	/* NFC Read Enable */
+#define NFCADDR_CMD_NFCWR	(0x1 << 26)	/* NFC Write Enable */
+#define NFCADDR_CMD_NFCBUSY	(0x1 << 27)	/* NFC Busy */
+
+#define nfc_cmd_addr1234_writel(cmd, addr1234, nfc_base) \
+	writel((addr1234), (cmd) + nfc_base)
+
+#define nfc_cmd_readl(bitstatus, nfc_base) \
+	readl_relaxed((bitstatus) + nfc_base)
+
+#define NFC_TIME_OUT_MS		100
+#define	NFC_SRAM_BANK1_OFFSET	0x1200
+
+#endif
diff --git a/drivers/mtd/nand/au1550nd.c b/drivers/mtd/nand/au1550nd.c
index 217459d..ae8dd7c 100644
--- a/drivers/mtd/nand/au1550nd.c
+++ b/drivers/mtd/nand/au1550nd.c
@@ -411,7 +411,7 @@
 	struct resource *r;
 	int ret, cs;
 
-	pd = pdev->dev.platform_data;
+	pd = dev_get_platdata(&pdev->dev);
 	if (!pd) {
 		dev_err(&pdev->dev, "missing platform data\n");
 		return -ENODEV;
diff --git a/drivers/mtd/nand/bf5xx_nand.c b/drivers/mtd/nand/bf5xx_nand.c
index 776df36..2c42e12 100644
--- a/drivers/mtd/nand/bf5xx_nand.c
+++ b/drivers/mtd/nand/bf5xx_nand.c
@@ -171,7 +171,7 @@
 
 static struct bf5xx_nand_platform *to_nand_plat(struct platform_device *pdev)
 {
-	return pdev->dev.platform_data;
+	return dev_get_platdata(&pdev->dev);
 }
 
 /*
@@ -671,8 +671,6 @@
 {
 	struct bf5xx_nand_info *info = to_nand_info(pdev);
 
-	platform_set_drvdata(pdev, NULL);
-
 	/* first thing we need to do is release all our mtds
 	 * and their partitions, then go through freeing the
 	 * resources used
@@ -832,7 +830,6 @@
 out_err_nand_scan:
 	bf5xx_nand_dma_remove(info);
 out_err_hw_init:
-	platform_set_drvdata(pdev, NULL);
 	kfree(info);
 out_err_kzalloc:
 	peripheral_free_list(bfin_nfc_pin_req);
diff --git a/drivers/mtd/nand/cs553x_nand.c b/drivers/mtd/nand/cs553x_nand.c
index 2cdeab8..d469a9a 100644
--- a/drivers/mtd/nand/cs553x_nand.c
+++ b/drivers/mtd/nand/cs553x_nand.c
@@ -197,7 +197,7 @@
 	}
 
 	/* Allocate memory for MTD device structure and private data */
-	new_mtd = kmalloc(sizeof(struct mtd_info) + sizeof(struct nand_chip), GFP_KERNEL);
+	new_mtd = kzalloc(sizeof(struct mtd_info) + sizeof(struct nand_chip), GFP_KERNEL);
 	if (!new_mtd) {
 		printk(KERN_WARNING "Unable to allocate CS553X NAND MTD device structure.\n");
 		err = -ENOMEM;
@@ -207,10 +207,6 @@
 	/* Get pointer to private data */
 	this = (struct nand_chip *)(&new_mtd[1]);
 
-	/* Initialize structures */
-	memset(new_mtd, 0, sizeof(struct mtd_info));
-	memset(this, 0, sizeof(struct nand_chip));
-
 	/* Link the private data with the MTD structure */
 	new_mtd->priv = this;
 	new_mtd->owner = THIS_MODULE;
diff --git a/drivers/mtd/nand/davinci_nand.c b/drivers/mtd/nand/davinci_nand.c
index c3e15a5..b77a01e 100644
--- a/drivers/mtd/nand/davinci_nand.c
+++ b/drivers/mtd/nand/davinci_nand.c
@@ -530,7 +530,7 @@
 static struct davinci_nand_pdata
 	*nand_davinci_get_pdata(struct platform_device *pdev)
 {
-	if (!pdev->dev.platform_data && pdev->dev.of_node) {
+	if (!dev_get_platdata(&pdev->dev) && pdev->dev.of_node) {
 		struct davinci_nand_pdata *pdata;
 		const char *mode;
 		u32 prop;
@@ -575,13 +575,13 @@
 			pdata->bbt_options = NAND_BBT_USE_FLASH;
 	}
 
-	return pdev->dev.platform_data;
+	return dev_get_platdata(&pdev->dev);
 }
 #else
 static struct davinci_nand_pdata
 	*nand_davinci_get_pdata(struct platform_device *pdev)
 {
-	return pdev->dev.platform_data;
+	return dev_get_platdata(&pdev->dev);
 }
 #endif
 
@@ -623,11 +623,14 @@
 		goto err_nomem;
 	}
 
-	vaddr = devm_request_and_ioremap(&pdev->dev, res1);
-	base = devm_request_and_ioremap(&pdev->dev, res2);
-	if (!vaddr || !base) {
-		dev_err(&pdev->dev, "ioremap failed\n");
-		ret = -EADDRNOTAVAIL;
+	vaddr = devm_ioremap_resource(&pdev->dev, res1);
+	if (IS_ERR(vaddr)) {
+		ret = PTR_ERR(vaddr);
+		goto err_ioremap;
+	}
+	base = devm_ioremap_resource(&pdev->dev, res2);
+	if (IS_ERR(base)) {
+		ret = PTR_ERR(base);
 		goto err_ioremap;
 	}
 
diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c
index 0c8bb6b..2ed2bb3 100644
--- a/drivers/mtd/nand/denali.c
+++ b/drivers/mtd/nand/denali.c
@@ -1520,7 +1520,7 @@
 	 * so just let controller do 15bit ECC for MLC and 8bit ECC for
 	 * SLC if possible.
 	 * */
-	if (denali->nand.cellinfo & 0xc &&
+	if (denali->nand.cellinfo & NAND_CI_CELLTYPE_MSK &&
 			(denali->mtd.oobsize > (denali->bbtskipbytes +
 			ECC_15BITS * (denali->mtd.writesize /
 			ECC_SECTOR_SIZE)))) {
diff --git a/drivers/mtd/nand/diskonchip.c b/drivers/mtd/nand/diskonchip.c
index 81fa578..eaa3c29 100644
--- a/drivers/mtd/nand/diskonchip.c
+++ b/drivers/mtd/nand/diskonchip.c
@@ -46,13 +46,13 @@
 	0xfffd8000, 0xfffda000, 0xfffdc000, 0xfffde000,
 	0xfffe0000, 0xfffe2000, 0xfffe4000, 0xfffe6000,
 	0xfffe8000, 0xfffea000, 0xfffec000, 0xfffee000,
-#else /*  CONFIG_MTD_DOCPROBE_HIGH */
+#else
 	0xc8000, 0xca000, 0xcc000, 0xce000,
 	0xd0000, 0xd2000, 0xd4000, 0xd6000,
 	0xd8000, 0xda000, 0xdc000, 0xde000,
 	0xe0000, 0xe2000, 0xe4000, 0xe6000,
 	0xe8000, 0xea000, 0xec000, 0xee000,
-#endif /*  CONFIG_MTD_DOCPROBE_HIGH */
+#endif
 #endif
 	0xffffffff };
 
diff --git a/drivers/mtd/nand/docg4.c b/drivers/mtd/nand/docg4.c
index fa25e7a..548db23 100644
--- a/drivers/mtd/nand/docg4.c
+++ b/drivers/mtd/nand/docg4.c
@@ -1093,7 +1093,6 @@
 	struct nand_chip *nand = mtd->priv;
 	struct docg4_priv *doc = nand->priv;
 	struct nand_bbt_descr *bbtd = nand->badblock_pattern;
-	int block = (int)(ofs >> nand->bbt_erase_shift);
 	int page = (int)(ofs >> nand->page_shift);
 	uint32_t g4_addr = mtd_to_docg4_address(page, 0);
 
@@ -1108,9 +1107,6 @@
 	if (buf == NULL)
 		return -ENOMEM;
 
-	/* update bbt in memory */
-	nand->bbt[block / 4] |= 0x01 << ((block & 0x03) * 2);
-
 	/* write bit-wise negation of pattern to oob buffer */
 	memset(nand->oob_poi, 0xff, mtd->oobsize);
 	for (i = 0; i < bbtd->len; i++)
@@ -1120,8 +1116,6 @@
 	write_page_prologue(mtd, g4_addr);
 	docg4_write_page(mtd, nand, buf, 1);
 	ret = pageprog(mtd);
-	if (!ret)
-		mtd->ecc_stats.badblocks++;
 
 	kfree(buf);
 
@@ -1368,7 +1362,6 @@
 		struct nand_chip *nand = mtd->priv;
 		struct docg4_priv *doc = nand->priv;
 		nand_release(mtd); /* deletes partitions and mtd devices */
-		platform_set_drvdata(pdev, NULL);
 		free_bch(doc->bch);
 		kfree(mtd);
 	}
@@ -1380,7 +1373,6 @@
 {
 	struct docg4_priv *doc = platform_get_drvdata(pdev);
 	nand_release(doc->mtd);
-	platform_set_drvdata(pdev, NULL);
 	free_bch(doc->bch);
 	kfree(doc->mtd);
 	iounmap(doc->virtadr);
diff --git a/drivers/mtd/nand/fsl_ifc_nand.c b/drivers/mtd/nand/fsl_ifc_nand.c
index f1f7f12..317a771 100644
--- a/drivers/mtd/nand/fsl_ifc_nand.c
+++ b/drivers/mtd/nand/fsl_ifc_nand.c
@@ -823,7 +823,7 @@
 
 	/* set up nand options */
 	chip->bbt_options = NAND_BBT_USE_FLASH;
-
+	chip->options = NAND_NO_SUBPAGE_WRITE;
 
 	if (ioread32be(&ifc->cspr_cs[priv->bank].cspr) & CSPR_PORT_SIZE_16) {
 		chip->read_byte = fsl_ifc_read_byte16;
@@ -908,7 +908,6 @@
 
 	ifc_nand_ctrl->chips[priv->bank] = NULL;
 	dev_set_drvdata(priv->dev, NULL);
-	kfree(priv);
 
 	return 0;
 }
diff --git a/drivers/mtd/nand/fsmc_nand.c b/drivers/mtd/nand/fsmc_nand.c
index 911e243..3dc1a75 100644
--- a/drivers/mtd/nand/fsmc_nand.c
+++ b/drivers/mtd/nand/fsmc_nand.c
@@ -889,6 +889,24 @@
 	if (of_get_property(np, "nand-skip-bbtscan", NULL))
 		pdata->options = NAND_SKIP_BBTSCAN;
 
+	pdata->nand_timings = devm_kzalloc(&pdev->dev,
+				sizeof(*pdata->nand_timings), GFP_KERNEL);
+	if (!pdata->nand_timings) {
+		dev_err(&pdev->dev, "no memory for nand_timing\n");
+		return -ENOMEM;
+	}
+	of_property_read_u8_array(np, "timings", (u8 *)pdata->nand_timings,
+						sizeof(*pdata->nand_timings));
+
+	/* Set default NAND bank to 0 */
+	pdata->bank = 0;
+	if (!of_property_read_u32(np, "bank", &val)) {
+		if (val > 3) {
+			dev_err(&pdev->dev, "invalid bank %u\n", val);
+			return -EINVAL;
+		}
+		pdata->bank = val;
+	}
 	return 0;
 }
 #else
@@ -940,9 +958,6 @@
 	}
 
 	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "nand_data");
-	if (!res)
-		return -EINVAL;
-
 	host->data_va = devm_ioremap_resource(&pdev->dev, res);
 	if (IS_ERR(host->data_va))
 		return PTR_ERR(host->data_va);
@@ -950,25 +965,16 @@
 	host->data_pa = (dma_addr_t)res->start;
 
 	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "nand_addr");
-	if (!res)
-		return -EINVAL;
-
 	host->addr_va = devm_ioremap_resource(&pdev->dev, res);
 	if (IS_ERR(host->addr_va))
 		return PTR_ERR(host->addr_va);
 
 	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "nand_cmd");
-	if (!res)
-		return -EINVAL;
-
 	host->cmd_va = devm_ioremap_resource(&pdev->dev, res);
 	if (IS_ERR(host->cmd_va))
 		return PTR_ERR(host->cmd_va);
 
 	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "fsmc_regs");
-	if (!res)
-		return -EINVAL;
-
 	host->regs_va = devm_ioremap_resource(&pdev->dev, res);
 	if (IS_ERR(host->regs_va))
 		return PTR_ERR(host->regs_va);
@@ -1174,8 +1180,6 @@
 {
 	struct fsmc_nand_data *host = platform_get_drvdata(pdev);
 
-	platform_set_drvdata(pdev, NULL);
-
 	if (host) {
 		nand_release(&host->mtd);
 
@@ -1190,7 +1194,7 @@
 	return 0;
 }
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 static int fsmc_nand_suspend(struct device *dev)
 {
 	struct fsmc_nand_data *host = dev_get_drvdata(dev);
@@ -1210,9 +1214,9 @@
 	}
 	return 0;
 }
+#endif
 
 static SIMPLE_DEV_PM_OPS(fsmc_nand_pm_ops, fsmc_nand_suspend, fsmc_nand_resume);
-#endif
 
 #ifdef CONFIG_OF
 static const struct of_device_id fsmc_nand_id_table[] = {
@@ -1229,9 +1233,7 @@
 		.owner = THIS_MODULE,
 		.name = "fsmc-nand",
 		.of_match_table = of_match_ptr(fsmc_nand_id_table),
-#ifdef CONFIG_PM
 		.pm = &fsmc_nand_pm_ops,
-#endif
 	},
 };
 
diff --git a/drivers/mtd/nand/gpio.c b/drivers/mtd/nand/gpio.c
index 89065dd..e826f89 100644
--- a/drivers/mtd/nand/gpio.c
+++ b/drivers/mtd/nand/gpio.c
@@ -17,6 +17,7 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/err.h>
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/module.h>
@@ -86,59 +87,11 @@
 	gpio_nand_dosync(gpiomtd);
 }
 
-static void gpio_nand_writebuf(struct mtd_info *mtd, const u_char *buf, int len)
-{
-	struct nand_chip *this = mtd->priv;
-
-	iowrite8_rep(this->IO_ADDR_W, buf, len);
-}
-
-static void gpio_nand_readbuf(struct mtd_info *mtd, u_char *buf, int len)
-{
-	struct nand_chip *this = mtd->priv;
-
-	ioread8_rep(this->IO_ADDR_R, buf, len);
-}
-
-static void gpio_nand_writebuf16(struct mtd_info *mtd, const u_char *buf,
-				 int len)
-{
-	struct nand_chip *this = mtd->priv;
-
-	if (IS_ALIGNED((unsigned long)buf, 2)) {
-		iowrite16_rep(this->IO_ADDR_W, buf, len>>1);
-	} else {
-		int i;
-		unsigned short *ptr = (unsigned short *)buf;
-
-		for (i = 0; i < len; i += 2, ptr++)
-			writew(*ptr, this->IO_ADDR_W);
-	}
-}
-
-static void gpio_nand_readbuf16(struct mtd_info *mtd, u_char *buf, int len)
-{
-	struct nand_chip *this = mtd->priv;
-
-	if (IS_ALIGNED((unsigned long)buf, 2)) {
-		ioread16_rep(this->IO_ADDR_R, buf, len>>1);
-	} else {
-		int i;
-		unsigned short *ptr = (unsigned short *)buf;
-
-		for (i = 0; i < len; i += 2, ptr++)
-			*ptr = readw(this->IO_ADDR_R);
-	}
-}
-
 static int gpio_nand_devready(struct mtd_info *mtd)
 {
 	struct gpiomtd *gpiomtd = gpio_nand_getpriv(mtd);
 
-	if (gpio_is_valid(gpiomtd->plat.gpio_rdy))
-		return gpio_get_value(gpiomtd->plat.gpio_rdy);
-
-	return 1;
+	return gpio_get_value(gpiomtd->plat.gpio_rdy);
 }
 
 #ifdef CONFIG_OF
@@ -153,6 +106,9 @@
 {
 	u32 val;
 
+	if (!dev->of_node)
+		return -ENODEV;
+
 	if (!of_property_read_u32(dev->of_node, "bank-width", &val)) {
 		if (val == 2) {
 			plat->options |= NAND_BUSWIDTH_16;
@@ -211,8 +167,8 @@
 	if (!ret)
 		return ret;
 
-	if (dev->platform_data) {
-		memcpy(plat, dev->platform_data, sizeof(*plat));
+	if (dev_get_platdata(dev)) {
+		memcpy(plat, dev_get_platdata(dev), sizeof(*plat));
 		return 0;
 	}
 
@@ -230,145 +186,100 @@
 	return platform_get_resource(pdev, IORESOURCE_MEM, 1);
 }
 
-static int gpio_nand_remove(struct platform_device *dev)
+static int gpio_nand_remove(struct platform_device *pdev)
 {
-	struct gpiomtd *gpiomtd = platform_get_drvdata(dev);
-	struct resource *res;
+	struct gpiomtd *gpiomtd = platform_get_drvdata(pdev);
 
 	nand_release(&gpiomtd->mtd_info);
 
-	res = gpio_nand_get_io_sync(dev);
-	iounmap(gpiomtd->io_sync);
-	if (res)
-		release_mem_region(res->start, resource_size(res));
-
-	res = platform_get_resource(dev, IORESOURCE_MEM, 0);
-	iounmap(gpiomtd->nand_chip.IO_ADDR_R);
-	release_mem_region(res->start, resource_size(res));
-
 	if (gpio_is_valid(gpiomtd->plat.gpio_nwp))
 		gpio_set_value(gpiomtd->plat.gpio_nwp, 0);
 	gpio_set_value(gpiomtd->plat.gpio_nce, 1);
 
-	gpio_free(gpiomtd->plat.gpio_cle);
-	gpio_free(gpiomtd->plat.gpio_ale);
-	gpio_free(gpiomtd->plat.gpio_nce);
-	if (gpio_is_valid(gpiomtd->plat.gpio_nwp))
-		gpio_free(gpiomtd->plat.gpio_nwp);
-	if (gpio_is_valid(gpiomtd->plat.gpio_rdy))
-		gpio_free(gpiomtd->plat.gpio_rdy);
-
 	return 0;
 }
 
-static void __iomem *request_and_remap(struct resource *res, size_t size,
-					const char *name, int *err)
-{
-	void __iomem *ptr;
-
-	if (!request_mem_region(res->start, resource_size(res), name)) {
-		*err = -EBUSY;
-		return NULL;
-	}
-
-	ptr = ioremap(res->start, size);
-	if (!ptr) {
-		release_mem_region(res->start, resource_size(res));
-		*err = -ENOMEM;
-	}
-	return ptr;
-}
-
-static int gpio_nand_probe(struct platform_device *dev)
+static int gpio_nand_probe(struct platform_device *pdev)
 {
 	struct gpiomtd *gpiomtd;
-	struct nand_chip *this;
-	struct resource *res0, *res1;
+	struct nand_chip *chip;
+	struct resource *res;
 	struct mtd_part_parser_data ppdata = {};
 	int ret = 0;
 
-	if (!dev->dev.of_node && !dev->dev.platform_data)
+	if (!pdev->dev.of_node && !dev_get_platdata(&pdev->dev))
 		return -EINVAL;
 
-	res0 = platform_get_resource(dev, IORESOURCE_MEM, 0);
-	if (!res0)
-		return -EINVAL;
-
-	gpiomtd = devm_kzalloc(&dev->dev, sizeof(*gpiomtd), GFP_KERNEL);
-	if (gpiomtd == NULL) {
-		dev_err(&dev->dev, "failed to create NAND MTD\n");
+	gpiomtd = devm_kzalloc(&pdev->dev, sizeof(*gpiomtd), GFP_KERNEL);
+	if (!gpiomtd) {
+		dev_err(&pdev->dev, "failed to create NAND MTD\n");
 		return -ENOMEM;
 	}
 
-	this = &gpiomtd->nand_chip;
-	this->IO_ADDR_R = request_and_remap(res0, 2, "NAND", &ret);
-	if (!this->IO_ADDR_R) {
-		dev_err(&dev->dev, "unable to map NAND\n");
-		goto err_map;
+	chip = &gpiomtd->nand_chip;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	chip->IO_ADDR_R = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(chip->IO_ADDR_R))
+		return PTR_ERR(chip->IO_ADDR_R);
+
+	res = gpio_nand_get_io_sync(pdev);
+	if (res) {
+		gpiomtd->io_sync = devm_ioremap_resource(&pdev->dev, res);
+		if (IS_ERR(gpiomtd->io_sync))
+			return PTR_ERR(gpiomtd->io_sync);
 	}
 
-	res1 = gpio_nand_get_io_sync(dev);
-	if (res1) {
-		gpiomtd->io_sync = request_and_remap(res1, 4, "NAND sync", &ret);
-		if (!gpiomtd->io_sync) {
-			dev_err(&dev->dev, "unable to map sync NAND\n");
-			goto err_sync;
-		}
-	}
-
-	ret = gpio_nand_get_config(&dev->dev, &gpiomtd->plat);
+	ret = gpio_nand_get_config(&pdev->dev, &gpiomtd->plat);
 	if (ret)
-		goto err_nce;
+		return ret;
 
-	ret = gpio_request(gpiomtd->plat.gpio_nce, "NAND NCE");
+	ret = devm_gpio_request(&pdev->dev, gpiomtd->plat.gpio_nce, "NAND NCE");
 	if (ret)
-		goto err_nce;
+		return ret;
 	gpio_direction_output(gpiomtd->plat.gpio_nce, 1);
+
 	if (gpio_is_valid(gpiomtd->plat.gpio_nwp)) {
-		ret = gpio_request(gpiomtd->plat.gpio_nwp, "NAND NWP");
+		ret = devm_gpio_request(&pdev->dev, gpiomtd->plat.gpio_nwp,
+					"NAND NWP");
 		if (ret)
-			goto err_nwp;
-		gpio_direction_output(gpiomtd->plat.gpio_nwp, 1);
+			return ret;
 	}
-	ret = gpio_request(gpiomtd->plat.gpio_ale, "NAND ALE");
+
+	ret = devm_gpio_request(&pdev->dev, gpiomtd->plat.gpio_ale, "NAND ALE");
 	if (ret)
-		goto err_ale;
+		return ret;
 	gpio_direction_output(gpiomtd->plat.gpio_ale, 0);
-	ret = gpio_request(gpiomtd->plat.gpio_cle, "NAND CLE");
+
+	ret = devm_gpio_request(&pdev->dev, gpiomtd->plat.gpio_cle, "NAND CLE");
 	if (ret)
-		goto err_cle;
+		return ret;
 	gpio_direction_output(gpiomtd->plat.gpio_cle, 0);
+
 	if (gpio_is_valid(gpiomtd->plat.gpio_rdy)) {
-		ret = gpio_request(gpiomtd->plat.gpio_rdy, "NAND RDY");
+		ret = devm_gpio_request(&pdev->dev, gpiomtd->plat.gpio_rdy,
+					"NAND RDY");
 		if (ret)
-			goto err_rdy;
+			return ret;
 		gpio_direction_input(gpiomtd->plat.gpio_rdy);
+		chip->dev_ready = gpio_nand_devready;
 	}
 
+	chip->IO_ADDR_W		= chip->IO_ADDR_R;
+	chip->ecc.mode		= NAND_ECC_SOFT;
+	chip->options		= gpiomtd->plat.options;
+	chip->chip_delay	= gpiomtd->plat.chip_delay;
+	chip->cmd_ctrl		= gpio_nand_cmd_ctrl;
 
-	this->IO_ADDR_W  = this->IO_ADDR_R;
-	this->ecc.mode   = NAND_ECC_SOFT;
-	this->options    = gpiomtd->plat.options;
-	this->chip_delay = gpiomtd->plat.chip_delay;
+	gpiomtd->mtd_info.priv	= chip;
+	gpiomtd->mtd_info.owner	= THIS_MODULE;
 
-	/* install our routines */
-	this->cmd_ctrl   = gpio_nand_cmd_ctrl;
-	this->dev_ready  = gpio_nand_devready;
+	platform_set_drvdata(pdev, gpiomtd);
 
-	if (this->options & NAND_BUSWIDTH_16) {
-		this->read_buf   = gpio_nand_readbuf16;
-		this->write_buf  = gpio_nand_writebuf16;
-	} else {
-		this->read_buf   = gpio_nand_readbuf;
-		this->write_buf  = gpio_nand_writebuf;
-	}
-
-	/* set the mtd private data for the nand driver */
-	gpiomtd->mtd_info.priv = this;
-	gpiomtd->mtd_info.owner = THIS_MODULE;
+	if (gpio_is_valid(gpiomtd->plat.gpio_nwp))
+		gpio_direction_output(gpiomtd->plat.gpio_nwp, 1);
 
 	if (nand_scan(&gpiomtd->mtd_info, 1)) {
-		dev_err(&dev->dev, "no nand chips found?\n");
 		ret = -ENXIO;
 		goto err_wp;
 	}
@@ -377,38 +288,17 @@
 		gpiomtd->plat.adjust_parts(&gpiomtd->plat,
 					   gpiomtd->mtd_info.size);
 
-	ppdata.of_node = dev->dev.of_node;
+	ppdata.of_node = pdev->dev.of_node;
 	ret = mtd_device_parse_register(&gpiomtd->mtd_info, NULL, &ppdata,
 					gpiomtd->plat.parts,
 					gpiomtd->plat.num_parts);
-	if (ret)
-		goto err_wp;
-	platform_set_drvdata(dev, gpiomtd);
-
-	return 0;
+	if (!ret)
+		return 0;
 
 err_wp:
 	if (gpio_is_valid(gpiomtd->plat.gpio_nwp))
 		gpio_set_value(gpiomtd->plat.gpio_nwp, 0);
-	if (gpio_is_valid(gpiomtd->plat.gpio_rdy))
-		gpio_free(gpiomtd->plat.gpio_rdy);
-err_rdy:
-	gpio_free(gpiomtd->plat.gpio_cle);
-err_cle:
-	gpio_free(gpiomtd->plat.gpio_ale);
-err_ale:
-	if (gpio_is_valid(gpiomtd->plat.gpio_nwp))
-		gpio_free(gpiomtd->plat.gpio_nwp);
-err_nwp:
-	gpio_free(gpiomtd->plat.gpio_nce);
-err_nce:
-	iounmap(gpiomtd->io_sync);
-	if (res1)
-		release_mem_region(res1->start, resource_size(res1));
-err_sync:
-	iounmap(gpiomtd->nand_chip.IO_ADDR_R);
-	release_mem_region(res0->start, resource_size(res0));
-err_map:
+
 	return ret;
 }
 
@@ -417,6 +307,7 @@
 	.remove		= gpio_nand_remove,
 	.driver		= {
 		.name	= "gpio-nand",
+		.owner	= THIS_MODULE,
 		.of_match_table = of_match_ptr(gpio_nand_id_table),
 	},
 };
diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
index 25ecfa1..59ab069 100644
--- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
+++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
@@ -26,7 +26,6 @@
 #include <linux/interrupt.h>
 #include <linux/module.h>
 #include <linux/mtd/partitions.h>
-#include <linux/pinctrl/consumer.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
 #include <linux/of_mtd.h>
@@ -112,7 +111,131 @@
 	return true;
 }
 
-int common_nfc_set_geometry(struct gpmi_nand_data *this)
+/*
+ * If we can get the ECC information from the nand chip, we do not
+ * need to calculate them ourselves.
+ *
+ * We may have available oob space in this case.
+ */
+static bool set_geometry_by_ecc_info(struct gpmi_nand_data *this)
+{
+	struct bch_geometry *geo = &this->bch_geometry;
+	struct mtd_info *mtd = &this->mtd;
+	struct nand_chip *chip = mtd->priv;
+	struct nand_oobfree *of = gpmi_hw_ecclayout.oobfree;
+	unsigned int block_mark_bit_offset;
+
+	if (!(chip->ecc_strength_ds > 0 && chip->ecc_step_ds > 0))
+		return false;
+
+	switch (chip->ecc_step_ds) {
+	case SZ_512:
+		geo->gf_len = 13;
+		break;
+	case SZ_1K:
+		geo->gf_len = 14;
+		break;
+	default:
+		dev_err(this->dev,
+			"unsupported nand chip. ecc bits : %d, ecc size : %d\n",
+			chip->ecc_strength_ds, chip->ecc_step_ds);
+		return false;
+	}
+	geo->ecc_chunk_size = chip->ecc_step_ds;
+	geo->ecc_strength = round_up(chip->ecc_strength_ds, 2);
+	if (!gpmi_check_ecc(this))
+		return false;
+
+	/* Keep the C >= O */
+	if (geo->ecc_chunk_size < mtd->oobsize) {
+		dev_err(this->dev,
+			"unsupported nand chip. ecc size: %d, oob size : %d\n",
+			chip->ecc_step_ds, mtd->oobsize);
+		return false;
+	}
+
+	/* The default value, see comment in the legacy_set_geometry(). */
+	geo->metadata_size = 10;
+
+	geo->ecc_chunk_count = mtd->writesize / geo->ecc_chunk_size;
+
+	/*
+	 * Now, the NAND chip with 2K page(data chunk is 512byte) shows below:
+	 *
+	 *    |                          P                            |
+	 *    |<----------------------------------------------------->|
+	 *    |                                                       |
+	 *    |                                        (Block Mark)   |
+	 *    |                      P'                      |      | |     |
+	 *    |<-------------------------------------------->|  D   | |  O' |
+	 *    |                                              |<---->| |<--->|
+	 *    V                                              V      V V     V
+	 *    +---+----------+-+----------+-+----------+-+----------+-+-----+
+	 *    | M |   data   |E|   data   |E|   data   |E|   data   |E|     |
+	 *    +---+----------+-+----------+-+----------+-+----------+-+-----+
+	 *                                                   ^              ^
+	 *                                                   |      O       |
+	 *                                                   |<------------>|
+	 *                                                   |              |
+	 *
+	 *	P : the page size for BCH module.
+	 *	E : The ECC strength.
+	 *	G : the length of Galois Field.
+	 *	N : The chunk count of per page.
+	 *	M : the metasize of per page.
+	 *	C : the ecc chunk size, aka the "data" above.
+	 *	P': the nand chip's page size.
+	 *	O : the nand chip's oob size.
+	 *	O': the free oob.
+	 *
+	 *	The formula for P is :
+	 *
+	 *	            E * G * N
+	 *	       P = ------------ + P' + M
+	 *                      8
+	 *
+	 * The position of block mark moves forward in the ECC-based view
+	 * of page, and the delta is:
+	 *
+	 *                   E * G * (N - 1)
+	 *             D = (---------------- + M)
+	 *                          8
+	 *
+	 * Please see the comment in legacy_set_geometry().
+	 * With the condition C >= O , we still can get same result.
+	 * So the bit position of the physical block mark within the ECC-based
+	 * view of the page is :
+	 *             (P' - D) * 8
+	 */
+	geo->page_size = mtd->writesize + geo->metadata_size +
+		(geo->gf_len * geo->ecc_strength * geo->ecc_chunk_count) / 8;
+
+	/* The available oob size we have. */
+	if (geo->page_size < mtd->writesize + mtd->oobsize) {
+		of->offset = geo->page_size - mtd->writesize;
+		of->length = mtd->oobsize - of->offset;
+	}
+
+	geo->payload_size = mtd->writesize;
+
+	geo->auxiliary_status_offset = ALIGN(geo->metadata_size, 4);
+	geo->auxiliary_size = ALIGN(geo->metadata_size, 4)
+				+ ALIGN(geo->ecc_chunk_count, 4);
+
+	if (!this->swap_block_mark)
+		return true;
+
+	/* For bit swap. */
+	block_mark_bit_offset = mtd->writesize * 8 -
+		(geo->ecc_strength * geo->gf_len * (geo->ecc_chunk_count - 1)
+				+ geo->metadata_size * 8);
+
+	geo->block_mark_byte_offset = block_mark_bit_offset / 8;
+	geo->block_mark_bit_offset  = block_mark_bit_offset % 8;
+	return true;
+}
+
+static int legacy_set_geometry(struct gpmi_nand_data *this)
 {
 	struct bch_geometry *geo = &this->bch_geometry;
 	struct mtd_info *mtd = &this->mtd;
@@ -224,6 +347,11 @@
 	return 0;
 }
 
+int common_nfc_set_geometry(struct gpmi_nand_data *this)
+{
+	return set_geometry_by_ecc_info(this) ? 0 : legacy_set_geometry(this);
+}
+
 struct dma_chan *get_dma_chan(struct gpmi_nand_data *this)
 {
 	int chipnr = this->current_chip;
@@ -355,7 +483,7 @@
 	r = platform_get_resource_byname(pdev, IORESOURCE_MEM, res_name);
 	if (!r) {
 		pr_err("Can't get resource for %s\n", res_name);
-		return -ENXIO;
+		return -ENODEV;
 	}
 
 	p = ioremap(r->start, resource_size(r));
@@ -396,7 +524,7 @@
 	r = platform_get_resource_byname(pdev, IORESOURCE_IRQ, res_name);
 	if (!r) {
 		pr_err("Can't get resource for %s\n", res_name);
-		return -ENXIO;
+		return -ENODEV;
 	}
 
 	err = request_irq(r->start, irq_h, 0, res_name, this);
@@ -473,12 +601,14 @@
 	struct resources *r = &this->resources;
 	char **extra_clks = NULL;
 	struct clk *clk;
-	int i;
+	int err, i;
 
 	/* The main clock is stored in the first. */
 	r->clock[0] = clk_get(this->dev, "gpmi_io");
-	if (IS_ERR(r->clock[0]))
+	if (IS_ERR(r->clock[0])) {
+		err = PTR_ERR(r->clock[0]);
 		goto err_clock;
+	}
 
 	/* Get extra clocks */
 	if (GPMI_IS_MX6Q(this))
@@ -491,8 +621,10 @@
 			break;
 
 		clk = clk_get(this->dev, extra_clks[i - 1]);
-		if (IS_ERR(clk))
+		if (IS_ERR(clk)) {
+			err = PTR_ERR(clk);
 			goto err_clock;
+		}
 
 		r->clock[i] = clk;
 	}
@@ -511,12 +643,11 @@
 err_clock:
 	dev_dbg(this->dev, "failed in finding the clocks.\n");
 	gpmi_put_clks(this);
-	return -ENOMEM;
+	return err;
 }
 
 static int acquire_resources(struct gpmi_nand_data *this)
 {
-	struct pinctrl *pinctrl;
 	int ret;
 
 	ret = acquire_register_block(this, GPMI_NAND_GPMI_REGS_ADDR_RES_NAME);
@@ -535,19 +666,12 @@
 	if (ret)
 		goto exit_dma_channels;
 
-	pinctrl = devm_pinctrl_get_select_default(&this->pdev->dev);
-	if (IS_ERR(pinctrl)) {
-		ret = PTR_ERR(pinctrl);
-		goto exit_pin;
-	}
-
 	ret = gpmi_get_clks(this);
 	if (ret)
 		goto exit_clock;
 	return 0;
 
 exit_clock:
-exit_pin:
 	release_dma_channels(this);
 exit_dma_channels:
 	release_bch_irq(this);
@@ -1153,43 +1277,31 @@
 {
 	struct nand_chip *chip = mtd->priv;
 	struct gpmi_nand_data *this = chip->priv;
-	int block, ret = 0;
+	int ret = 0;
 	uint8_t *block_mark;
 	int column, page, status, chipnr;
 
-	/* Get block number */
-	block = (int)(ofs >> chip->bbt_erase_shift);
-	if (chip->bbt)
-		chip->bbt[block >> 2] |= 0x01 << ((block & 0x03) << 1);
+	chipnr = (int)(ofs >> chip->chip_shift);
+	chip->select_chip(mtd, chipnr);
 
-	/* Do we have a flash based bad block table ? */
-	if (chip->bbt_options & NAND_BBT_USE_FLASH)
-		ret = nand_update_bbt(mtd, ofs);
-	else {
-		chipnr = (int)(ofs >> chip->chip_shift);
-		chip->select_chip(mtd, chipnr);
+	column = this->swap_block_mark ? mtd->writesize : 0;
 
-		column = this->swap_block_mark ? mtd->writesize : 0;
+	/* Write the block mark. */
+	block_mark = this->data_buffer_dma;
+	block_mark[0] = 0; /* bad block marker */
 
-		/* Write the block mark. */
-		block_mark = this->data_buffer_dma;
-		block_mark[0] = 0; /* bad block marker */
+	/* Shift to get page */
+	page = (int)(ofs >> chip->page_shift);
 
-		/* Shift to get page */
-		page = (int)(ofs >> chip->page_shift);
+	chip->cmdfunc(mtd, NAND_CMD_SEQIN, column, page);
+	chip->write_buf(mtd, block_mark, 1);
+	chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1);
 
-		chip->cmdfunc(mtd, NAND_CMD_SEQIN, column, page);
-		chip->write_buf(mtd, block_mark, 1);
-		chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1);
+	status = chip->waitfunc(mtd, chip);
+	if (status & NAND_STATUS_FAIL)
+		ret = -EIO;
 
-		status = chip->waitfunc(mtd, chip);
-		if (status & NAND_STATUS_FAIL)
-			ret = -EIO;
-
-		chip->select_chip(mtd, -1);
-	}
-	if (!ret)
-		mtd->ecc_stats.badblocks++;
+	chip->select_chip(mtd, -1);
 
 	return ret;
 }
@@ -1469,19 +1581,22 @@
 	if (ret)
 		return ret;
 
-	/* Adjust the ECC strength according to the chip. */
-	this->nand.ecc.strength = this->bch_geometry.ecc_strength;
-	this->mtd.ecc_strength = this->bch_geometry.ecc_strength;
-	this->mtd.bitflip_threshold = this->bch_geometry.ecc_strength;
-
 	/* NAND boot init, depends on the gpmi_set_geometry(). */
 	return nand_boot_init(this);
 }
 
-static int gpmi_scan_bbt(struct mtd_info *mtd)
+static void gpmi_nfc_exit(struct gpmi_nand_data *this)
 {
+	nand_release(&this->mtd);
+	gpmi_free_dma_buffer(this);
+}
+
+static int gpmi_init_last(struct gpmi_nand_data *this)
+{
+	struct mtd_info *mtd = &this->mtd;
 	struct nand_chip *chip = mtd->priv;
-	struct gpmi_nand_data *this = chip->priv;
+	struct nand_ecc_ctrl *ecc = &chip->ecc;
+	struct bch_geometry *bch_geo = &this->bch_geometry;
 	int ret;
 
 	/* Prepare for the BBT scan. */
@@ -1489,6 +1604,16 @@
 	if (ret)
 		return ret;
 
+	/* Init the nand_ecc_ctrl{} */
+	ecc->read_page	= gpmi_ecc_read_page;
+	ecc->write_page	= gpmi_ecc_write_page;
+	ecc->read_oob	= gpmi_ecc_read_oob;
+	ecc->write_oob	= gpmi_ecc_write_oob;
+	ecc->mode	= NAND_ECC_HW;
+	ecc->size	= bch_geo->ecc_chunk_size;
+	ecc->strength	= bch_geo->ecc_strength;
+	ecc->layout	= &gpmi_hw_ecclayout;
+
 	/*
 	 * Can we enable the extra features? such as EDO or Sync mode.
 	 *
@@ -1497,14 +1622,7 @@
 	 */
 	gpmi_extra_init(this);
 
-	/* use the default BBT implementation */
-	return nand_default_bbt(mtd);
-}
-
-static void gpmi_nfc_exit(struct gpmi_nand_data *this)
-{
-	nand_release(&this->mtd);
-	gpmi_free_dma_buffer(this);
+	return 0;
 }
 
 static int gpmi_nfc_init(struct gpmi_nand_data *this)
@@ -1530,33 +1648,33 @@
 	chip->read_byte		= gpmi_read_byte;
 	chip->read_buf		= gpmi_read_buf;
 	chip->write_buf		= gpmi_write_buf;
-	chip->ecc.read_page	= gpmi_ecc_read_page;
-	chip->ecc.write_page	= gpmi_ecc_write_page;
-	chip->ecc.read_oob	= gpmi_ecc_read_oob;
-	chip->ecc.write_oob	= gpmi_ecc_write_oob;
-	chip->scan_bbt		= gpmi_scan_bbt;
 	chip->badblock_pattern	= &gpmi_bbt_descr;
 	chip->block_markbad	= gpmi_block_markbad;
 	chip->options		|= NAND_NO_SUBPAGE_WRITE;
-	chip->ecc.mode		= NAND_ECC_HW;
-	chip->ecc.size		= 1;
-	chip->ecc.strength	= 8;
-	chip->ecc.layout	= &gpmi_hw_ecclayout;
 	if (of_get_nand_on_flash_bbt(this->dev->of_node))
 		chip->bbt_options |= NAND_BBT_USE_FLASH | NAND_BBT_NO_OOB;
 
-	/* Allocate a temporary DMA buffer for reading ID in the nand_scan() */
+	/*
+	 * Allocate a temporary DMA buffer for reading ID in the
+	 * nand_scan_ident().
+	 */
 	this->bch_geometry.payload_size = 1024;
 	this->bch_geometry.auxiliary_size = 128;
 	ret = gpmi_alloc_dma_buffer(this);
 	if (ret)
 		goto err_out;
 
-	ret = nand_scan(mtd, 1);
-	if (ret) {
-		pr_err("Chip scan failed\n");
+	ret = nand_scan_ident(mtd, 1, NULL);
+	if (ret)
 		goto err_out;
-	}
+
+	ret = gpmi_init_last(this);
+	if (ret)
+		goto err_out;
+
+	ret = nand_scan_tail(mtd);
+	if (ret)
+		goto err_out;
 
 	ppdata.of_node = this->pdev->dev.of_node;
 	ret = mtd_device_parse_register(mtd, NULL, &ppdata, NULL, 0);
@@ -1601,7 +1719,7 @@
 		pdev->id_entry = of_id->data;
 	} else {
 		pr_err("Failed to find the right device id.\n");
-		return -ENOMEM;
+		return -ENODEV;
 	}
 
 	this = kzalloc(sizeof(*this), GFP_KERNEL);
@@ -1633,7 +1751,6 @@
 exit_nfc_init:
 	release_resources(this);
 exit_acquire_resources:
-	platform_set_drvdata(pdev, NULL);
 	dev_err(this->dev, "driver registration failed: %d\n", ret);
 	kfree(this);
 
@@ -1646,7 +1763,6 @@
 
 	gpmi_nfc_exit(this);
 	release_resources(this);
-	platform_set_drvdata(pdev, NULL);
 	kfree(this);
 	return 0;
 }
diff --git a/drivers/mtd/nand/jz4740_nand.c b/drivers/mtd/nand/jz4740_nand.c
index b76460e..a264b88 100644
--- a/drivers/mtd/nand/jz4740_nand.c
+++ b/drivers/mtd/nand/jz4740_nand.c
@@ -411,7 +411,7 @@
 	struct jz_nand *nand;
 	struct nand_chip *chip;
 	struct mtd_info *mtd;
-	struct jz_nand_platform_data *pdata = pdev->dev.platform_data;
+	struct jz_nand_platform_data *pdata = dev_get_platdata(&pdev->dev);
 	size_t chipnr, bank_idx;
 	uint8_t nand_maf_id = 0, nand_dev_id = 0;
 
@@ -538,7 +538,6 @@
 err_gpio_busy:
 	if (pdata && gpio_is_valid(pdata->busy_gpio))
 		gpio_free(pdata->busy_gpio);
-	platform_set_drvdata(pdev, NULL);
 err_iounmap_mmio:
 	jz_nand_iounmap_resource(nand->mem, nand->base);
 err_free:
@@ -549,7 +548,7 @@
 static int jz_nand_remove(struct platform_device *pdev)
 {
 	struct jz_nand *nand = platform_get_drvdata(pdev);
-	struct jz_nand_platform_data *pdata = pdev->dev.platform_data;
+	struct jz_nand_platform_data *pdata = dev_get_platdata(&pdev->dev);
 	size_t i;
 
 	nand_release(&nand->mtd);
@@ -570,7 +569,6 @@
 
 	jz_nand_iounmap_resource(nand->mem, nand->base);
 
-	platform_set_drvdata(pdev, NULL);
 	kfree(nand);
 
 	return 0;
diff --git a/drivers/mtd/nand/lpc32xx_mlc.c b/drivers/mtd/nand/lpc32xx_mlc.c
index fd1df5e..f4dd2a8 100644
--- a/drivers/mtd/nand/lpc32xx_mlc.c
+++ b/drivers/mtd/nand/lpc32xx_mlc.c
@@ -696,7 +696,7 @@
 	}
 	lpc32xx_wp_disable(host);
 
-	host->pdata = pdev->dev.platform_data;
+	host->pdata = dev_get_platdata(&pdev->dev);
 
 	nand_chip->priv = host;		/* link the private data structures */
 	mtd->priv = nand_chip;
@@ -828,7 +828,6 @@
 err_exit2:
 	clk_disable(host->clk);
 	clk_put(host->clk);
-	platform_set_drvdata(pdev, NULL);
 err_exit1:
 	lpc32xx_wp_enable(host);
 	gpio_free(host->ncfg->wp_gpio);
@@ -851,7 +850,6 @@
 
 	clk_disable(host->clk);
 	clk_put(host->clk);
-	platform_set_drvdata(pdev, NULL);
 
 	lpc32xx_wp_enable(host);
 	gpio_free(host->ncfg->wp_gpio);
diff --git a/drivers/mtd/nand/lpc32xx_slc.c b/drivers/mtd/nand/lpc32xx_slc.c
index be94ed5..add7570 100644
--- a/drivers/mtd/nand/lpc32xx_slc.c
+++ b/drivers/mtd/nand/lpc32xx_slc.c
@@ -798,7 +798,7 @@
 	}
 	lpc32xx_wp_disable(host);
 
-	host->pdata = pdev->dev.platform_data;
+	host->pdata = dev_get_platdata(&pdev->dev);
 
 	mtd = &host->mtd;
 	chip = &host->nand_chip;
@@ -936,7 +936,6 @@
 err_exit2:
 	clk_disable(host->clk);
 	clk_put(host->clk);
-	platform_set_drvdata(pdev, NULL);
 err_exit1:
 	lpc32xx_wp_enable(host);
 	gpio_free(host->ncfg->wp_gpio);
@@ -963,7 +962,6 @@
 
 	clk_disable(host->clk);
 	clk_put(host->clk);
-	platform_set_drvdata(pdev, NULL);
 	lpc32xx_wp_enable(host);
 	gpio_free(host->ncfg->wp_gpio);
 
diff --git a/drivers/mtd/nand/mxc_nand.c b/drivers/mtd/nand/mxc_nand.c
index 07e5784..ce8242b 100644
--- a/drivers/mtd/nand/mxc_nand.c
+++ b/drivers/mtd/nand/mxc_nand.c
@@ -266,7 +266,7 @@
 	}
 };
 
-static const char const *part_probes[] = {
+static const char * const part_probes[] = {
 	"cmdlinepart", "RedBoot", "ofpart", NULL };
 
 static void memcpy32_fromio(void *trg, const void __iomem  *src, size_t size)
@@ -1432,7 +1432,8 @@
 
 	err = mxcnd_probe_dt(host);
 	if (err > 0) {
-		struct mxc_nand_platform_data *pdata = pdev->dev.platform_data;
+		struct mxc_nand_platform_data *pdata =
+					dev_get_platdata(&pdev->dev);
 		if (pdata) {
 			host->pdata = *pdata;
 			host->devtype_data = (struct mxc_nand_devtype_data *)
@@ -1446,8 +1447,6 @@
 
 	if (host->devtype_data->needs_ip) {
 		res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-		if (!res)
-			return -ENODEV;
 		host->regs_ip = devm_ioremap_resource(&pdev->dev, res);
 		if (IS_ERR(host->regs_ip))
 			return PTR_ERR(host->regs_ip);
@@ -1457,9 +1456,6 @@
 		res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	}
 
-	if (!res)
-		return -ENODEV;
-
 	host->base = devm_ioremap_resource(&pdev->dev, res);
 	if (IS_ERR(host->base))
 		return PTR_ERR(host->base);
@@ -1578,8 +1574,6 @@
 {
 	struct mxc_nand_host *host = platform_get_drvdata(pdev);
 
-	platform_set_drvdata(pdev, NULL);
-
 	nand_release(&host->mtd);
 
 	return 0;
diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index dfcd0a5..7ed4841 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -108,13 +108,13 @@
 	int ret = 0;
 
 	/* Start address must align on block boundary */
-	if (ofs & ((1 << chip->phys_erase_shift) - 1)) {
+	if (ofs & ((1ULL << chip->phys_erase_shift) - 1)) {
 		pr_debug("%s: unaligned address\n", __func__);
 		ret = -EINVAL;
 	}
 
 	/* Length must align on block boundary */
-	if (len & ((1 << chip->phys_erase_shift) - 1)) {
+	if (len & ((1ULL << chip->phys_erase_shift) - 1)) {
 		pr_debug("%s: length not block aligned\n", __func__);
 		ret = -EINVAL;
 	}
@@ -211,11 +211,9 @@
  */
 static void nand_write_buf(struct mtd_info *mtd, const uint8_t *buf, int len)
 {
-	int i;
 	struct nand_chip *chip = mtd->priv;
 
-	for (i = 0; i < len; i++)
-		writeb(buf[i], chip->IO_ADDR_W);
+	iowrite8_rep(chip->IO_ADDR_W, buf, len);
 }
 
 /**
@@ -228,11 +226,9 @@
  */
 static void nand_read_buf(struct mtd_info *mtd, uint8_t *buf, int len)
 {
-	int i;
 	struct nand_chip *chip = mtd->priv;
 
-	for (i = 0; i < len; i++)
-		buf[i] = readb(chip->IO_ADDR_R);
+	ioread8_rep(chip->IO_ADDR_R, buf, len);
 }
 
 /**
@@ -245,14 +241,10 @@
  */
 static void nand_write_buf16(struct mtd_info *mtd, const uint8_t *buf, int len)
 {
-	int i;
 	struct nand_chip *chip = mtd->priv;
 	u16 *p = (u16 *) buf;
-	len >>= 1;
 
-	for (i = 0; i < len; i++)
-		writew(p[i], chip->IO_ADDR_W);
-
+	iowrite16_rep(chip->IO_ADDR_W, p, len >> 1);
 }
 
 /**
@@ -265,13 +257,10 @@
  */
 static void nand_read_buf16(struct mtd_info *mtd, uint8_t *buf, int len)
 {
-	int i;
 	struct nand_chip *chip = mtd->priv;
 	u16 *p = (u16 *) buf;
-	len >>= 1;
 
-	for (i = 0; i < len; i++)
-		p[i] = readw(chip->IO_ADDR_R);
+	ioread16_rep(chip->IO_ADDR_R, p, len >> 1);
 }
 
 /**
@@ -335,80 +324,88 @@
 }
 
 /**
- * nand_default_block_markbad - [DEFAULT] mark a block bad
+ * nand_default_block_markbad - [DEFAULT] mark a block bad via bad block marker
  * @mtd: MTD device structure
  * @ofs: offset from device start
  *
  * This is the default implementation, which can be overridden by a hardware
- * specific driver. We try operations in the following order, according to our
- * bbt_options (NAND_BBT_NO_OOB_BBM and NAND_BBT_USE_FLASH):
- *  (1) erase the affected block, to allow OOB marker to be written cleanly
- *  (2) update in-memory BBT
- *  (3) write bad block marker to OOB area of affected block
- *  (4) update flash-based BBT
- * Note that we retain the first error encountered in (3) or (4), finish the
- * procedures, and dump the error in the end.
-*/
+ * specific driver. It provides the details for writing a bad block marker to a
+ * block.
+ */
 static int nand_default_block_markbad(struct mtd_info *mtd, loff_t ofs)
 {
 	struct nand_chip *chip = mtd->priv;
+	struct mtd_oob_ops ops;
 	uint8_t buf[2] = { 0, 0 };
-	int block, res, ret = 0, i = 0;
-	int write_oob = !(chip->bbt_options & NAND_BBT_NO_OOB_BBM);
+	int ret = 0, res, i = 0;
 
-	if (write_oob) {
+	ops.datbuf = NULL;
+	ops.oobbuf = buf;
+	ops.ooboffs = chip->badblockpos;
+	if (chip->options & NAND_BUSWIDTH_16) {
+		ops.ooboffs &= ~0x01;
+		ops.len = ops.ooblen = 2;
+	} else {
+		ops.len = ops.ooblen = 1;
+	}
+	ops.mode = MTD_OPS_PLACE_OOB;
+
+	/* Write to first/last page(s) if necessary */
+	if (chip->bbt_options & NAND_BBT_SCANLASTPAGE)
+		ofs += mtd->erasesize - mtd->writesize;
+	do {
+		res = nand_do_write_oob(mtd, ofs, &ops);
+		if (!ret)
+			ret = res;
+
+		i++;
+		ofs += mtd->writesize;
+	} while ((chip->bbt_options & NAND_BBT_SCAN2NDPAGE) && i < 2);
+
+	return ret;
+}
+
+/**
+ * nand_block_markbad_lowlevel - mark a block bad
+ * @mtd: MTD device structure
+ * @ofs: offset from device start
+ *
+ * This function performs the generic NAND bad block marking steps (i.e., bad
+ * block table(s) and/or marker(s)). We only allow the hardware driver to
+ * specify how to write bad block markers to OOB (chip->block_markbad).
+ *
+ * We try operations in the following order:
+ *  (1) erase the affected block, to allow OOB marker to be written cleanly
+ *  (2) write bad block marker to OOB area of affected block (unless flag
+ *      NAND_BBT_NO_OOB_BBM is present)
+ *  (3) update the BBT
+ * Note that we retain the first error encountered in (2) or (3), finish the
+ * procedures, and dump the error in the end.
+*/
+static int nand_block_markbad_lowlevel(struct mtd_info *mtd, loff_t ofs)
+{
+	struct nand_chip *chip = mtd->priv;
+	int res, ret = 0;
+
+	if (!(chip->bbt_options & NAND_BBT_NO_OOB_BBM)) {
 		struct erase_info einfo;
 
 		/* Attempt erase before marking OOB */
 		memset(&einfo, 0, sizeof(einfo));
 		einfo.mtd = mtd;
 		einfo.addr = ofs;
-		einfo.len = 1 << chip->phys_erase_shift;
+		einfo.len = 1ULL << chip->phys_erase_shift;
 		nand_erase_nand(mtd, &einfo, 0);
-	}
 
-	/* Get block number */
-	block = (int)(ofs >> chip->bbt_erase_shift);
-	/* Mark block bad in memory-based BBT */
-	if (chip->bbt)
-		chip->bbt[block >> 2] |= 0x01 << ((block & 0x03) << 1);
-
-	/* Write bad block marker to OOB */
-	if (write_oob) {
-		struct mtd_oob_ops ops;
-		loff_t wr_ofs = ofs;
-
+		/* Write bad block marker to OOB */
 		nand_get_device(mtd, FL_WRITING);
-
-		ops.datbuf = NULL;
-		ops.oobbuf = buf;
-		ops.ooboffs = chip->badblockpos;
-		if (chip->options & NAND_BUSWIDTH_16) {
-			ops.ooboffs &= ~0x01;
-			ops.len = ops.ooblen = 2;
-		} else {
-			ops.len = ops.ooblen = 1;
-		}
-		ops.mode = MTD_OPS_PLACE_OOB;
-
-		/* Write to first/last page(s) if necessary */
-		if (chip->bbt_options & NAND_BBT_SCANLASTPAGE)
-			wr_ofs += mtd->erasesize - mtd->writesize;
-		do {
-			res = nand_do_write_oob(mtd, wr_ofs, &ops);
-			if (!ret)
-				ret = res;
-
-			i++;
-			wr_ofs += mtd->writesize;
-		} while ((chip->bbt_options & NAND_BBT_SCAN2NDPAGE) && i < 2);
-
+		ret = chip->block_markbad(mtd, ofs);
 		nand_release_device(mtd);
 	}
 
-	/* Update flash-based bad block table */
-	if (chip->bbt_options & NAND_BBT_USE_FLASH) {
-		res = nand_update_bbt(mtd, ofs);
+	/* Mark block bad in BBT */
+	if (chip->bbt) {
+		res = nand_markbad_bbt(mtd, ofs);
 		if (!ret)
 			ret = res;
 	}
@@ -1983,13 +1980,14 @@
  * nand_write_subpage_hwecc - [REPLACABLE] hardware ECC based subpage write
  * @mtd:	mtd info structure
  * @chip:	nand chip info structure
- * @column:	column address of subpage within the page
+ * @offset:	column address of subpage within the page
  * @data_len:	data length
+ * @buf:	data buffer
  * @oob_required: must write chip->oob_poi to OOB
  */
 static int nand_write_subpage_hwecc(struct mtd_info *mtd,
 				struct nand_chip *chip, uint32_t offset,
-				uint32_t data_len, const uint8_t *data_buf,
+				uint32_t data_len, const uint8_t *buf,
 				int oob_required)
 {
 	uint8_t *oob_buf  = chip->oob_poi;
@@ -2008,20 +2006,20 @@
 		chip->ecc.hwctl(mtd, NAND_ECC_WRITE);
 
 		/* write data (untouched subpages already masked by 0xFF) */
-		chip->write_buf(mtd, data_buf, ecc_size);
+		chip->write_buf(mtd, buf, ecc_size);
 
 		/* mask ECC of un-touched subpages by padding 0xFF */
 		if ((step < start_step) || (step > end_step))
 			memset(ecc_calc, 0xff, ecc_bytes);
 		else
-			chip->ecc.calculate(mtd, data_buf, ecc_calc);
+			chip->ecc.calculate(mtd, buf, ecc_calc);
 
 		/* mask OOB of un-touched subpages by padding 0xFF */
 		/* if oob_required, preserve OOB metadata of written subpage */
 		if (!oob_required || (step < start_step) || (step > end_step))
 			memset(oob_buf, 0xff, oob_bytes);
 
-		data_buf += ecc_size;
+		buf += ecc_size;
 		ecc_calc += ecc_bytes;
 		oob_buf  += oob_bytes;
 	}
@@ -2633,7 +2631,7 @@
 		}
 
 		/* Increment page address and decrement length */
-		len -= (1 << chip->phys_erase_shift);
+		len -= (1ULL << chip->phys_erase_shift);
 		page += pages_per_block;
 
 		/* Check, if we cross a chip boundary */
@@ -2694,7 +2692,6 @@
  */
 static int nand_block_markbad(struct mtd_info *mtd, loff_t ofs)
 {
-	struct nand_chip *chip = mtd->priv;
 	int ret;
 
 	ret = nand_block_isbad(mtd, ofs);
@@ -2705,7 +2702,7 @@
 		return ret;
 	}
 
-	return chip->block_markbad(mtd, ofs);
+	return nand_block_markbad_lowlevel(mtd, ofs);
 }
 
 /**
@@ -2720,7 +2717,9 @@
 {
 	int status;
 
-	if (!chip->onfi_version)
+	if (!chip->onfi_version ||
+	    !(le16_to_cpu(chip->onfi_params.opt_cmd)
+	      & ONFI_OPT_CMD_SET_GET_FEATURES))
 		return -EINVAL;
 
 	chip->cmdfunc(mtd, NAND_CMD_SET_FEATURES, addr, -1);
@@ -2741,7 +2740,9 @@
 static int nand_onfi_get_features(struct mtd_info *mtd, struct nand_chip *chip,
 			int addr, uint8_t *subfeature_param)
 {
-	if (!chip->onfi_version)
+	if (!chip->onfi_version ||
+	    !(le16_to_cpu(chip->onfi_params.opt_cmd)
+	      & ONFI_OPT_CMD_SET_GET_FEATURES))
 		return -EINVAL;
 
 	/* clear the sub feature parameters */
@@ -2793,7 +2794,15 @@
 
 	if (!chip->select_chip)
 		chip->select_chip = nand_select_chip;
-	if (!chip->read_byte)
+
+	/* set for ONFI nand */
+	if (!chip->onfi_set_features)
+		chip->onfi_set_features = nand_onfi_set_features;
+	if (!chip->onfi_get_features)
+		chip->onfi_get_features = nand_onfi_get_features;
+
+	/* If called twice, pointers that depend on busw may need to be reset */
+	if (!chip->read_byte || chip->read_byte == nand_read_byte)
 		chip->read_byte = busw ? nand_read_byte16 : nand_read_byte;
 	if (!chip->read_word)
 		chip->read_word = nand_read_word;
@@ -2801,9 +2810,9 @@
 		chip->block_bad = nand_block_bad;
 	if (!chip->block_markbad)
 		chip->block_markbad = nand_default_block_markbad;
-	if (!chip->write_buf)
+	if (!chip->write_buf || chip->write_buf == nand_write_buf)
 		chip->write_buf = busw ? nand_write_buf16 : nand_write_buf;
-	if (!chip->read_buf)
+	if (!chip->read_buf || chip->read_buf == nand_read_buf)
 		chip->read_buf = busw ? nand_read_buf16 : nand_read_buf;
 	if (!chip->scan_bbt)
 		chip->scan_bbt = nand_default_bbt;
@@ -2846,6 +2855,78 @@
 	return crc;
 }
 
+/* Parse the Extended Parameter Page. */
+static int nand_flash_detect_ext_param_page(struct mtd_info *mtd,
+		struct nand_chip *chip, struct nand_onfi_params *p)
+{
+	struct onfi_ext_param_page *ep;
+	struct onfi_ext_section *s;
+	struct onfi_ext_ecc_info *ecc;
+	uint8_t *cursor;
+	int ret = -EINVAL;
+	int len;
+	int i;
+
+	len = le16_to_cpu(p->ext_param_page_length) * 16;
+	ep = kmalloc(len, GFP_KERNEL);
+	if (!ep) {
+		ret = -ENOMEM;
+		goto ext_out;
+	}
+
+	/* Send our own NAND_CMD_PARAM. */
+	chip->cmdfunc(mtd, NAND_CMD_PARAM, 0, -1);
+
+	/* Use the Change Read Column command to skip the ONFI param pages. */
+	chip->cmdfunc(mtd, NAND_CMD_RNDOUT,
+			sizeof(*p) * p->num_of_param_pages , -1);
+
+	/* Read out the Extended Parameter Page. */
+	chip->read_buf(mtd, (uint8_t *)ep, len);
+	if ((onfi_crc16(ONFI_CRC_BASE, ((uint8_t *)ep) + 2, len - 2)
+		!= le16_to_cpu(ep->crc))) {
+		pr_debug("fail in the CRC.\n");
+		goto ext_out;
+	}
+
+	/*
+	 * Check the signature.
+	 * Do not strictly follow the ONFI spec, maybe changed in future.
+	 */
+	if (strncmp(ep->sig, "EPPS", 4)) {
+		pr_debug("The signature is invalid.\n");
+		goto ext_out;
+	}
+
+	/* find the ECC section. */
+	cursor = (uint8_t *)(ep + 1);
+	for (i = 0; i < ONFI_EXT_SECTION_MAX; i++) {
+		s = ep->sections + i;
+		if (s->type == ONFI_SECTION_TYPE_2)
+			break;
+		cursor += s->length * 16;
+	}
+	if (i == ONFI_EXT_SECTION_MAX) {
+		pr_debug("We can not find the ECC section.\n");
+		goto ext_out;
+	}
+
+	/* get the info we want. */
+	ecc = (struct onfi_ext_ecc_info *)cursor;
+
+	if (ecc->codeword_size) {
+		chip->ecc_strength_ds = ecc->ecc_bits;
+		chip->ecc_step_ds = 1 << ecc->codeword_size;
+	}
+
+	pr_info("ONFI extended param page detected.\n");
+	return 0;
+
+ext_out:
+	kfree(ep);
+	return ret;
+}
+
 /*
  * Check if the NAND chip is ONFI compliant, returns 1 if it is, 0 otherwise.
  */
@@ -2907,9 +2988,31 @@
 	mtd->oobsize = le16_to_cpu(p->spare_bytes_per_page);
 	chip->chipsize = le32_to_cpu(p->blocks_per_lun);
 	chip->chipsize *= (uint64_t)mtd->erasesize * p->lun_count;
-	*busw = 0;
-	if (le16_to_cpu(p->features) & 1)
+
+	if (onfi_feature(chip) & ONFI_FEATURE_16_BIT_BUS)
 		*busw = NAND_BUSWIDTH_16;
+	else
+		*busw = 0;
+
+	if (p->ecc_bits != 0xff) {
+		chip->ecc_strength_ds = p->ecc_bits;
+		chip->ecc_step_ds = 512;
+	} else if (chip->onfi_version >= 21 &&
+		(onfi_feature(chip) & ONFI_FEATURE_EXT_PARAM_PAGE)) {
+
+		/*
+		 * The nand_flash_detect_ext_param_page() uses the
+		 * Change Read Column command which maybe not supported
+		 * by the chip->cmdfunc. So try to update the chip->cmdfunc
+		 * now. We do not replace user supplied command function.
+		 */
+		if (mtd->writesize > 512 && chip->cmdfunc == nand_command)
+			chip->cmdfunc = nand_command_lp;
+
+		/* The Extended Parameter Page is supported since ONFI 2.1. */
+		if (nand_flash_detect_ext_param_page(mtd, chip, p))
+			pr_info("Failed to detect the extended param page.\n");
+	}
 
 	pr_info("ONFI flash detected\n");
 	return 1;
@@ -3086,6 +3189,22 @@
 		extid >>= 2;
 		/* Get buswidth information */
 		*busw = (extid & 0x01) ? NAND_BUSWIDTH_16 : 0;
+
+		/*
+		 * Toshiba 24nm raw SLC (i.e., not BENAND) have 32B OOB per
+		 * 512B page. For Toshiba SLC, we decode the 5th/6th byte as
+		 * follows:
+		 * - ID byte 6, bits[2:0]: 100b -> 43nm, 101b -> 32nm,
+		 *                         110b -> 24nm
+		 * - ID byte 5, bit[7]:    1 -> BENAND, 0 -> raw SLC
+		 */
+		if (id_len >= 6 && id_data[0] == NAND_MFR_TOSHIBA &&
+				!(chip->cellinfo & NAND_CI_CELLTYPE_MSK) &&
+				(id_data[5] & 0x7) == 0x6 /* 24nm */ &&
+				!(id_data[4] & 0x80) /* !BENAND */) {
+			mtd->oobsize = 32 * mtd->writesize >> 9;
+		}
+
 	}
 }
 
@@ -3172,6 +3291,8 @@
 		chip->cellinfo = id_data[2];
 		chip->chipsize = (uint64_t)type->chipsize << 20;
 		chip->options |= type->options;
+		chip->ecc_strength_ds = NAND_ECC_STRENGTH(type);
+		chip->ecc_step_ds = NAND_ECC_STEP(type);
 
 		*busw = type->options & NAND_BUSWIDTH_16;
 
@@ -3446,12 +3567,6 @@
 	if (!chip->write_page)
 		chip->write_page = nand_write_page;
 
-	/* set for ONFI nand */
-	if (!chip->onfi_set_features)
-		chip->onfi_set_features = nand_onfi_set_features;
-	if (!chip->onfi_get_features)
-		chip->onfi_get_features = nand_onfi_get_features;
-
 	/*
 	 * Check ECC mode, default to software if 3byte/512byte hardware ECC is
 	 * selected and we have 256 byte pagesize fallback to software ECC
@@ -3674,6 +3789,7 @@
 	/* propagate ecc info to mtd_info */
 	mtd->ecclayout = chip->ecc.layout;
 	mtd->ecc_strength = chip->ecc.strength;
+	mtd->ecc_step_size = chip->ecc.size;
 	/*
 	 * Initialize bitflip_threshold to its default prior scan_bbt() call.
 	 * scan_bbt() might invoke mtd_read(), thus bitflip_threshold must be
diff --git a/drivers/mtd/nand/nand_bbt.c b/drivers/mtd/nand/nand_bbt.c
index 2672643..bc06196 100644
--- a/drivers/mtd/nand/nand_bbt.c
+++ b/drivers/mtd/nand/nand_bbt.c
@@ -71,6 +71,30 @@
 #include <linux/export.h>
 #include <linux/string.h>
 
+#define BBT_BLOCK_GOOD		0x00
+#define BBT_BLOCK_WORN		0x01
+#define BBT_BLOCK_RESERVED	0x02
+#define BBT_BLOCK_FACTORY_BAD	0x03
+
+#define BBT_ENTRY_MASK		0x03
+#define BBT_ENTRY_SHIFT		2
+
+static int nand_update_bbt(struct mtd_info *mtd, loff_t offs);
+
+static inline uint8_t bbt_get_entry(struct nand_chip *chip, int block)
+{
+	uint8_t entry = chip->bbt[block >> BBT_ENTRY_SHIFT];
+	entry >>= (block & BBT_ENTRY_MASK) * 2;
+	return entry & BBT_ENTRY_MASK;
+}
+
+static inline void bbt_mark_entry(struct nand_chip *chip, int block,
+		uint8_t mark)
+{
+	uint8_t msk = (mark & BBT_ENTRY_MASK) << ((block & BBT_ENTRY_MASK) * 2);
+	chip->bbt[block >> BBT_ENTRY_SHIFT] |= msk;
+}
+
 static int check_pattern_no_oob(uint8_t *buf, struct nand_bbt_descr *td)
 {
 	if (memcmp(buf, td->pattern, td->len))
@@ -86,33 +110,17 @@
  * @td: search pattern descriptor
  *
  * Check for a pattern at the given place. Used to search bad block tables and
- * good / bad block identifiers. If the SCAN_EMPTY option is set then check, if
- * all bytes except the pattern area contain 0xff.
+ * good / bad block identifiers.
  */
 static int check_pattern(uint8_t *buf, int len, int paglen, struct nand_bbt_descr *td)
 {
-	int end = 0;
-	uint8_t *p = buf;
-
 	if (td->options & NAND_BBT_NO_OOB)
 		return check_pattern_no_oob(buf, td);
 
-	end = paglen + td->offs;
-	if (td->options & NAND_BBT_SCANEMPTY)
-		if (memchr_inv(p, 0xff, end))
-			return -1;
-	p += end;
-
 	/* Compare the pattern */
-	if (memcmp(p, td->pattern, td->len))
+	if (memcmp(buf + paglen + td->offs, td->pattern, td->len))
 		return -1;
 
-	if (td->options & NAND_BBT_SCANEMPTY) {
-		p += td->len;
-		end += td->len;
-		if (memchr_inv(p, 0xff, len - end))
-			return -1;
-	}
 	return 0;
 }
 
@@ -159,7 +167,7 @@
  * @page: the starting page
  * @num: the number of bbt descriptors to read
  * @td: the bbt describtion table
- * @offs: offset in the memory table
+ * @offs: block number offset in the table
  *
  * Read the bad block table starting from page.
  */
@@ -209,14 +217,16 @@
 		/* Analyse data */
 		for (i = 0; i < len; i++) {
 			uint8_t dat = buf[i];
-			for (j = 0; j < 8; j += bits, act += 2) {
+			for (j = 0; j < 8; j += bits, act++) {
 				uint8_t tmp = (dat >> j) & msk;
 				if (tmp == msk)
 					continue;
 				if (reserved_block_code && (tmp == reserved_block_code)) {
 					pr_info("nand_read_bbt: reserved block at 0x%012llx\n",
-						 (loff_t)((offs << 2) + (act >> 1)) << this->bbt_erase_shift);
-					this->bbt[offs + (act >> 3)] |= 0x2 << (act & 0x06);
+						 (loff_t)(offs + act) <<
+						 this->bbt_erase_shift);
+					bbt_mark_entry(this, offs + act,
+							BBT_BLOCK_RESERVED);
 					mtd->ecc_stats.bbtblocks++;
 					continue;
 				}
@@ -225,12 +235,15 @@
 				 * move this message to pr_debug.
 				 */
 				pr_info("nand_read_bbt: bad block at 0x%012llx\n",
-					 (loff_t)((offs << 2) + (act >> 1)) << this->bbt_erase_shift);
+					 (loff_t)(offs + act) <<
+					 this->bbt_erase_shift);
 				/* Factory marked bad or worn out? */
 				if (tmp == 0)
-					this->bbt[offs + (act >> 3)] |= 0x3 << (act & 0x06);
+					bbt_mark_entry(this, offs + act,
+							BBT_BLOCK_FACTORY_BAD);
 				else
-					this->bbt[offs + (act >> 3)] |= 0x1 << (act & 0x06);
+					bbt_mark_entry(this, offs + act,
+							BBT_BLOCK_WORN);
 				mtd->ecc_stats.badblocks++;
 			}
 		}
@@ -265,7 +278,7 @@
 					td, offs);
 			if (res)
 				return res;
-			offs += this->chipsize >> (this->bbt_erase_shift + 2);
+			offs += this->chipsize >> this->bbt_erase_shift;
 		}
 	} else {
 		res = read_bbt(mtd, buf, td->pages[0],
@@ -478,22 +491,12 @@
 	else
 		numpages = 1;
 
-	if (!(bd->options & NAND_BBT_SCANEMPTY)) {
-		/* We need only read few bytes from the OOB area */
-		scanlen = 0;
-		readlen = bd->len;
-	} else {
-		/* Full page content should be read */
-		scanlen = mtd->writesize + mtd->oobsize;
-		readlen = numpages * mtd->writesize;
-	}
+	/* We need only read few bytes from the OOB area */
+	scanlen = 0;
+	readlen = bd->len;
 
 	if (chip == -1) {
-		/*
-		 * Note that numblocks is 2 * (real numblocks) here, see i+=2
-		 * below as it makes shifting and masking less painful
-		 */
-		numblocks = mtd->size >> (this->bbt_erase_shift - 1);
+		numblocks = mtd->size >> this->bbt_erase_shift;
 		startblock = 0;
 		from = 0;
 	} else {
@@ -502,16 +505,16 @@
 			       chip + 1, this->numchips);
 			return -EINVAL;
 		}
-		numblocks = this->chipsize >> (this->bbt_erase_shift - 1);
+		numblocks = this->chipsize >> this->bbt_erase_shift;
 		startblock = chip * numblocks;
 		numblocks += startblock;
-		from = (loff_t)startblock << (this->bbt_erase_shift - 1);
+		from = (loff_t)startblock << this->bbt_erase_shift;
 	}
 
 	if (this->bbt_options & NAND_BBT_SCANLASTPAGE)
 		from += mtd->erasesize - (mtd->writesize * numpages);
 
-	for (i = startblock; i < numblocks;) {
+	for (i = startblock; i < numblocks; i++) {
 		int ret;
 
 		BUG_ON(bd->options & NAND_BBT_NO_OOB);
@@ -526,13 +529,12 @@
 			return ret;
 
 		if (ret) {
-			this->bbt[i >> 3] |= 0x03 << (i & 0x6);
+			bbt_mark_entry(this, i, BBT_BLOCK_FACTORY_BAD);
 			pr_warn("Bad eraseblock %d at 0x%012llx\n",
-				i >> 1, (unsigned long long)from);
+				i, (unsigned long long)from);
 			mtd->ecc_stats.badblocks++;
 		}
 
-		i += 2;
 		from += (1 << this->bbt_erase_shift);
 	}
 	return 0;
@@ -655,9 +657,9 @@
 {
 	struct nand_chip *this = mtd->priv;
 	struct erase_info einfo;
-	int i, j, res, chip = 0;
+	int i, res, chip = 0;
 	int bits, startblock, dir, page, offs, numblocks, sft, sftmsk;
-	int nrchips, bbtoffs, pageoffs, ooboffs;
+	int nrchips, pageoffs, ooboffs;
 	uint8_t msk[4];
 	uint8_t rcode = td->reserved_block_code;
 	size_t retlen, len = 0;
@@ -713,10 +715,9 @@
 		for (i = 0; i < td->maxblocks; i++) {
 			int block = startblock + dir * i;
 			/* Check, if the block is bad */
-			switch ((this->bbt[block >> 2] >>
-				 (2 * (block & 0x03))) & 0x03) {
-			case 0x01:
-			case 0x03:
+			switch (bbt_get_entry(this, block)) {
+			case BBT_BLOCK_WORN:
+			case BBT_BLOCK_FACTORY_BAD:
 				continue;
 			}
 			page = block <<
@@ -748,8 +749,6 @@
 		default: return -EINVAL;
 		}
 
-		bbtoffs = chip * (numblocks >> 2);
-
 		to = ((loff_t)page) << this->page_shift;
 
 		/* Must we save the block contents? */
@@ -814,16 +813,12 @@
 			buf[ooboffs + td->veroffs] = td->version[chip];
 
 		/* Walk through the memory table */
-		for (i = 0; i < numblocks;) {
+		for (i = 0; i < numblocks; i++) {
 			uint8_t dat;
-			dat = this->bbt[bbtoffs + (i >> 2)];
-			for (j = 0; j < 4; j++, i++) {
-				int sftcnt = (i << (3 - sft)) & sftmsk;
-				/* Do not store the reserved bbt blocks! */
-				buf[offs + (i >> sft)] &=
-					~(msk[dat & 0x03] << sftcnt);
-				dat >>= 2;
-			}
+			int sftcnt = (i << (3 - sft)) & sftmsk;
+			dat = bbt_get_entry(this, chip * numblocks + i);
+			/* Do not store the reserved bbt blocks! */
+			buf[offs + (i >> sft)] &= ~(msk[dat] << sftcnt);
 		}
 
 		memset(&einfo, 0, sizeof(einfo));
@@ -865,7 +860,6 @@
 {
 	struct nand_chip *this = mtd->priv;
 
-	bd->options &= ~NAND_BBT_SCANEMPTY;
 	return create_bbt(mtd, this->buffers->databuf, bd, -1);
 }
 
@@ -1009,7 +1003,7 @@
 {
 	struct nand_chip *this = mtd->priv;
 	int i, j, chips, block, nrblocks, update;
-	uint8_t oldval, newval;
+	uint8_t oldval;
 
 	/* Do we have a bbt per chip? */
 	if (td->options & NAND_BBT_PERCHIP) {
@@ -1026,12 +1020,12 @@
 			if (td->pages[i] == -1)
 				continue;
 			block = td->pages[i] >> (this->bbt_erase_shift - this->page_shift);
-			block <<= 1;
-			oldval = this->bbt[(block >> 3)];
-			newval = oldval | (0x2 << (block & 0x06));
-			this->bbt[(block >> 3)] = newval;
-			if ((oldval != newval) && td->reserved_block_code)
-				nand_update_bbt(mtd, (loff_t)block << (this->bbt_erase_shift - 1));
+			oldval = bbt_get_entry(this, block);
+			bbt_mark_entry(this, block, BBT_BLOCK_RESERVED);
+			if ((oldval != BBT_BLOCK_RESERVED) &&
+					td->reserved_block_code)
+				nand_update_bbt(mtd, (loff_t)block <<
+						this->bbt_erase_shift);
 			continue;
 		}
 		update = 0;
@@ -1039,14 +1033,12 @@
 			block = ((i + 1) * nrblocks) - td->maxblocks;
 		else
 			block = i * nrblocks;
-		block <<= 1;
 		for (j = 0; j < td->maxblocks; j++) {
-			oldval = this->bbt[(block >> 3)];
-			newval = oldval | (0x2 << (block & 0x06));
-			this->bbt[(block >> 3)] = newval;
-			if (oldval != newval)
+			oldval = bbt_get_entry(this, block);
+			bbt_mark_entry(this, block, BBT_BLOCK_RESERVED);
+			if (oldval != BBT_BLOCK_RESERVED)
 				update = 1;
-			block += 2;
+			block++;
 		}
 		/*
 		 * If we want reserved blocks to be recorded to flash, and some
@@ -1054,7 +1046,8 @@
 		 * bbts.  This should only happen once.
 		 */
 		if (update && td->reserved_block_code)
-			nand_update_bbt(mtd, (loff_t)(block - 2) << (this->bbt_erase_shift - 1));
+			nand_update_bbt(mtd, (loff_t)(block - 1) <<
+					this->bbt_erase_shift);
 	}
 }
 
@@ -1180,13 +1173,13 @@
 }
 
 /**
- * nand_update_bbt - [NAND Interface] update bad block table(s)
+ * nand_update_bbt - update bad block table(s)
  * @mtd: MTD device structure
  * @offs: the offset of the newly marked block
  *
  * The function updates the bad block table(s).
  */
-int nand_update_bbt(struct mtd_info *mtd, loff_t offs)
+static int nand_update_bbt(struct mtd_info *mtd, loff_t offs)
 {
 	struct nand_chip *this = mtd->priv;
 	int len, res = 0;
@@ -1356,28 +1349,47 @@
 int nand_isbad_bbt(struct mtd_info *mtd, loff_t offs, int allowbbt)
 {
 	struct nand_chip *this = mtd->priv;
-	int block;
-	uint8_t res;
+	int block, res;
 
-	/* Get block number * 2 */
-	block = (int)(offs >> (this->bbt_erase_shift - 1));
-	res = (this->bbt[block >> 3] >> (block & 0x06)) & 0x03;
+	block = (int)(offs >> this->bbt_erase_shift);
+	res = bbt_get_entry(this, block);
 
 	pr_debug("nand_isbad_bbt(): bbt info for offs 0x%08x: "
 			"(block %d) 0x%02x\n",
-			(unsigned int)offs, block >> 1, res);
+			(unsigned int)offs, block, res);
 
-	switch ((int)res) {
-	case 0x00:
+	switch (res) {
+	case BBT_BLOCK_GOOD:
 		return 0;
-	case 0x01:
+	case BBT_BLOCK_WORN:
 		return 1;
-	case 0x02:
+	case BBT_BLOCK_RESERVED:
 		return allowbbt ? 0 : 1;
 	}
 	return 1;
 }
 
+/**
+ * nand_markbad_bbt - [NAND Interface] Mark a block bad in the BBT
+ * @mtd: MTD device structure
+ * @offs: offset of the bad block
+ */
+int nand_markbad_bbt(struct mtd_info *mtd, loff_t offs)
+{
+	struct nand_chip *this = mtd->priv;
+	int block, ret = 0;
+
+	block = (int)(offs >> this->bbt_erase_shift);
+
+	/* Mark bad block in memory */
+	bbt_mark_entry(this, block, BBT_BLOCK_WORN);
+
+	/* Update flash-based bad block table */
+	if (this->bbt_options & NAND_BBT_USE_FLASH)
+		ret = nand_update_bbt(mtd, offs);
+
+	return ret;
+}
+
 EXPORT_SYMBOL(nand_scan_bbt);
 EXPORT_SYMBOL(nand_default_bbt);
-EXPORT_SYMBOL_GPL(nand_update_bbt);
diff --git a/drivers/mtd/nand/nand_ids.c b/drivers/mtd/nand/nand_ids.c
index 683813a..a87b0a3 100644
--- a/drivers/mtd/nand/nand_ids.c
+++ b/drivers/mtd/nand/nand_ids.c
@@ -33,16 +33,16 @@
 	 */
 	{"TC58NVG2S0F 4G 3.3V 8-bit",
 		{ .id = {0x98, 0xdc, 0x90, 0x26, 0x76, 0x15, 0x01, 0x08} },
-		  SZ_4K, SZ_512, SZ_256K, 0, 8, 224},
+		  SZ_4K, SZ_512, SZ_256K, 0, 8, 224, NAND_ECC_INFO(4, SZ_512) },
 	{"TC58NVG3S0F 8G 3.3V 8-bit",
 		{ .id = {0x98, 0xd3, 0x90, 0x26, 0x76, 0x15, 0x02, 0x08} },
-		  SZ_4K, SZ_1K, SZ_256K, 0, 8, 232},
+		  SZ_4K, SZ_1K, SZ_256K, 0, 8, 232, NAND_ECC_INFO(4, SZ_512) },
 	{"TC58NVG5D2 32G 3.3V 8-bit",
 		{ .id = {0x98, 0xd7, 0x94, 0x32, 0x76, 0x56, 0x09, 0x00} },
-		  SZ_8K, SZ_4K, SZ_1M, 0, 8, 640},
+		  SZ_8K, SZ_4K, SZ_1M, 0, 8, 640, NAND_ECC_INFO(40, SZ_1K) },
 	{"TC58NVG6D2 64G 3.3V 8-bit",
 		{ .id = {0x98, 0xde, 0x94, 0x82, 0x76, 0x56, 0x04, 0x20} },
-		  SZ_8K, SZ_8K, SZ_2M, 0, 8, 640},
+		  SZ_8K, SZ_8K, SZ_2M, 0, 8, 640, NAND_ECC_INFO(40, SZ_1K) },
 
 	LEGACY_ID_NAND("NAND 4MiB 5V 8-bit",   0x6B, 4, SZ_8K, SP_OPTIONS),
 	LEGACY_ID_NAND("NAND 4MiB 3,3V 8-bit", 0xE3, 4, SZ_8K, SP_OPTIONS),
diff --git a/drivers/mtd/nand/nandsim.c b/drivers/mtd/nand/nandsim.c
index cb38f3d..bdc1d15 100644
--- a/drivers/mtd/nand/nandsim.c
+++ b/drivers/mtd/nand/nandsim.c
@@ -205,7 +205,7 @@
 
 /* Calculate the page offset in flash RAM image by (row, column) address */
 #define NS_RAW_OFFSET(ns) \
-	(((ns)->regs.row << (ns)->geom.pgshift) + ((ns)->regs.row * (ns)->geom.oobsz) + (ns)->regs.column)
+	(((ns)->regs.row * (ns)->geom.pgszoob) + (ns)->regs.column)
 
 /* Calculate the OOB offset in flash RAM image by (row, column) address */
 #define NS_RAW_OFFSET_OOB(ns) (NS_RAW_OFFSET(ns) + ns->geom.pgsz)
@@ -336,7 +336,6 @@
 		uint pgsec;         /* number of pages per sector */
 		uint secshift;      /* bits number in sector size */
 		uint pgshift;       /* bits number in page size */
-		uint oobshift;      /* bits number in OOB size */
 		uint pgaddrbytes;   /* bytes per page address */
 		uint secaddrbytes;  /* bytes per sector address */
 		uint idbytes;       /* the number ID bytes that this chip outputs */
@@ -363,7 +362,7 @@
 
 	/* Fields needed when using a cache file */
 	struct file *cfile; /* Open file */
-	unsigned char *pages_written; /* Which pages have been written */
+	unsigned long *pages_written; /* Which pages have been written */
 	void *file_buf;
 	struct page *held_pages[NS_MAX_HELD_PAGES];
 	int held_cnt;
@@ -586,7 +585,8 @@
 			err = -EINVAL;
 			goto err_close;
 		}
-		ns->pages_written = vzalloc(ns->geom.pgnum);
+		ns->pages_written = vzalloc(BITS_TO_LONGS(ns->geom.pgnum) *
+					    sizeof(unsigned long));
 		if (!ns->pages_written) {
 			NS_ERR("alloc_device: unable to allocate pages written array\n");
 			err = -ENOMEM;
@@ -653,9 +653,7 @@
 
 static char *get_partition_name(int i)
 {
-	char buf[64];
-	sprintf(buf, "NAND simulator partition %d", i);
-	return kstrdup(buf, GFP_KERNEL);
+	return kasprintf(GFP_KERNEL, "NAND simulator partition %d", i);
 }
 
 /*
@@ -690,7 +688,6 @@
 	ns->geom.totszoob = ns->geom.totsz + (uint64_t)ns->geom.pgnum * ns->geom.oobsz;
 	ns->geom.secshift = ffs(ns->geom.secsz) - 1;
 	ns->geom.pgshift  = chip->page_shift;
-	ns->geom.oobshift = ffs(ns->geom.oobsz) - 1;
 	ns->geom.pgsec    = ns->geom.secsz / ns->geom.pgsz;
 	ns->geom.secszoob = ns->geom.secsz + ns->geom.oobsz * ns->geom.pgsec;
 	ns->options = 0;
@@ -761,12 +758,6 @@
 		ns->nbparts += 1;
 	}
 
-	/* Detect how many ID bytes the NAND chip outputs */
-	for (i = 0; nand_flash_ids[i].name != NULL; i++) {
-		if (second_id_byte != nand_flash_ids[i].dev_id)
-			continue;
-	}
-
 	if (ns->busw == 16)
 		NS_WARN("16-bit flashes support wasn't tested\n");
 
@@ -780,7 +771,7 @@
 	printk("bus width: %u\n",               ns->busw);
 	printk("bits in sector size: %u\n",     ns->geom.secshift);
 	printk("bits in page size: %u\n",       ns->geom.pgshift);
-	printk("bits in OOB size: %u\n",	ns->geom.oobshift);
+	printk("bits in OOB size: %u\n",	ffs(ns->geom.oobsz) - 1);
 	printk("flash size with OOB: %llu KiB\n",
 			(unsigned long long)ns->geom.totszoob >> 10);
 	printk("page address bytes: %u\n",      ns->geom.pgaddrbytes);
@@ -1442,7 +1433,7 @@
 	return NS_GET_PAGE(ns)->byte + ns->regs.column + ns->regs.off;
 }
 
-int do_read_error(struct nandsim *ns, int num)
+static int do_read_error(struct nandsim *ns, int num)
 {
 	unsigned int page_no = ns->regs.row;
 
@@ -1454,7 +1445,7 @@
 	return 0;
 }
 
-void do_bit_flips(struct nandsim *ns, int num)
+static void do_bit_flips(struct nandsim *ns, int num)
 {
 	if (bitflips && prandom_u32() < (1 << 22)) {
 		int flips = 1;
@@ -1479,7 +1470,7 @@
 	union ns_mem *mypage;
 
 	if (ns->cfile) {
-		if (!ns->pages_written[ns->regs.row]) {
+		if (!test_bit(ns->regs.row, ns->pages_written)) {
 			NS_DBG("read_page: page %d not written\n", ns->regs.row);
 			memset(ns->buf.byte, 0xFF, num);
 		} else {
@@ -1490,7 +1481,7 @@
 				ns->regs.row, ns->regs.column + ns->regs.off);
 			if (do_read_error(ns, num))
 				return;
-			pos = (loff_t)ns->regs.row * ns->geom.pgszoob + ns->regs.column + ns->regs.off;
+			pos = (loff_t)NS_RAW_OFFSET(ns) + ns->regs.off;
 			tx = read_file(ns, ns->cfile, ns->buf.byte, num, pos);
 			if (tx != num) {
 				NS_ERR("read_page: read error for page %d ret %ld\n", ns->regs.row, (long)tx);
@@ -1525,9 +1516,9 @@
 
 	if (ns->cfile) {
 		for (i = 0; i < ns->geom.pgsec; i++)
-			if (ns->pages_written[ns->regs.row + i]) {
+			if (__test_and_clear_bit(ns->regs.row + i,
+						 ns->pages_written)) {
 				NS_DBG("erase_sector: freeing page %d\n", ns->regs.row + i);
-				ns->pages_written[ns->regs.row + i] = 0;
 			}
 		return;
 	}
@@ -1559,8 +1550,8 @@
 
 		NS_DBG("prog_page: writing page %d\n", ns->regs.row);
 		pg_off = ns->file_buf + ns->regs.column + ns->regs.off;
-		off = (loff_t)ns->regs.row * ns->geom.pgszoob + ns->regs.column + ns->regs.off;
-		if (!ns->pages_written[ns->regs.row]) {
+		off = (loff_t)NS_RAW_OFFSET(ns) + ns->regs.off;
+		if (!test_bit(ns->regs.row, ns->pages_written)) {
 			all = 1;
 			memset(ns->file_buf, 0xff, ns->geom.pgszoob);
 		} else {
@@ -1580,7 +1571,7 @@
 				NS_ERR("prog_page: write error for page %d ret %ld\n", ns->regs.row, (long)tx);
 				return -1;
 			}
-			ns->pages_written[ns->regs.row] = 1;
+			__set_bit(ns->regs.row, ns->pages_written);
 		} else {
 			tx = write_file(ns, ns->cfile, pg_off, num, off);
 			if (tx != num) {
diff --git a/drivers/mtd/nand/nuc900_nand.c b/drivers/mtd/nand/nuc900_nand.c
index cd6be2e..5211515 100644
--- a/drivers/mtd/nand/nuc900_nand.c
+++ b/drivers/mtd/nand/nuc900_nand.c
@@ -324,8 +324,6 @@
 
 	kfree(nuc900_nand);
 
-	platform_set_drvdata(pdev, NULL);
-
 	return 0;
 }
 
diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c
index 81b80af..4ecf0e5f 100644
--- a/drivers/mtd/nand/omap2.c
+++ b/drivers/mtd/nand/omap2.c
@@ -154,7 +154,7 @@
  */
 static uint8_t scan_ff_pattern[] = { 0xff };
 static struct nand_bbt_descr bb_descrip_flashbased = {
-	.options = NAND_BBT_SCANEMPTY | NAND_BBT_SCANALLPAGES,
+	.options = NAND_BBT_SCANALLPAGES,
 	.offs = 0,
 	.len = 1,
 	.pattern = scan_ff_pattern,
@@ -1831,7 +1831,7 @@
 	struct resource			*res;
 	struct mtd_part_parser_data	ppdata = {};
 
-	pdata = pdev->dev.platform_data;
+	pdata = dev_get_platdata(&pdev->dev);
 	if (pdata == NULL) {
 		dev_err(&pdev->dev, "platform data missing\n");
 		return -ENODEV;
@@ -2087,7 +2087,6 @@
 							mtd);
 	omap3_free_bch(&info->mtd);
 
-	platform_set_drvdata(pdev, NULL);
 	if (info->dma)
 		dma_release_channel(info->dma);
 
diff --git a/drivers/mtd/nand/orion_nand.c b/drivers/mtd/nand/orion_nand.c
index 8fbd002..a393a5b 100644
--- a/drivers/mtd/nand/orion_nand.c
+++ b/drivers/mtd/nand/orion_nand.c
@@ -130,8 +130,9 @@
 		if (!of_property_read_u32(pdev->dev.of_node,
 						"chip-delay", &val))
 			board->chip_delay = (u8)val;
-	} else
-		board = pdev->dev.platform_data;
+	} else {
+		board = dev_get_platdata(&pdev->dev);
+	}
 
 	mtd->priv = nc;
 	mtd->owner = THIS_MODULE;
@@ -186,7 +187,6 @@
 		clk_disable_unprepare(clk);
 		clk_put(clk);
 	}
-	platform_set_drvdata(pdev, NULL);
 	iounmap(io_base);
 no_res:
 	kfree(nc);
diff --git a/drivers/mtd/nand/plat_nand.c b/drivers/mtd/nand/plat_nand.c
index c004566..cad4cdc 100644
--- a/drivers/mtd/nand/plat_nand.c
+++ b/drivers/mtd/nand/plat_nand.c
@@ -30,7 +30,7 @@
  */
 static int plat_nand_probe(struct platform_device *pdev)
 {
-	struct platform_nand_data *pdata = pdev->dev.platform_data;
+	struct platform_nand_data *pdata = dev_get_platdata(&pdev->dev);
 	struct mtd_part_parser_data ppdata;
 	struct plat_nand_data *data;
 	struct resource *res;
@@ -122,7 +122,6 @@
 out:
 	if (pdata->ctrl.remove)
 		pdata->ctrl.remove(pdev);
-	platform_set_drvdata(pdev, NULL);
 	iounmap(data->io_base);
 out_release_io:
 	release_mem_region(res->start, resource_size(res));
@@ -137,7 +136,7 @@
 static int plat_nand_remove(struct platform_device *pdev)
 {
 	struct plat_nand_data *data = platform_get_drvdata(pdev);
-	struct platform_nand_data *pdata = pdev->dev.platform_data;
+	struct platform_nand_data *pdata = dev_get_platdata(&pdev->dev);
 	struct resource *res;
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c
index dec80ca..5db900d 100644
--- a/drivers/mtd/nand/pxa3xx_nand.c
+++ b/drivers/mtd/nand/pxa3xx_nand.c
@@ -25,7 +25,14 @@
 #include <linux/of.h>
 #include <linux/of_device.h>
 
+#if defined(CONFIG_ARCH_PXA) || defined(CONFIG_ARCH_MMP)
+#define ARCH_HAS_DMA
+#endif
+
+#ifdef ARCH_HAS_DMA
 #include <mach/dma.h>
+#endif
+
 #include <linux/platform_data/mtd-nand-pxa3xx.h>
 
 #define	CHIP_DELAY_TIMEOUT	(2 * HZ/10)
@@ -80,6 +87,7 @@
 #define NDSR_RDDREQ		(0x1 << 1)
 #define NDSR_WRCMDREQ		(0x1)
 
+#define NDCB0_LEN_OVRD		(0x1 << 28)
 #define NDCB0_ST_ROW_EN         (0x1 << 26)
 #define NDCB0_AUTO_RS		(0x1 << 25)
 #define NDCB0_CSEL		(0x1 << 24)
@@ -123,9 +131,13 @@
 	STATE_READY,
 };
 
+enum pxa3xx_nand_variant {
+	PXA3XX_NAND_VARIANT_PXA,
+	PXA3XX_NAND_VARIANT_ARMADA370,
+};
+
 struct pxa3xx_nand_host {
 	struct nand_chip	chip;
-	struct pxa3xx_nand_cmdset *cmdset;
 	struct mtd_info         *mtd;
 	void			*info_data;
 
@@ -139,10 +151,6 @@
 	unsigned int		row_addr_cycles;
 	size_t			read_id_bytes;
 
-	/* cached register value */
-	uint32_t		reg_ndcr;
-	uint32_t		ndtr0cs0;
-	uint32_t		ndtr1cs0;
 };
 
 struct pxa3xx_nand_info {
@@ -171,9 +179,16 @@
 	struct pxa3xx_nand_host *host[NUM_CHIP_SELECT];
 	unsigned int		state;
 
+	/*
+	 * This driver supports NFCv1 (as found in PXA SoC)
+	 * and NFCv2 (as found in Armada 370/XP SoC).
+	 */
+	enum pxa3xx_nand_variant variant;
+
 	int			cs;
 	int			use_ecc;	/* use HW ECC ? */
 	int			use_dma;	/* use DMA ? */
+	int			use_spare;	/* use spare ? */
 	int			is_ready;
 
 	unsigned int		page_size;	/* page size of attached chip */
@@ -181,33 +196,22 @@
 	unsigned int		oob_size;
 	int 			retcode;
 
+	/* cached register value */
+	uint32_t		reg_ndcr;
+	uint32_t		ndtr0cs0;
+	uint32_t		ndtr1cs0;
+
 	/* generated NDCBx register values */
 	uint32_t		ndcb0;
 	uint32_t		ndcb1;
 	uint32_t		ndcb2;
+	uint32_t		ndcb3;
 };
 
 static bool use_dma = 1;
 module_param(use_dma, bool, 0444);
 MODULE_PARM_DESC(use_dma, "enable DMA for data transferring to/from NAND HW");
 
-/*
- * Default NAND flash controller configuration setup by the
- * bootloader. This configuration is used only when pdata->keep_config is set
- */
-static struct pxa3xx_nand_cmdset default_cmdset = {
-	.read1		= 0x3000,
-	.read2		= 0x0050,
-	.program	= 0x1080,
-	.read_status	= 0x0070,
-	.read_id	= 0x0090,
-	.erase		= 0xD060,
-	.reset		= 0x00FF,
-	.lock		= 0x002A,
-	.unlock		= 0x2423,
-	.lock_status	= 0x007A,
-};
-
 static struct pxa3xx_nand_timing timing[] = {
 	{ 40, 80, 60, 100, 80, 100, 90000, 400, 40, },
 	{ 10,  0, 20,  40, 30,  40, 11123, 110, 10, },
@@ -230,8 +234,6 @@
 /* Define a default flash type setting serve as flash detecting only */
 #define DEFAULT_FLASH_TYPE (&builtin_flash_types[0])
 
-const char *mtd_names[] = {"pxa3xx_nand-0", "pxa3xx_nand-1", NULL};
-
 #define NDTR0_tCH(c)	(min((c), 7) << 19)
 #define NDTR0_tCS(c)	(min((c), 7) << 16)
 #define NDTR0_tWH(c)	(min((c), 7) << 11)
@@ -264,8 +266,8 @@
 		NDTR1_tWHR(ns2cycle(t->tWHR, nand_clk)) |
 		NDTR1_tAR(ns2cycle(t->tAR, nand_clk));
 
-	host->ndtr0cs0 = ndtr0;
-	host->ndtr1cs0 = ndtr1;
+	info->ndtr0cs0 = ndtr0;
+	info->ndtr1cs0 = ndtr1;
 	nand_writel(info, NDTR0CS0, ndtr0);
 	nand_writel(info, NDTR1CS0, ndtr1);
 }
@@ -273,7 +275,7 @@
 static void pxa3xx_set_datasize(struct pxa3xx_nand_info *info)
 {
 	struct pxa3xx_nand_host *host = info->host[info->cs];
-	int oob_enable = host->reg_ndcr & NDCR_SPARE_EN;
+	int oob_enable = info->reg_ndcr & NDCR_SPARE_EN;
 
 	info->data_size = host->page_size;
 	if (!oob_enable) {
@@ -299,12 +301,25 @@
  */
 static void pxa3xx_nand_start(struct pxa3xx_nand_info *info)
 {
-	struct pxa3xx_nand_host *host = info->host[info->cs];
 	uint32_t ndcr;
 
-	ndcr = host->reg_ndcr;
-	ndcr |= info->use_ecc ? NDCR_ECC_EN : 0;
-	ndcr |= info->use_dma ? NDCR_DMA_EN : 0;
+	ndcr = info->reg_ndcr;
+
+	if (info->use_ecc)
+		ndcr |= NDCR_ECC_EN;
+	else
+		ndcr &= ~NDCR_ECC_EN;
+
+	if (info->use_dma)
+		ndcr |= NDCR_DMA_EN;
+	else
+		ndcr &= ~NDCR_DMA_EN;
+
+	if (info->use_spare)
+		ndcr |= NDCR_SPARE_EN;
+	else
+		ndcr &= ~NDCR_SPARE_EN;
+
 	ndcr |= NDCR_ND_RUN;
 
 	/* clear status bits and run */
@@ -333,7 +348,8 @@
 	nand_writel(info, NDSR, NDSR_MASK);
 }
 
-static void enable_int(struct pxa3xx_nand_info *info, uint32_t int_mask)
+static void __maybe_unused
+enable_int(struct pxa3xx_nand_info *info, uint32_t int_mask)
 {
 	uint32_t ndcr;
 
@@ -373,6 +389,7 @@
 	}
 }
 
+#ifdef ARCH_HAS_DMA
 static void start_data_dma(struct pxa3xx_nand_info *info)
 {
 	struct pxa_dma_desc *desc = info->data_desc;
@@ -419,6 +436,10 @@
 	enable_int(info, NDCR_INT_MASK);
 	nand_writel(info, NDSR, NDSR_WRDREQ | NDSR_RDDREQ);
 }
+#else
+static void start_data_dma(struct pxa3xx_nand_info *info)
+{}
+#endif
 
 static irqreturn_t pxa3xx_nand_irq(int irq, void *devid)
 {
@@ -467,9 +488,22 @@
 		nand_writel(info, NDSR, NDSR_WRCMDREQ);
 		status &= ~NDSR_WRCMDREQ;
 		info->state = STATE_CMD_HANDLE;
+
+		/*
+		 * Command buffer registers NDCB{0-2} (and optionally NDCB3)
+		 * must be loaded by writing directly either 12 or 16
+		 * bytes directly to NDCB0, four bytes at a time.
+		 *
+		 * Direct write access to NDCB1, NDCB2 and NDCB3 is ignored
+		 * but each NDCBx register can be read.
+		 */
 		nand_writel(info, NDCB0, info->ndcb0);
 		nand_writel(info, NDCB0, info->ndcb1);
 		nand_writel(info, NDCB0, info->ndcb2);
+
+		/* NDCB3 register is available in NFCv2 (Armada 370/XP SoC) */
+		if (info->variant == PXA3XX_NAND_VARIANT_ARMADA370)
+			nand_writel(info, NDCB0, info->ndcb3);
 	}
 
 	/* clear NDSR to let the controller exit the IRQ */
@@ -491,7 +525,6 @@
 static int prepare_command_pool(struct pxa3xx_nand_info *info, int command,
 		uint16_t column, int page_addr)
 {
-	uint16_t cmd;
 	int addr_cycle, exec_cmd;
 	struct pxa3xx_nand_host *host;
 	struct mtd_info *mtd;
@@ -506,6 +539,8 @@
 	info->buf_count		= 0;
 	info->oob_size		= 0;
 	info->use_ecc		= 0;
+	info->use_spare		= 1;
+	info->use_dma		= (use_dma) ? 1 : 0;
 	info->is_ready		= 0;
 	info->retcode		= ERR_NONE;
 	if (info->cs != 0)
@@ -520,12 +555,16 @@
 	case NAND_CMD_READOOB:
 		pxa3xx_set_datasize(info);
 		break;
+	case NAND_CMD_PARAM:
+		info->use_spare = 0;
+		break;
 	case NAND_CMD_SEQIN:
 		exec_cmd = 0;
 		break;
 	default:
 		info->ndcb1 = 0;
 		info->ndcb2 = 0;
+		info->ndcb3 = 0;
 		break;
 	}
 
@@ -535,21 +574,17 @@
 	switch (command) {
 	case NAND_CMD_READOOB:
 	case NAND_CMD_READ0:
-		cmd = host->cmdset->read1;
-		if (command == NAND_CMD_READOOB)
-			info->buf_start = mtd->writesize + column;
-		else
-			info->buf_start = column;
+		info->buf_start = column;
+		info->ndcb0 |= NDCB0_CMD_TYPE(0)
+				| addr_cycle
+				| NAND_CMD_READ0;
 
-		if (unlikely(host->page_size < PAGE_CHUNK_SIZE))
-			info->ndcb0 |= NDCB0_CMD_TYPE(0)
-					| addr_cycle
-					| (cmd & NDCB0_CMD1_MASK);
-		else
-			info->ndcb0 |= NDCB0_CMD_TYPE(0)
-					| NDCB0_DBC
-					| addr_cycle
-					| cmd;
+		if (command == NAND_CMD_READOOB)
+			info->buf_start += mtd->writesize;
+
+		/* Second command setting for large pages */
+		if (host->page_size >= PAGE_CHUNK_SIZE)
+			info->ndcb0 |= NDCB0_DBC | (NAND_CMD_READSTART << 8);
 
 	case NAND_CMD_SEQIN:
 		/* small page addr setting */
@@ -580,49 +615,58 @@
 			break;
 		}
 
-		cmd = host->cmdset->program;
 		info->ndcb0 |= NDCB0_CMD_TYPE(0x1)
 				| NDCB0_AUTO_RS
 				| NDCB0_ST_ROW_EN
 				| NDCB0_DBC
-				| cmd
+				| (NAND_CMD_PAGEPROG << 8)
+				| NAND_CMD_SEQIN
 				| addr_cycle;
 		break;
 
+	case NAND_CMD_PARAM:
+		info->buf_count = 256;
+		info->ndcb0 |= NDCB0_CMD_TYPE(0)
+				| NDCB0_ADDR_CYC(1)
+				| NDCB0_LEN_OVRD
+				| command;
+		info->ndcb1 = (column & 0xFF);
+		info->ndcb3 = 256;
+		info->data_size = 256;
+		break;
+
 	case NAND_CMD_READID:
-		cmd = host->cmdset->read_id;
 		info->buf_count = host->read_id_bytes;
 		info->ndcb0 |= NDCB0_CMD_TYPE(3)
 				| NDCB0_ADDR_CYC(1)
-				| cmd;
+				| command;
+		info->ndcb1 = (column & 0xFF);
 
 		info->data_size = 8;
 		break;
 	case NAND_CMD_STATUS:
-		cmd = host->cmdset->read_status;
 		info->buf_count = 1;
 		info->ndcb0 |= NDCB0_CMD_TYPE(4)
 				| NDCB0_ADDR_CYC(1)
-				| cmd;
+				| command;
 
 		info->data_size = 8;
 		break;
 
 	case NAND_CMD_ERASE1:
-		cmd = host->cmdset->erase;
 		info->ndcb0 |= NDCB0_CMD_TYPE(2)
 				| NDCB0_AUTO_RS
 				| NDCB0_ADDR_CYC(3)
 				| NDCB0_DBC
-				| cmd;
+				| (NAND_CMD_ERASE2 << 8)
+				| NAND_CMD_ERASE1;
 		info->ndcb1 = page_addr;
 		info->ndcb2 = 0;
 
 		break;
 	case NAND_CMD_RESET:
-		cmd = host->cmdset->reset;
 		info->ndcb0 |= NDCB0_CMD_TYPE(5)
-				| cmd;
+				| command;
 
 		break;
 
@@ -652,7 +696,7 @@
 	 * "byte" address into a "word" address appropriate
 	 * for indexing a word-oriented device
 	 */
-	if (host->reg_ndcr & NDCR_DWIDTH_M)
+	if (info->reg_ndcr & NDCR_DWIDTH_M)
 		column /= 2;
 
 	/*
@@ -662,8 +706,8 @@
 	 */
 	if (info->cs != host->cs) {
 		info->cs = host->cs;
-		nand_writel(info, NDTR0CS0, host->ndtr0cs0);
-		nand_writel(info, NDTR1CS0, host->ndtr1cs0);
+		nand_writel(info, NDTR0CS0, info->ndtr0cs0);
+		nand_writel(info, NDTR1CS0, info->ndtr1cs0);
 	}
 
 	info->state = STATE_PREPARED;
@@ -803,7 +847,7 @@
 				    const struct pxa3xx_nand_flash *f)
 {
 	struct platform_device *pdev = info->pdev;
-	struct pxa3xx_nand_platform_data *pdata = pdev->dev.platform_data;
+	struct pxa3xx_nand_platform_data *pdata = dev_get_platdata(&pdev->dev);
 	struct pxa3xx_nand_host *host = info->host[info->cs];
 	uint32_t ndcr = 0x0; /* enable all interrupts */
 
@@ -818,7 +862,6 @@
 	}
 
 	/* calculate flash information */
-	host->cmdset = &default_cmdset;
 	host->page_size = f->page_size;
 	host->read_id_bytes = (f->page_size == 2048) ? 4 : 2;
 
@@ -840,7 +883,7 @@
 	ndcr |= NDCR_RD_ID_CNT(host->read_id_bytes);
 	ndcr |= NDCR_SPARE_EN; /* enable spare by default */
 
-	host->reg_ndcr = ndcr;
+	info->reg_ndcr = ndcr;
 
 	pxa3xx_nand_set_timing(host, f->timing);
 	return 0;
@@ -863,12 +906,9 @@
 		host->read_id_bytes = 2;
 	}
 
-	host->reg_ndcr = ndcr & ~NDCR_INT_MASK;
-	host->cmdset = &default_cmdset;
-
-	host->ndtr0cs0 = nand_readl(info, NDTR0CS0);
-	host->ndtr1cs0 = nand_readl(info, NDTR1CS0);
-
+	info->reg_ndcr = ndcr & ~NDCR_INT_MASK;
+	info->ndtr0cs0 = nand_readl(info, NDTR0CS0);
+	info->ndtr1cs0 = nand_readl(info, NDTR1CS0);
 	return 0;
 }
 
@@ -878,6 +918,7 @@
  */
 #define MAX_BUFF_SIZE	PAGE_SIZE
 
+#ifdef ARCH_HAS_DMA
 static int pxa3xx_nand_init_buff(struct pxa3xx_nand_info *info)
 {
 	struct platform_device *pdev = info->pdev;
@@ -912,6 +953,32 @@
 	return 0;
 }
 
+static void pxa3xx_nand_free_buff(struct pxa3xx_nand_info *info)
+{
+	struct platform_device *pdev = info->pdev;
+	if (use_dma) {
+		pxa_free_dma(info->data_dma_ch);
+		dma_free_coherent(&pdev->dev, MAX_BUFF_SIZE,
+				  info->data_buff, info->data_buff_phys);
+	} else {
+		kfree(info->data_buff);
+	}
+}
+#else
+static int pxa3xx_nand_init_buff(struct pxa3xx_nand_info *info)
+{
+	info->data_buff = kmalloc(MAX_BUFF_SIZE, GFP_KERNEL);
+	if (info->data_buff == NULL)
+		return -ENOMEM;
+	return 0;
+}
+
+static void pxa3xx_nand_free_buff(struct pxa3xx_nand_info *info)
+{
+	kfree(info->data_buff);
+}
+#endif
+
 static int pxa3xx_nand_sensing(struct pxa3xx_nand_info *info)
 {
 	struct mtd_info *mtd;
@@ -934,7 +1001,7 @@
 	struct pxa3xx_nand_host *host = mtd->priv;
 	struct pxa3xx_nand_info *info = host->info_data;
 	struct platform_device *pdev = info->pdev;
-	struct pxa3xx_nand_platform_data *pdata = pdev->dev.platform_data;
+	struct pxa3xx_nand_platform_data *pdata = dev_get_platdata(&pdev->dev);
 	struct nand_flash_dev pxa3xx_flash_ids[2], *def = NULL;
 	const struct pxa3xx_nand_flash *f = NULL;
 	struct nand_chip *chip = mtd->priv;
@@ -1003,7 +1070,7 @@
 	chip->ecc.size = host->page_size;
 	chip->ecc.strength = 1;
 
-	if (host->reg_ndcr & NDCR_DWIDTH_M)
+	if (info->reg_ndcr & NDCR_DWIDTH_M)
 		chip->options |= NAND_BUSWIDTH_16;
 
 	if (nand_scan_ident(mtd, 1, def))
@@ -1019,8 +1086,6 @@
 		host->row_addr_cycles = 3;
 	else
 		host->row_addr_cycles = 2;
-
-	mtd->name = mtd_names[0];
 	return nand_scan_tail(mtd);
 }
 
@@ -1034,13 +1099,11 @@
 	struct resource *r;
 	int ret, irq, cs;
 
-	pdata = pdev->dev.platform_data;
-	info = kzalloc(sizeof(*info) + (sizeof(*mtd) +
-		       sizeof(*host)) * pdata->num_cs, GFP_KERNEL);
-	if (!info) {
-		dev_err(&pdev->dev, "failed to allocate memory\n");
+	pdata = dev_get_platdata(&pdev->dev);
+	info = devm_kzalloc(&pdev->dev, sizeof(*info) + (sizeof(*mtd) +
+			    sizeof(*host)) * pdata->num_cs, GFP_KERNEL);
+	if (!info)
 		return -ENOMEM;
-	}
 
 	info->pdev = pdev;
 	for (cs = 0; cs < pdata->num_cs; cs++) {
@@ -1069,72 +1132,64 @@
 
 	spin_lock_init(&chip->controller->lock);
 	init_waitqueue_head(&chip->controller->wq);
-	info->clk = clk_get(&pdev->dev, NULL);
+	info->clk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(info->clk)) {
 		dev_err(&pdev->dev, "failed to get nand clock\n");
-		ret = PTR_ERR(info->clk);
-		goto fail_free_mtd;
+		return PTR_ERR(info->clk);
 	}
-	clk_enable(info->clk);
+	ret = clk_prepare_enable(info->clk);
+	if (ret < 0)
+		return ret;
 
-	/*
-	 * This is a dirty hack to make this driver work from devicetree
-	 * bindings. It can be removed once we have a prober DMA controller
-	 * framework for DT.
-	 */
-	if (pdev->dev.of_node && cpu_is_pxa3xx()) {
-		info->drcmr_dat = 97;
-		info->drcmr_cmd = 99;
-	} else {
-		r = platform_get_resource(pdev, IORESOURCE_DMA, 0);
-		if (r == NULL) {
-			dev_err(&pdev->dev, "no resource defined for data DMA\n");
-			ret = -ENXIO;
-			goto fail_put_clk;
-		}
-		info->drcmr_dat = r->start;
+	if (use_dma) {
+		/*
+		 * This is a dirty hack to make this driver work from
+		 * devicetree bindings. It can be removed once we have
+		 * a prober DMA controller framework for DT.
+		 */
+		if (pdev->dev.of_node &&
+		    of_machine_is_compatible("marvell,pxa3xx")) {
+			info->drcmr_dat = 97;
+			info->drcmr_cmd = 99;
+		} else {
+			r = platform_get_resource(pdev, IORESOURCE_DMA, 0);
+			if (r == NULL) {
+				dev_err(&pdev->dev,
+					"no resource defined for data DMA\n");
+				ret = -ENXIO;
+				goto fail_disable_clk;
+			}
+			info->drcmr_dat = r->start;
 
-		r = platform_get_resource(pdev, IORESOURCE_DMA, 1);
-		if (r == NULL) {
-			dev_err(&pdev->dev, "no resource defined for command DMA\n");
-			ret = -ENXIO;
-			goto fail_put_clk;
+			r = platform_get_resource(pdev, IORESOURCE_DMA, 1);
+			if (r == NULL) {
+				dev_err(&pdev->dev,
+					"no resource defined for cmd DMA\n");
+				ret = -ENXIO;
+				goto fail_disable_clk;
+			}
+			info->drcmr_cmd = r->start;
 		}
-		info->drcmr_cmd = r->start;
 	}
 
 	irq = platform_get_irq(pdev, 0);
 	if (irq < 0) {
 		dev_err(&pdev->dev, "no IRQ resource defined\n");
 		ret = -ENXIO;
-		goto fail_put_clk;
+		goto fail_disable_clk;
 	}
 
 	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (r == NULL) {
-		dev_err(&pdev->dev, "no IO memory resource defined\n");
-		ret = -ENODEV;
-		goto fail_put_clk;
-	}
-
-	r = request_mem_region(r->start, resource_size(r), pdev->name);
-	if (r == NULL) {
-		dev_err(&pdev->dev, "failed to request memory resource\n");
-		ret = -EBUSY;
-		goto fail_put_clk;
-	}
-
-	info->mmio_base = ioremap(r->start, resource_size(r));
-	if (info->mmio_base == NULL) {
-		dev_err(&pdev->dev, "ioremap() failed\n");
-		ret = -ENODEV;
-		goto fail_free_res;
+	info->mmio_base = devm_ioremap_resource(&pdev->dev, r);
+	if (IS_ERR(info->mmio_base)) {
+		ret = PTR_ERR(info->mmio_base);
+		goto fail_disable_clk;
 	}
 	info->mmio_phys = r->start;
 
 	ret = pxa3xx_nand_init_buff(info);
 	if (ret)
-		goto fail_free_io;
+		goto fail_disable_clk;
 
 	/* initialize all interrupts to be disabled */
 	disable_int(info, NDSR_MASK);
@@ -1152,21 +1207,9 @@
 
 fail_free_buf:
 	free_irq(irq, info);
-	if (use_dma) {
-		pxa_free_dma(info->data_dma_ch);
-		dma_free_coherent(&pdev->dev, MAX_BUFF_SIZE,
-			info->data_buff, info->data_buff_phys);
-	} else
-		kfree(info->data_buff);
-fail_free_io:
-	iounmap(info->mmio_base);
-fail_free_res:
-	release_mem_region(r->start, resource_size(r));
-fail_put_clk:
-	clk_disable(info->clk);
-	clk_put(info->clk);
-fail_free_mtd:
-	kfree(info);
+	pxa3xx_nand_free_buff(info);
+fail_disable_clk:
+	clk_disable_unprepare(info->clk);
 	return ret;
 }
 
@@ -1174,44 +1217,48 @@
 {
 	struct pxa3xx_nand_info *info = platform_get_drvdata(pdev);
 	struct pxa3xx_nand_platform_data *pdata;
-	struct resource *r;
 	int irq, cs;
 
 	if (!info)
 		return 0;
 
-	pdata = pdev->dev.platform_data;
-	platform_set_drvdata(pdev, NULL);
+	pdata = dev_get_platdata(&pdev->dev);
 
 	irq = platform_get_irq(pdev, 0);
 	if (irq >= 0)
 		free_irq(irq, info);
-	if (use_dma) {
-		pxa_free_dma(info->data_dma_ch);
-		dma_free_writecombine(&pdev->dev, MAX_BUFF_SIZE,
-				info->data_buff, info->data_buff_phys);
-	} else
-		kfree(info->data_buff);
+	pxa3xx_nand_free_buff(info);
 
-	iounmap(info->mmio_base);
-	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	release_mem_region(r->start, resource_size(r));
-
-	clk_disable(info->clk);
-	clk_put(info->clk);
+	clk_disable_unprepare(info->clk);
 
 	for (cs = 0; cs < pdata->num_cs; cs++)
 		nand_release(info->host[cs]->mtd);
-	kfree(info);
 	return 0;
 }
 
 #ifdef CONFIG_OF
 static struct of_device_id pxa3xx_nand_dt_ids[] = {
-	{ .compatible = "marvell,pxa3xx-nand" },
+	{
+		.compatible = "marvell,pxa3xx-nand",
+		.data       = (void *)PXA3XX_NAND_VARIANT_PXA,
+	},
+	{
+		.compatible = "marvell,armada370-nand",
+		.data       = (void *)PXA3XX_NAND_VARIANT_ARMADA370,
+	},
 	{}
 };
-MODULE_DEVICE_TABLE(of, i2c_pxa_dt_ids);
+MODULE_DEVICE_TABLE(of, pxa3xx_nand_dt_ids);
+
+static enum pxa3xx_nand_variant
+pxa3xx_nand_get_variant(struct platform_device *pdev)
+{
+	const struct of_device_id *of_id =
+			of_match_device(pxa3xx_nand_dt_ids, &pdev->dev);
+	if (!of_id)
+		return PXA3XX_NAND_VARIANT_PXA;
+	return (enum pxa3xx_nand_variant)of_id->data;
+}
 
 static int pxa3xx_nand_probe_dt(struct platform_device *pdev)
 {
@@ -1251,11 +1298,18 @@
 	struct pxa3xx_nand_info *info;
 	int ret, cs, probe_success;
 
+#ifndef ARCH_HAS_DMA
+	if (use_dma) {
+		use_dma = 0;
+		dev_warn(&pdev->dev,
+			 "This platform can't do DMA on this device\n");
+	}
+#endif
 	ret = pxa3xx_nand_probe_dt(pdev);
 	if (ret)
 		return ret;
 
-	pdata = pdev->dev.platform_data;
+	pdata = dev_get_platdata(&pdev->dev);
 	if (!pdata) {
 		dev_err(&pdev->dev, "no platform data defined\n");
 		return -ENODEV;
@@ -1268,10 +1322,14 @@
 	}
 
 	info = platform_get_drvdata(pdev);
+	info->variant = pxa3xx_nand_get_variant(pdev);
 	probe_success = 0;
 	for (cs = 0; cs < pdata->num_cs; cs++) {
+		struct mtd_info *mtd = info->host[cs]->mtd;
+
+		mtd->name = pdev->name;
 		info->cs = cs;
-		ret = pxa3xx_nand_scan(info->host[cs]->mtd);
+		ret = pxa3xx_nand_scan(mtd);
 		if (ret) {
 			dev_warn(&pdev->dev, "failed to scan nand at cs %d\n",
 				cs);
@@ -1279,7 +1337,7 @@
 		}
 
 		ppdata.of_node = pdev->dev.of_node;
-		ret = mtd_device_parse_register(info->host[cs]->mtd, NULL,
+		ret = mtd_device_parse_register(mtd, NULL,
 						&ppdata, pdata->parts[cs],
 						pdata->nr_parts[cs]);
 		if (!ret)
@@ -1302,7 +1360,7 @@
 	struct mtd_info *mtd;
 	int cs;
 
-	pdata = pdev->dev.platform_data;
+	pdata = dev_get_platdata(&pdev->dev);
 	if (info->state) {
 		dev_err(&pdev->dev, "driver busy, state = %d\n", info->state);
 		return -EAGAIN;
@@ -1323,7 +1381,7 @@
 	struct mtd_info *mtd;
 	int cs;
 
-	pdata = pdev->dev.platform_data;
+	pdata = dev_get_platdata(&pdev->dev);
 	/* We don't want to handle interrupt without calling mtd routine */
 	disable_int(info, NDCR_INT_MASK);
 
diff --git a/drivers/mtd/nand/r852.c b/drivers/mtd/nand/r852.c
index 4495f85..9dcf02d 100644
--- a/drivers/mtd/nand/r852.c
+++ b/drivers/mtd/nand/r852.c
@@ -229,7 +229,7 @@
 /*
  * Program data lines of the nand chip to send data to it
  */
-void r852_write_buf(struct mtd_info *mtd, const uint8_t *buf, int len)
+static void r852_write_buf(struct mtd_info *mtd, const uint8_t *buf, int len)
 {
 	struct r852_device *dev = r852_get_dev(mtd);
 	uint32_t reg;
@@ -261,7 +261,7 @@
 /*
  * Read data lines of the nand chip to retrieve data
  */
-void r852_read_buf(struct mtd_info *mtd, uint8_t *buf, int len)
+static void r852_read_buf(struct mtd_info *mtd, uint8_t *buf, int len)
 {
 	struct r852_device *dev = r852_get_dev(mtd);
 	uint32_t reg;
@@ -312,7 +312,7 @@
 /*
  * Control several chip lines & send commands
  */
-void r852_cmdctl(struct mtd_info *mtd, int dat, unsigned int ctrl)
+static void r852_cmdctl(struct mtd_info *mtd, int dat, unsigned int ctrl)
 {
 	struct r852_device *dev = r852_get_dev(mtd);
 
@@ -357,7 +357,7 @@
  * Wait till card is ready.
  * based on nand_wait, but returns errors on DMA error
  */
-int r852_wait(struct mtd_info *mtd, struct nand_chip *chip)
+static int r852_wait(struct mtd_info *mtd, struct nand_chip *chip)
 {
 	struct r852_device *dev = chip->priv;
 
@@ -386,7 +386,7 @@
  * Check if card is ready
  */
 
-int r852_ready(struct mtd_info *mtd)
+static int r852_ready(struct mtd_info *mtd)
 {
 	struct r852_device *dev = r852_get_dev(mtd);
 	return !(r852_read_reg(dev, R852_CARD_STA) & R852_CARD_STA_BUSY);
@@ -397,7 +397,7 @@
  * Set ECC engine mode
 */
 
-void r852_ecc_hwctl(struct mtd_info *mtd, int mode)
+static void r852_ecc_hwctl(struct mtd_info *mtd, int mode)
 {
 	struct r852_device *dev = r852_get_dev(mtd);
 
@@ -429,7 +429,7 @@
  * Calculate ECC, only used for writes
  */
 
-int r852_ecc_calculate(struct mtd_info *mtd, const uint8_t *dat,
+static int r852_ecc_calculate(struct mtd_info *mtd, const uint8_t *dat,
 							uint8_t *ecc_code)
 {
 	struct r852_device *dev = r852_get_dev(mtd);
@@ -461,7 +461,7 @@
  * Correct the data using ECC, hw did almost everything for us
  */
 
-int r852_ecc_correct(struct mtd_info *mtd, uint8_t *dat,
+static int r852_ecc_correct(struct mtd_info *mtd, uint8_t *dat,
 				uint8_t *read_ecc, uint8_t *calc_ecc)
 {
 	uint16_t ecc_reg;
@@ -529,7 +529,7 @@
  * Start the nand engine
  */
 
-void r852_engine_enable(struct r852_device *dev)
+static void r852_engine_enable(struct r852_device *dev)
 {
 	if (r852_read_reg_dword(dev, R852_HW) & R852_HW_UNKNOWN) {
 		r852_write_reg(dev, R852_CTL, R852_CTL_RESET | R852_CTL_ON);
@@ -547,7 +547,7 @@
  * Stop the nand engine
  */
 
-void r852_engine_disable(struct r852_device *dev)
+static void r852_engine_disable(struct r852_device *dev)
 {
 	r852_write_reg_dword(dev, R852_HW, 0);
 	r852_write_reg(dev, R852_CTL, R852_CTL_RESET);
@@ -557,7 +557,7 @@
  * Test if card is present
  */
 
-void r852_card_update_present(struct r852_device *dev)
+static void r852_card_update_present(struct r852_device *dev)
 {
 	unsigned long flags;
 	uint8_t reg;
@@ -572,7 +572,7 @@
  * Update card detection IRQ state according to current card state
  * which is read in r852_card_update_present
  */
-void r852_update_card_detect(struct r852_device *dev)
+static void r852_update_card_detect(struct r852_device *dev)
 {
 	int card_detect_reg = r852_read_reg(dev, R852_CARD_IRQ_ENABLE);
 	dev->card_unstable = 0;
@@ -586,8 +586,8 @@
 	r852_write_reg(dev, R852_CARD_IRQ_ENABLE, card_detect_reg);
 }
 
-ssize_t r852_media_type_show(struct device *sys_dev,
-		struct device_attribute *attr, char *buf)
+static ssize_t r852_media_type_show(struct device *sys_dev,
+			struct device_attribute *attr, char *buf)
 {
 	struct mtd_info *mtd = container_of(sys_dev, struct mtd_info, dev);
 	struct r852_device *dev = r852_get_dev(mtd);
@@ -597,11 +597,11 @@
 	return strlen(data);
 }
 
-DEVICE_ATTR(media_type, S_IRUGO, r852_media_type_show, NULL);
+static DEVICE_ATTR(media_type, S_IRUGO, r852_media_type_show, NULL);
 
 
 /* Detect properties of card in slot */
-void r852_update_media_status(struct r852_device *dev)
+static void r852_update_media_status(struct r852_device *dev)
 {
 	uint8_t reg;
 	unsigned long flags;
@@ -630,7 +630,7 @@
  * Register the nand device
  * Called when the card is detected
  */
-int r852_register_nand_device(struct r852_device *dev)
+static int r852_register_nand_device(struct r852_device *dev)
 {
 	dev->mtd = kzalloc(sizeof(struct mtd_info), GFP_KERNEL);
 
@@ -668,7 +668,7 @@
  * Unregister the card
  */
 
-void r852_unregister_nand_device(struct r852_device *dev)
+static void r852_unregister_nand_device(struct r852_device *dev)
 {
 	if (!dev->card_registred)
 		return;
@@ -682,7 +682,7 @@
 }
 
 /* Card state updater */
-void r852_card_detect_work(struct work_struct *work)
+static void r852_card_detect_work(struct work_struct *work)
 {
 	struct r852_device *dev =
 		container_of(work, struct r852_device, card_detect_work.work);
@@ -821,7 +821,7 @@
 	return ret;
 }
 
-int  r852_probe(struct pci_dev *pci_dev, const struct pci_device_id *id)
+static int  r852_probe(struct pci_dev *pci_dev, const struct pci_device_id *id)
 {
 	int error;
 	struct nand_chip *chip;
@@ -961,7 +961,7 @@
 	return error;
 }
 
-void r852_remove(struct pci_dev *pci_dev)
+static void r852_remove(struct pci_dev *pci_dev)
 {
 	struct r852_device *dev = pci_get_drvdata(pci_dev);
 
@@ -992,7 +992,7 @@
 	pci_disable_device(pci_dev);
 }
 
-void r852_shutdown(struct pci_dev *pci_dev)
+static void r852_shutdown(struct pci_dev *pci_dev)
 {
 	struct r852_device *dev = pci_get_drvdata(pci_dev);
 
@@ -1002,7 +1002,7 @@
 	pci_disable_device(pci_dev);
 }
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 static int r852_suspend(struct device *device)
 {
 	struct r852_device *dev = pci_get_drvdata(to_pci_dev(device));
@@ -1055,9 +1055,6 @@
 	r852_update_card_detect(dev);
 	return 0;
 }
-#else
-#define r852_suspend	NULL
-#define r852_resume	NULL
 #endif
 
 static const struct pci_device_id r852_pci_id_tbl[] = {
diff --git a/drivers/mtd/nand/s3c2410.c b/drivers/mtd/nand/s3c2410.c
index d65afd2..d65cbe9 100644
--- a/drivers/mtd/nand/s3c2410.c
+++ b/drivers/mtd/nand/s3c2410.c
@@ -150,7 +150,7 @@
 
 static struct s3c2410_platform_nand *to_nand_plat(struct platform_device *dev)
 {
-	return dev->dev.platform_data;
+	return dev_get_platdata(&dev->dev);
 }
 
 static inline int allow_clk_suspend(struct s3c2410_nand_info *info)
@@ -697,8 +697,6 @@
 {
 	struct s3c2410_nand_info *info = to_nand_info(pdev);
 
-	platform_set_drvdata(pdev, NULL);
-
 	if (info == NULL)
 		return 0;
 
diff --git a/drivers/mtd/nand/sh_flctl.c b/drivers/mtd/nand/sh_flctl.c
index e57e18e..a3c84eb 100644
--- a/drivers/mtd/nand/sh_flctl.c
+++ b/drivers/mtd/nand/sh_flctl.c
@@ -137,7 +137,7 @@
 	dma_cap_mask_t mask;
 	struct dma_slave_config cfg;
 	struct platform_device *pdev = flctl->pdev;
-	struct sh_flctl_platform_data *pdata = pdev->dev.platform_data;
+	struct sh_flctl_platform_data *pdata = dev_get_platdata(&pdev->dev);
 	int ret;
 
 	if (!pdata)
@@ -1131,7 +1131,7 @@
 	if (pdev->dev.of_node)
 		pdata = flctl_parse_dt(&pdev->dev);
 	else
-		pdata = pdev->dev.platform_data;
+		pdata = dev_get_platdata(&pdev->dev);
 
 	if (!pdata) {
 		dev_err(&pdev->dev, "no setup data defined\n");
diff --git a/drivers/mtd/nand/sharpsl.c b/drivers/mtd/nand/sharpsl.c
index 127bc42..87908d7 100644
--- a/drivers/mtd/nand/sharpsl.c
+++ b/drivers/mtd/nand/sharpsl.c
@@ -112,7 +112,7 @@
 	struct resource *r;
 	int err = 0;
 	struct sharpsl_nand *sharpsl;
-	struct sharpsl_nand_platform_data *data = pdev->dev.platform_data;
+	struct sharpsl_nand_platform_data *data = dev_get_platdata(&pdev->dev);
 
 	if (!data) {
 		dev_err(&pdev->dev, "no platform data!\n");
@@ -194,7 +194,6 @@
 	nand_release(&sharpsl->mtd);
 
 err_scan:
-	platform_set_drvdata(pdev, NULL);
 	iounmap(sharpsl->io);
 err_ioremap:
 err_get_res:
@@ -212,8 +211,6 @@
 	/* Release resources, unregister device */
 	nand_release(&sharpsl->mtd);
 
-	platform_set_drvdata(pdev, NULL);
-
 	iounmap(sharpsl->io);
 
 	/* Free the MTD device structure */
diff --git a/drivers/mtd/nand/sm_common.c b/drivers/mtd/nand/sm_common.c
index e8181ed..e06b5e5 100644
--- a/drivers/mtd/nand/sm_common.c
+++ b/drivers/mtd/nand/sm_common.c
@@ -42,7 +42,7 @@
 {
 	struct mtd_oob_ops ops;
 	struct sm_oob oob;
-	int ret, error = 0;
+	int ret;
 
 	memset(&oob, -1, SM_OOB_SIZE);
 	oob.block_status = 0x0F;
@@ -61,11 +61,10 @@
 		printk(KERN_NOTICE
 			"sm_common: can't mark sector at %i as bad\n",
 								(int)ofs);
-		error = -EIO;
-	} else
-		mtd->ecc_stats.badblocks++;
+		return -EIO;
+	}
 
-	return error;
+	return 0;
 }
 
 static struct nand_flash_dev nand_smartmedia_flash_ids[] = {
diff --git a/drivers/mtd/nand/tmio_nand.c b/drivers/mtd/nand/tmio_nand.c
index 508e9e0..396530d 100644
--- a/drivers/mtd/nand/tmio_nand.c
+++ b/drivers/mtd/nand/tmio_nand.c
@@ -357,7 +357,7 @@
 
 static int tmio_probe(struct platform_device *dev)
 {
-	struct tmio_nand_data *data = dev->dev.platform_data;
+	struct tmio_nand_data *data = dev_get_platdata(&dev->dev);
 	struct resource *fcr = platform_get_resource(dev,
 			IORESOURCE_MEM, 0);
 	struct resource *ccr = platform_get_resource(dev,
diff --git a/drivers/mtd/nand/txx9ndfmc.c b/drivers/mtd/nand/txx9ndfmc.c
index 7ed654c..235714a 100644
--- a/drivers/mtd/nand/txx9ndfmc.c
+++ b/drivers/mtd/nand/txx9ndfmc.c
@@ -87,7 +87,7 @@
 static void __iomem *ndregaddr(struct platform_device *dev, unsigned int reg)
 {
 	struct txx9ndfmc_drvdata *drvdata = platform_get_drvdata(dev);
-	struct txx9ndfmc_platform_data *plat = dev->dev.platform_data;
+	struct txx9ndfmc_platform_data *plat = dev_get_platdata(&dev->dev);
 
 	return drvdata->base + (reg << plat->shift);
 }
@@ -138,7 +138,7 @@
 	struct nand_chip *chip = mtd->priv;
 	struct txx9ndfmc_priv *txx9_priv = chip->priv;
 	struct platform_device *dev = txx9_priv->dev;
-	struct txx9ndfmc_platform_data *plat = dev->dev.platform_data;
+	struct txx9ndfmc_platform_data *plat = dev_get_platdata(&dev->dev);
 
 	if (ctrl & NAND_CTRL_CHANGE) {
 		u32 mcr = txx9ndfmc_read(dev, TXX9_NDFMCR);
@@ -225,7 +225,7 @@
 
 static void txx9ndfmc_initialize(struct platform_device *dev)
 {
-	struct txx9ndfmc_platform_data *plat = dev->dev.platform_data;
+	struct txx9ndfmc_platform_data *plat = dev_get_platdata(&dev->dev);
 	struct txx9ndfmc_drvdata *drvdata = platform_get_drvdata(dev);
 	int tmout = 100;
 
@@ -274,19 +274,17 @@
 
 static int __init txx9ndfmc_probe(struct platform_device *dev)
 {
-	struct txx9ndfmc_platform_data *plat = dev->dev.platform_data;
+	struct txx9ndfmc_platform_data *plat = dev_get_platdata(&dev->dev);
 	int hold, spw;
 	int i;
 	struct txx9ndfmc_drvdata *drvdata;
 	unsigned long gbusclk = plat->gbus_clock;
 	struct resource *res;
 
-	res = platform_get_resource(dev, IORESOURCE_MEM, 0);
-	if (!res)
-		return -ENODEV;
 	drvdata = devm_kzalloc(&dev->dev, sizeof(*drvdata), GFP_KERNEL);
 	if (!drvdata)
 		return -ENOMEM;
+	res = platform_get_resource(dev, IORESOURCE_MEM, 0);
 	drvdata->base = devm_ioremap_resource(&dev->dev, res);
 	if (IS_ERR(drvdata->base))
 		return PTR_ERR(drvdata->base);
@@ -387,7 +385,6 @@
 	struct txx9ndfmc_drvdata *drvdata = platform_get_drvdata(dev);
 	int i;
 
-	platform_set_drvdata(dev, NULL);
 	if (!drvdata)
 		return 0;
 	for (i = 0; i < MAX_TXX9NDFMC_DEV; i++) {
diff --git a/drivers/mtd/ofpart.c b/drivers/mtd/ofpart.c
index 553d6d6..d64f8c3 100644
--- a/drivers/mtd/ofpart.c
+++ b/drivers/mtd/ofpart.c
@@ -20,6 +20,11 @@
 #include <linux/slab.h>
 #include <linux/mtd/partitions.h>
 
+static bool node_has_compatible(struct device_node *pp)
+{
+	return of_get_property(pp, "compatible", NULL);
+}
+
 static int parse_ofpart_partitions(struct mtd_info *master,
 				   struct mtd_partition **pparts,
 				   struct mtd_part_parser_data *data)
@@ -38,10 +43,13 @@
 		return 0;
 
 	/* First count the subnodes */
-	pp = NULL;
 	nr_parts = 0;
-	while ((pp = of_get_next_child(node, pp)))
+	for_each_child_of_node(node,  pp) {
+		if (node_has_compatible(pp))
+			continue;
+
 		nr_parts++;
+	}
 
 	if (nr_parts == 0)
 		return 0;
@@ -50,13 +58,15 @@
 	if (!*pparts)
 		return -ENOMEM;
 
-	pp = NULL;
 	i = 0;
-	while ((pp = of_get_next_child(node, pp))) {
+	for_each_child_of_node(node,  pp) {
 		const __be32 *reg;
 		int len;
 		int a_cells, s_cells;
 
+		if (node_has_compatible(pp))
+			continue;
+
 		reg = of_get_property(pp, "reg", &len);
 		if (!reg) {
 			nr_parts--;
diff --git a/drivers/mtd/onenand/generic.c b/drivers/mtd/onenand/generic.c
index 9f11562..63699ff 100644
--- a/drivers/mtd/onenand/generic.c
+++ b/drivers/mtd/onenand/generic.c
@@ -38,7 +38,7 @@
 static int generic_onenand_probe(struct platform_device *pdev)
 {
 	struct onenand_info *info;
-	struct onenand_platform_data *pdata = pdev->dev.platform_data;
+	struct onenand_platform_data *pdata = dev_get_platdata(&pdev->dev);
 	struct resource *res = pdev->resource;
 	unsigned long size = resource_size(res);
 	int err;
@@ -94,8 +94,6 @@
 	struct resource *res = pdev->resource;
 	unsigned long size = resource_size(res);
 
-	platform_set_drvdata(pdev, NULL);
-
 	if (info) {
 		onenand_release(&info->mtd);
 		release_mem_region(res->start, size);
diff --git a/drivers/mtd/onenand/omap2.c b/drivers/mtd/onenand/omap2.c
index d98b198..558071b 100644
--- a/drivers/mtd/onenand/omap2.c
+++ b/drivers/mtd/onenand/omap2.c
@@ -639,7 +639,7 @@
 	struct resource *res;
 	struct mtd_part_parser_data ppdata = {};
 
-	pdata = pdev->dev.platform_data;
+	pdata = dev_get_platdata(&pdev->dev);
 	if (pdata == NULL) {
 		dev_err(&pdev->dev, "platform data missing\n");
 		return -ENODEV;
@@ -810,7 +810,6 @@
 	if (c->dma_channel != -1)
 		omap_free_dma(c->dma_channel);
 	omap2_onenand_shutdown(pdev);
-	platform_set_drvdata(pdev, NULL);
 	if (c->gpio_irq) {
 		free_irq(gpio_to_irq(c->gpio_irq), c);
 		gpio_free(c->gpio_irq);
diff --git a/drivers/mtd/onenand/onenand_bbt.c b/drivers/mtd/onenand/onenand_bbt.c
index 66fe3b7..08d0085 100644
--- a/drivers/mtd/onenand/onenand_bbt.c
+++ b/drivers/mtd/onenand/onenand_bbt.c
@@ -133,7 +133,6 @@
 {
 	struct onenand_chip *this = mtd->priv;
 
-        bd->options &= ~NAND_BBT_SCANEMPTY;
 	return create_bbt(mtd, this->page_buf, bd, -1);
 }
 
diff --git a/drivers/mtd/onenand/samsung.c b/drivers/mtd/onenand/samsung.c
index 2cf7408..df7400d 100644
--- a/drivers/mtd/onenand/samsung.c
+++ b/drivers/mtd/onenand/samsung.c
@@ -867,7 +867,7 @@
 	struct resource *r;
 	int size, err;
 
-	pdata = pdev->dev.platform_data;
+	pdata = dev_get_platdata(&pdev->dev);
 	/* No need to check pdata. the platform data is optional */
 
 	size = sizeof(struct mtd_info) + sizeof(struct onenand_chip);
@@ -1073,7 +1073,6 @@
 	release_mem_region(onenand->base_res->start,
 			   resource_size(onenand->base_res));
 
-	platform_set_drvdata(pdev, NULL);
 	kfree(onenand->oob_buf);
 	kfree(onenand->page_buf);
 	kfree(onenand);
diff --git a/drivers/mtd/sm_ftl.c b/drivers/mtd/sm_ftl.c
index f9d5615..4b8e895 100644
--- a/drivers/mtd/sm_ftl.c
+++ b/drivers/mtd/sm_ftl.c
@@ -22,7 +22,7 @@
 
 
 
-struct workqueue_struct *cache_flush_workqueue;
+static struct workqueue_struct *cache_flush_workqueue;
 
 static int cache_timeout = 1000;
 module_param(cache_timeout, int, S_IRUGO);
@@ -41,7 +41,7 @@
 	int len;
 };
 
-ssize_t sm_attr_show(struct device *dev, struct device_attribute *attr,
+static ssize_t sm_attr_show(struct device *dev, struct device_attribute *attr,
 		     char *buf)
 {
 	struct sm_sysfs_attribute *sm_attr =
@@ -54,7 +54,7 @@
 
 #define NUM_ATTRIBUTES 1
 #define SM_CIS_VENDOR_OFFSET 0x59
-struct attribute_group *sm_create_sysfs_attributes(struct sm_ftl *ftl)
+static struct attribute_group *sm_create_sysfs_attributes(struct sm_ftl *ftl)
 {
 	struct attribute_group *attr_group;
 	struct attribute **attributes;
@@ -107,7 +107,7 @@
 	return NULL;
 }
 
-void sm_delete_sysfs_attributes(struct sm_ftl *ftl)
+static void sm_delete_sysfs_attributes(struct sm_ftl *ftl)
 {
 	struct attribute **attributes = ftl->disk_attributes->attrs;
 	int i;
@@ -571,7 +571,7 @@
 };
 /* Find out media parameters.
  * This ideally has to be based on nand id, but for now device size is enough */
-int sm_get_media_info(struct sm_ftl *ftl, struct mtd_info *mtd)
+static int sm_get_media_info(struct sm_ftl *ftl, struct mtd_info *mtd)
 {
 	int i;
 	int size_in_megs = mtd->size / (1024 * 1024);
@@ -878,7 +878,7 @@
 }
 
 /* Get and automatically initialize an FTL mapping for one zone */
-struct ftl_zone *sm_get_zone(struct sm_ftl *ftl, int zone_num)
+static struct ftl_zone *sm_get_zone(struct sm_ftl *ftl, int zone_num)
 {
 	struct ftl_zone *zone;
 	int error;
@@ -899,7 +899,7 @@
 /* ----------------- cache handling ------------------------------------------*/
 
 /* Initialize the one block cache */
-void sm_cache_init(struct sm_ftl *ftl)
+static void sm_cache_init(struct sm_ftl *ftl)
 {
 	ftl->cache_data_invalid_bitmap = 0xFFFFFFFF;
 	ftl->cache_clean = 1;
@@ -909,7 +909,7 @@
 }
 
 /* Put sector in one block cache */
-void sm_cache_put(struct sm_ftl *ftl, char *buffer, int boffset)
+static void sm_cache_put(struct sm_ftl *ftl, char *buffer, int boffset)
 {
 	memcpy(ftl->cache_data + boffset, buffer, SM_SECTOR_SIZE);
 	clear_bit(boffset / SM_SECTOR_SIZE, &ftl->cache_data_invalid_bitmap);
@@ -917,7 +917,7 @@
 }
 
 /* Read a sector from the cache */
-int sm_cache_get(struct sm_ftl *ftl, char *buffer, int boffset)
+static int sm_cache_get(struct sm_ftl *ftl, char *buffer, int boffset)
 {
 	if (test_bit(boffset / SM_SECTOR_SIZE,
 		&ftl->cache_data_invalid_bitmap))
@@ -928,7 +928,7 @@
 }
 
 /* Write the cache to hardware */
-int sm_cache_flush(struct sm_ftl *ftl)
+static int sm_cache_flush(struct sm_ftl *ftl)
 {
 	struct ftl_zone *zone;
 
@@ -1274,10 +1274,10 @@
 static __init int sm_module_init(void)
 {
 	int error = 0;
-	cache_flush_workqueue = create_freezable_workqueue("smflush");
 
-	if (IS_ERR(cache_flush_workqueue))
-		return PTR_ERR(cache_flush_workqueue);
+	cache_flush_workqueue = create_freezable_workqueue("smflush");
+	if (!cache_flush_workqueue)
+		return -ENOMEM;
 
 	error = register_mtd_blktrans(&sm_ftl_ops);
 	if (error)
diff --git a/drivers/mtd/tests/Makefile b/drivers/mtd/tests/Makefile
index bd0065c0..937a829 100644
--- a/drivers/mtd/tests/Makefile
+++ b/drivers/mtd/tests/Makefile
@@ -7,3 +7,12 @@
 obj-$(CONFIG_MTD_TESTS) += mtd_torturetest.o
 obj-$(CONFIG_MTD_TESTS) += mtd_nandecctest.o
 obj-$(CONFIG_MTD_TESTS) += mtd_nandbiterrs.o
+
+mtd_oobtest-objs := oobtest.o mtd_test.o
+mtd_pagetest-objs := pagetest.o mtd_test.o
+mtd_readtest-objs := readtest.o mtd_test.o
+mtd_speedtest-objs := speedtest.o mtd_test.o
+mtd_stresstest-objs := stresstest.o mtd_test.o
+mtd_subpagetest-objs := subpagetest.o mtd_test.o
+mtd_torturetest-objs := torturetest.o mtd_test.o
+mtd_nandbiterrs-objs := nandbiterrs.o mtd_test.o
diff --git a/drivers/mtd/tests/mtd_test.c b/drivers/mtd/tests/mtd_test.c
new file mode 100644
index 0000000..c818a63
--- /dev/null
+++ b/drivers/mtd/tests/mtd_test.c
@@ -0,0 +1,114 @@
+#define pr_fmt(fmt) "mtd_test: " fmt
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/printk.h>
+
+#include "mtd_test.h"
+
+int mtdtest_erase_eraseblock(struct mtd_info *mtd, unsigned int ebnum)
+{
+	int err;
+	struct erase_info ei;
+	loff_t addr = ebnum * mtd->erasesize;
+
+	memset(&ei, 0, sizeof(struct erase_info));
+	ei.mtd  = mtd;
+	ei.addr = addr;
+	ei.len  = mtd->erasesize;
+
+	err = mtd_erase(mtd, &ei);
+	if (err) {
+		pr_info("error %d while erasing EB %d\n", err, ebnum);
+		return err;
+	}
+
+	if (ei.state == MTD_ERASE_FAILED) {
+		pr_info("some erase error occurred at EB %d\n", ebnum);
+		return -EIO;
+	}
+	return 0;
+}
+
+static int is_block_bad(struct mtd_info *mtd, unsigned int ebnum)
+{
+	int ret;
+	loff_t addr = ebnum * mtd->erasesize;
+
+	ret = mtd_block_isbad(mtd, addr);
+	if (ret)
+		pr_info("block %d is bad\n", ebnum);
+
+	return ret;
+}
+
+int mtdtest_scan_for_bad_eraseblocks(struct mtd_info *mtd, unsigned char *bbt,
+					unsigned int eb, int ebcnt)
+{
+	int i, bad = 0;
+
+	if (!mtd_can_have_bb(mtd))
+		return 0;
+
+	pr_info("scanning for bad eraseblocks\n");
+	for (i = 0; i < ebcnt; ++i) {
+		bbt[i] = is_block_bad(mtd, eb + i) ? 1 : 0;
+		if (bbt[i])
+			bad += 1;
+		cond_resched();
+	}
+	pr_info("scanned %d eraseblocks, %d are bad\n", i, bad);
+
+	return 0;
+}
+
+int mtdtest_erase_good_eraseblocks(struct mtd_info *mtd, unsigned char *bbt,
+				unsigned int eb, int ebcnt)
+{
+	int err;
+	unsigned int i;
+
+	for (i = 0; i < ebcnt; ++i) {
+		if (bbt[i])
+			continue;
+		err = mtdtest_erase_eraseblock(mtd, eb + i);
+		if (err)
+			return err;
+		cond_resched();
+	}
+
+	return 0;
+}
+
+int mtdtest_read(struct mtd_info *mtd, loff_t addr, size_t size, void *buf)
+{
+	size_t read;
+	int err;
+
+	err = mtd_read(mtd, addr, size, &read, buf);
+	/* Ignore corrected ECC errors */
+	if (mtd_is_bitflip(err))
+		err = 0;
+	if (!err && read != size)
+		err = -EIO;
+	if (err)
+		pr_err("error: read failed at %#llx\n", addr);
+
+	return err;
+}
+
+int mtdtest_write(struct mtd_info *mtd, loff_t addr, size_t size,
+		const void *buf)
+{
+	size_t written;
+	int err;
+
+	err = mtd_write(mtd, addr, size, &written, buf);
+	if (!err && written != size)
+		err = -EIO;
+	if (err)
+		pr_err("error: write failed at %#llx\n", addr);
+
+	return err;
+}
diff --git a/drivers/mtd/tests/mtd_test.h b/drivers/mtd/tests/mtd_test.h
new file mode 100644
index 0000000..f437c77
--- /dev/null
+++ b/drivers/mtd/tests/mtd_test.h
@@ -0,0 +1,11 @@
+#include <linux/mtd/mtd.h>
+
+int mtdtest_erase_eraseblock(struct mtd_info *mtd, unsigned int ebnum);
+int mtdtest_scan_for_bad_eraseblocks(struct mtd_info *mtd, unsigned char *bbt,
+					unsigned int eb, int ebcnt);
+int mtdtest_erase_good_eraseblocks(struct mtd_info *mtd, unsigned char *bbt,
+				unsigned int eb, int ebcnt);
+
+int mtdtest_read(struct mtd_info *mtd, loff_t addr, size_t size, void *buf);
+int mtdtest_write(struct mtd_info *mtd, loff_t addr, size_t size,
+		const void *buf);
diff --git a/drivers/mtd/tests/mtd_nandbiterrs.c b/drivers/mtd/tests/nandbiterrs.c
similarity index 93%
rename from drivers/mtd/tests/mtd_nandbiterrs.c
rename to drivers/mtd/tests/nandbiterrs.c
index 207bf9a..3cd3aab 100644
--- a/drivers/mtd/tests/mtd_nandbiterrs.c
+++ b/drivers/mtd/tests/nandbiterrs.c
@@ -49,6 +49,7 @@
 #include <linux/err.h>
 #include <linux/mtd/nand.h>
 #include <linux/slab.h>
+#include "mtd_test.h"
 
 static int dev;
 module_param(dev, int, S_IRUGO);
@@ -98,47 +99,13 @@
 	return c;
 }
 
-static int erase_block(void)
-{
-	int err;
-	struct erase_info ei;
-	loff_t addr = eraseblock * mtd->erasesize;
-
-	pr_info("erase_block\n");
-
-	memset(&ei, 0, sizeof(struct erase_info));
-	ei.mtd  = mtd;
-	ei.addr = addr;
-	ei.len  = mtd->erasesize;
-
-	err = mtd_erase(mtd, &ei);
-	if (err || ei.state == MTD_ERASE_FAILED) {
-		pr_err("error %d while erasing\n", err);
-		if (!err)
-			err = -EIO;
-		return err;
-	}
-
-	return 0;
-}
-
 /* Writes wbuffer to page */
 static int write_page(int log)
 {
-	int err = 0;
-	size_t written;
-
 	if (log)
 		pr_info("write_page\n");
 
-	err = mtd_write(mtd, offset, mtd->writesize, &written, wbuffer);
-	if (err || written != mtd->writesize) {
-		pr_err("error: write failed at %#llx\n", (long long)offset);
-		if (!err)
-			err = -EIO;
-	}
-
-	return err;
+	return mtdtest_write(mtd, offset, mtd->writesize, wbuffer);
 }
 
 /* Re-writes the data area while leaving the OOB alone. */
@@ -415,7 +382,7 @@
 		goto exit_rbuffer;
 	}
 
-	err = erase_block();
+	err = mtdtest_erase_eraseblock(mtd, eraseblock);
 	if (err)
 		goto exit_error;
 
@@ -428,7 +395,7 @@
 		goto exit_error;
 
 	/* We leave the block un-erased in case of test failure. */
-	err = erase_block();
+	err = mtdtest_erase_eraseblock(mtd, eraseblock);
 	if (err)
 		goto exit_error;
 
diff --git a/drivers/mtd/tests/mtd_oobtest.c b/drivers/mtd/tests/oobtest.c
similarity index 89%
rename from drivers/mtd/tests/mtd_oobtest.c
rename to drivers/mtd/tests/oobtest.c
index 3e24b37..ff35c46 100644
--- a/drivers/mtd/tests/mtd_oobtest.c
+++ b/drivers/mtd/tests/oobtest.c
@@ -31,6 +31,8 @@
 #include <linux/sched.h>
 #include <linux/random.h>
 
+#include "mtd_test.h"
+
 static int dev = -EINVAL;
 module_param(dev, int, S_IRUGO);
 MODULE_PARM_DESC(dev, "MTD device number to use");
@@ -49,49 +51,6 @@
 static int vary_offset;
 static struct rnd_state rnd_state;
 
-static int erase_eraseblock(int ebnum)
-{
-	int err;
-	struct erase_info ei;
-	loff_t addr = ebnum * mtd->erasesize;
-
-	memset(&ei, 0, sizeof(struct erase_info));
-	ei.mtd  = mtd;
-	ei.addr = addr;
-	ei.len  = mtd->erasesize;
-
-	err = mtd_erase(mtd, &ei);
-	if (err) {
-		pr_err("error %d while erasing EB %d\n", err, ebnum);
-		return err;
-	}
-
-	if (ei.state == MTD_ERASE_FAILED) {
-		pr_err("some erase error occurred at EB %d\n", ebnum);
-		return -EIO;
-	}
-
-	return 0;
-}
-
-static int erase_whole_device(void)
-{
-	int err;
-	unsigned int i;
-
-	pr_info("erasing whole device\n");
-	for (i = 0; i < ebcnt; ++i) {
-		if (bbt[i])
-			continue;
-		err = erase_eraseblock(i);
-		if (err)
-			return err;
-		cond_resched();
-	}
-	pr_info("erased %u eraseblocks\n", i);
-	return 0;
-}
-
 static void do_vary_offset(void)
 {
 	use_len -= 1;
@@ -304,38 +263,6 @@
 	return 0;
 }
 
-static int is_block_bad(int ebnum)
-{
-	int ret;
-	loff_t addr = ebnum * mtd->erasesize;
-
-	ret = mtd_block_isbad(mtd, addr);
-	if (ret)
-		pr_info("block %d is bad\n", ebnum);
-	return ret;
-}
-
-static int scan_for_bad_eraseblocks(void)
-{
-	int i, bad = 0;
-
-	bbt = kmalloc(ebcnt, GFP_KERNEL);
-	if (!bbt) {
-		pr_err("error: cannot allocate memory\n");
-		return -ENOMEM;
-	}
-
-	pr_info("scanning for bad eraseblocks\n");
-	for (i = 0; i < ebcnt; ++i) {
-		bbt[i] = is_block_bad(i) ? 1 : 0;
-		if (bbt[i])
-			bad += 1;
-		cond_resched();
-	}
-	pr_info("scanned %d eraseblocks, %d are bad\n", i, bad);
-	return 0;
-}
-
 static int __init mtd_oobtest_init(void)
 {
 	int err = 0;
@@ -380,17 +307,16 @@
 
 	err = -ENOMEM;
 	readbuf = kmalloc(mtd->erasesize, GFP_KERNEL);
-	if (!readbuf) {
-		pr_err("error: cannot allocate memory\n");
+	if (!readbuf)
 		goto out;
-	}
 	writebuf = kmalloc(mtd->erasesize, GFP_KERNEL);
-	if (!writebuf) {
-		pr_err("error: cannot allocate memory\n");
+	if (!writebuf)
 		goto out;
-	}
+	bbt = kzalloc(ebcnt, GFP_KERNEL);
+	if (!bbt)
+		goto out;
 
-	err = scan_for_bad_eraseblocks();
+	err = mtdtest_scan_for_bad_eraseblocks(mtd, bbt, 0, ebcnt);
 	if (err)
 		goto out;
 
@@ -402,7 +328,7 @@
 	/* First test: write all OOB, read it back and verify */
 	pr_info("test 1 of 5\n");
 
-	err = erase_whole_device();
+	err = mtdtest_erase_good_eraseblocks(mtd, bbt, 0, ebcnt);
 	if (err)
 		goto out;
 
@@ -422,7 +348,7 @@
 	 */
 	pr_info("test 2 of 5\n");
 
-	err = erase_whole_device();
+	err = mtdtest_erase_good_eraseblocks(mtd, bbt, 0, ebcnt);
 	if (err)
 		goto out;
 
@@ -452,7 +378,7 @@
 	 */
 	pr_info("test 3 of 5\n");
 
-	err = erase_whole_device();
+	err = mtdtest_erase_good_eraseblocks(mtd, bbt, 0, ebcnt);
 	if (err)
 		goto out;
 
@@ -485,7 +411,7 @@
 	/* Fourth test: try to write off end of device */
 	pr_info("test 4 of 5\n");
 
-	err = erase_whole_device();
+	err = mtdtest_erase_good_eraseblocks(mtd, bbt, 0, ebcnt);
 	if (err)
 		goto out;
 
@@ -577,7 +503,7 @@
 			errcnt += 1;
 		}
 
-		err = erase_eraseblock(ebcnt - 1);
+		err = mtdtest_erase_eraseblock(mtd, ebcnt - 1);
 		if (err)
 			goto out;
 
@@ -626,7 +552,7 @@
 	pr_info("test 5 of 5\n");
 
 	/* Erase all eraseblocks */
-	err = erase_whole_device();
+	err = mtdtest_erase_good_eraseblocks(mtd, bbt, 0, ebcnt);
 	if (err)
 		goto out;
 
diff --git a/drivers/mtd/tests/mtd_pagetest.c b/drivers/mtd/tests/pagetest.c
similarity index 63%
rename from drivers/mtd/tests/mtd_pagetest.c
rename to drivers/mtd/tests/pagetest.c
index 0c1140b..44b96e9 100644
--- a/drivers/mtd/tests/mtd_pagetest.c
+++ b/drivers/mtd/tests/pagetest.c
@@ -31,6 +31,8 @@
 #include <linux/sched.h>
 #include <linux/random.h>
 
+#include "mtd_test.h"
+
 static int dev = -EINVAL;
 module_param(dev, int, S_IRUGO);
 MODULE_PARM_DESC(dev, "MTD device number to use");
@@ -48,52 +50,18 @@
 static int errcnt;
 static struct rnd_state rnd_state;
 
-static int erase_eraseblock(int ebnum)
-{
-	int err;
-	struct erase_info ei;
-	loff_t addr = ebnum * mtd->erasesize;
-
-	memset(&ei, 0, sizeof(struct erase_info));
-	ei.mtd  = mtd;
-	ei.addr = addr;
-	ei.len  = mtd->erasesize;
-
-	err = mtd_erase(mtd, &ei);
-	if (err) {
-		pr_err("error %d while erasing EB %d\n", err, ebnum);
-		return err;
-	}
-
-	if (ei.state == MTD_ERASE_FAILED) {
-		pr_err("some erase error occurred at EB %d\n",
-		       ebnum);
-		return -EIO;
-	}
-
-	return 0;
-}
-
 static int write_eraseblock(int ebnum)
 {
-	int err = 0;
-	size_t written;
 	loff_t addr = ebnum * mtd->erasesize;
 
 	prandom_bytes_state(&rnd_state, writebuf, mtd->erasesize);
 	cond_resched();
-	err = mtd_write(mtd, addr, mtd->erasesize, &written, writebuf);
-	if (err || written != mtd->erasesize)
-		pr_err("error: write failed at %#llx\n",
-		       (long long)addr);
-
-	return err;
+	return mtdtest_write(mtd, addr, mtd->erasesize, writebuf);
 }
 
 static int verify_eraseblock(int ebnum)
 {
 	uint32_t j;
-	size_t read;
 	int err = 0, i;
 	loff_t addr0, addrn;
 	loff_t addr = ebnum * mtd->erasesize;
@@ -109,31 +77,16 @@
 	prandom_bytes_state(&rnd_state, writebuf, mtd->erasesize);
 	for (j = 0; j < pgcnt - 1; ++j, addr += pgsize) {
 		/* Do a read to set the internal dataRAMs to different data */
-		err = mtd_read(mtd, addr0, bufsize, &read, twopages);
-		if (mtd_is_bitflip(err))
-			err = 0;
-		if (err || read != bufsize) {
-			pr_err("error: read failed at %#llx\n",
-			       (long long)addr0);
+		err = mtdtest_read(mtd, addr0, bufsize, twopages);
+		if (err)
 			return err;
-		}
-		err = mtd_read(mtd, addrn - bufsize, bufsize, &read, twopages);
-		if (mtd_is_bitflip(err))
-			err = 0;
-		if (err || read != bufsize) {
-			pr_err("error: read failed at %#llx\n",
-			       (long long)(addrn - bufsize));
+		err = mtdtest_read(mtd, addrn - bufsize, bufsize, twopages);
+		if (err)
 			return err;
-		}
 		memset(twopages, 0, bufsize);
-		err = mtd_read(mtd, addr, bufsize, &read, twopages);
-		if (mtd_is_bitflip(err))
-			err = 0;
-		if (err || read != bufsize) {
-			pr_err("error: read failed at %#llx\n",
-			       (long long)addr);
+		err = mtdtest_read(mtd, addr, bufsize, twopages);
+		if (err)
 			break;
-		}
 		if (memcmp(twopages, writebuf + (j * pgsize), bufsize)) {
 			pr_err("error: verify failed at %#llx\n",
 			       (long long)addr);
@@ -145,31 +98,16 @@
 		struct rnd_state old_state = rnd_state;
 
 		/* Do a read to set the internal dataRAMs to different data */
-		err = mtd_read(mtd, addr0, bufsize, &read, twopages);
-		if (mtd_is_bitflip(err))
-			err = 0;
-		if (err || read != bufsize) {
-			pr_err("error: read failed at %#llx\n",
-			       (long long)addr0);
+		err = mtdtest_read(mtd, addr0, bufsize, twopages);
+		if (err)
 			return err;
-		}
-		err = mtd_read(mtd, addrn - bufsize, bufsize, &read, twopages);
-		if (mtd_is_bitflip(err))
-			err = 0;
-		if (err || read != bufsize) {
-			pr_err("error: read failed at %#llx\n",
-			       (long long)(addrn - bufsize));
+		err = mtdtest_read(mtd, addrn - bufsize, bufsize, twopages);
+		if (err)
 			return err;
-		}
 		memset(twopages, 0, bufsize);
-		err = mtd_read(mtd, addr, bufsize, &read, twopages);
-		if (mtd_is_bitflip(err))
-			err = 0;
-		if (err || read != bufsize) {
-			pr_err("error: read failed at %#llx\n",
-			       (long long)addr);
+		err = mtdtest_read(mtd, addr, bufsize, twopages);
+		if (err)
 			return err;
-		}
 		memcpy(boundary, writebuf + mtd->erasesize - pgsize, pgsize);
 		prandom_bytes_state(&rnd_state, boundary + pgsize, pgsize);
 		if (memcmp(twopages, boundary, bufsize)) {
@@ -184,17 +122,14 @@
 
 static int crosstest(void)
 {
-	size_t read;
 	int err = 0, i;
 	loff_t addr, addr0, addrn;
 	unsigned char *pp1, *pp2, *pp3, *pp4;
 
 	pr_info("crosstest\n");
 	pp1 = kmalloc(pgsize * 4, GFP_KERNEL);
-	if (!pp1) {
-		pr_err("error: cannot allocate memory\n");
+	if (!pp1)
 		return -ENOMEM;
-	}
 	pp2 = pp1 + pgsize;
 	pp3 = pp2 + pgsize;
 	pp4 = pp3 + pgsize;
@@ -210,24 +145,16 @@
 
 	/* Read 2nd-to-last page to pp1 */
 	addr = addrn - pgsize - pgsize;
-	err = mtd_read(mtd, addr, pgsize, &read, pp1);
-	if (mtd_is_bitflip(err))
-		err = 0;
-	if (err || read != pgsize) {
-		pr_err("error: read failed at %#llx\n",
-		       (long long)addr);
+	err = mtdtest_read(mtd, addr, pgsize, pp1);
+	if (err) {
 		kfree(pp1);
 		return err;
 	}
 
 	/* Read 3rd-to-last page to pp1 */
 	addr = addrn - pgsize - pgsize - pgsize;
-	err = mtd_read(mtd, addr, pgsize, &read, pp1);
-	if (mtd_is_bitflip(err))
-		err = 0;
-	if (err || read != pgsize) {
-		pr_err("error: read failed at %#llx\n",
-		       (long long)addr);
+	err = mtdtest_read(mtd, addr, pgsize, pp1);
+	if (err) {
 		kfree(pp1);
 		return err;
 	}
@@ -235,12 +162,8 @@
 	/* Read first page to pp2 */
 	addr = addr0;
 	pr_info("reading page at %#llx\n", (long long)addr);
-	err = mtd_read(mtd, addr, pgsize, &read, pp2);
-	if (mtd_is_bitflip(err))
-		err = 0;
-	if (err || read != pgsize) {
-		pr_err("error: read failed at %#llx\n",
-		       (long long)addr);
+	err = mtdtest_read(mtd, addr, pgsize, pp2);
+	if (err) {
 		kfree(pp1);
 		return err;
 	}
@@ -248,12 +171,8 @@
 	/* Read last page to pp3 */
 	addr = addrn - pgsize;
 	pr_info("reading page at %#llx\n", (long long)addr);
-	err = mtd_read(mtd, addr, pgsize, &read, pp3);
-	if (mtd_is_bitflip(err))
-		err = 0;
-	if (err || read != pgsize) {
-		pr_err("error: read failed at %#llx\n",
-		       (long long)addr);
+	err = mtdtest_read(mtd, addr, pgsize, pp3);
+	if (err) {
 		kfree(pp1);
 		return err;
 	}
@@ -261,12 +180,8 @@
 	/* Read first page again to pp4 */
 	addr = addr0;
 	pr_info("reading page at %#llx\n", (long long)addr);
-	err = mtd_read(mtd, addr, pgsize, &read, pp4);
-	if (mtd_is_bitflip(err))
-		err = 0;
-	if (err || read != pgsize) {
-		pr_err("error: read failed at %#llx\n",
-		       (long long)addr);
+	err = mtdtest_read(mtd, addr, pgsize, pp4);
+	if (err) {
 		kfree(pp1);
 		return err;
 	}
@@ -285,7 +200,6 @@
 
 static int erasecrosstest(void)
 {
-	size_t read, written;
 	int err = 0, i, ebnum, ebnum2;
 	loff_t addr0;
 	char *readbuf = twopages;
@@ -304,30 +218,22 @@
 		ebnum2 -= 1;
 
 	pr_info("erasing block %d\n", ebnum);
-	err = erase_eraseblock(ebnum);
+	err = mtdtest_erase_eraseblock(mtd, ebnum);
 	if (err)
 		return err;
 
 	pr_info("writing 1st page of block %d\n", ebnum);
 	prandom_bytes_state(&rnd_state, writebuf, pgsize);
 	strcpy(writebuf, "There is no data like this!");
-	err = mtd_write(mtd, addr0, pgsize, &written, writebuf);
-	if (err || written != pgsize) {
-		pr_info("error: write failed at %#llx\n",
-		       (long long)addr0);
-		return err ? err : -1;
-	}
+	err = mtdtest_write(mtd, addr0, pgsize, writebuf);
+	if (err)
+		return err;
 
 	pr_info("reading 1st page of block %d\n", ebnum);
 	memset(readbuf, 0, pgsize);
-	err = mtd_read(mtd, addr0, pgsize, &read, readbuf);
-	if (mtd_is_bitflip(err))
-		err = 0;
-	if (err || read != pgsize) {
-		pr_err("error: read failed at %#llx\n",
-		       (long long)addr0);
-		return err ? err : -1;
-	}
+	err = mtdtest_read(mtd, addr0, pgsize, readbuf);
+	if (err)
+		return err;
 
 	pr_info("verifying 1st page of block %d\n", ebnum);
 	if (memcmp(writebuf, readbuf, pgsize)) {
@@ -337,35 +243,27 @@
 	}
 
 	pr_info("erasing block %d\n", ebnum);
-	err = erase_eraseblock(ebnum);
+	err = mtdtest_erase_eraseblock(mtd, ebnum);
 	if (err)
 		return err;
 
 	pr_info("writing 1st page of block %d\n", ebnum);
 	prandom_bytes_state(&rnd_state, writebuf, pgsize);
 	strcpy(writebuf, "There is no data like this!");
-	err = mtd_write(mtd, addr0, pgsize, &written, writebuf);
-	if (err || written != pgsize) {
-		pr_err("error: write failed at %#llx\n",
-		       (long long)addr0);
-		return err ? err : -1;
-	}
+	err = mtdtest_write(mtd, addr0, pgsize, writebuf);
+	if (err)
+		return err;
 
 	pr_info("erasing block %d\n", ebnum2);
-	err = erase_eraseblock(ebnum2);
+	err = mtdtest_erase_eraseblock(mtd, ebnum2);
 	if (err)
 		return err;
 
 	pr_info("reading 1st page of block %d\n", ebnum);
 	memset(readbuf, 0, pgsize);
-	err = mtd_read(mtd, addr0, pgsize, &read, readbuf);
-	if (mtd_is_bitflip(err))
-		err = 0;
-	if (err || read != pgsize) {
-		pr_err("error: read failed at %#llx\n",
-		       (long long)addr0);
-		return err ? err : -1;
-	}
+	err = mtdtest_read(mtd, addr0, pgsize, readbuf);
+	if (err)
+		return err;
 
 	pr_info("verifying 1st page of block %d\n", ebnum);
 	if (memcmp(writebuf, readbuf, pgsize)) {
@@ -381,7 +279,6 @@
 
 static int erasetest(void)
 {
-	size_t read, written;
 	int err = 0, i, ebnum, ok = 1;
 	loff_t addr0;
 
@@ -395,33 +292,25 @@
 	}
 
 	pr_info("erasing block %d\n", ebnum);
-	err = erase_eraseblock(ebnum);
+	err = mtdtest_erase_eraseblock(mtd, ebnum);
 	if (err)
 		return err;
 
 	pr_info("writing 1st page of block %d\n", ebnum);
 	prandom_bytes_state(&rnd_state, writebuf, pgsize);
-	err = mtd_write(mtd, addr0, pgsize, &written, writebuf);
-	if (err || written != pgsize) {
-		pr_err("error: write failed at %#llx\n",
-		       (long long)addr0);
-		return err ? err : -1;
-	}
+	err = mtdtest_write(mtd, addr0, pgsize, writebuf);
+	if (err)
+		return err;
 
 	pr_info("erasing block %d\n", ebnum);
-	err = erase_eraseblock(ebnum);
+	err = mtdtest_erase_eraseblock(mtd, ebnum);
 	if (err)
 		return err;
 
 	pr_info("reading 1st page of block %d\n", ebnum);
-	err = mtd_read(mtd, addr0, pgsize, &read, twopages);
-	if (mtd_is_bitflip(err))
-		err = 0;
-	if (err || read != pgsize) {
-		pr_err("error: read failed at %#llx\n",
-		       (long long)addr0);
-		return err ? err : -1;
-	}
+	err = mtdtest_read(mtd, addr0, pgsize, twopages);
+	if (err)
+		return err;
 
 	pr_info("verifying 1st page of block %d is all 0xff\n",
 	       ebnum);
@@ -440,38 +329,6 @@
 	return err;
 }
 
-static int is_block_bad(int ebnum)
-{
-	loff_t addr = ebnum * mtd->erasesize;
-	int ret;
-
-	ret = mtd_block_isbad(mtd, addr);
-	if (ret)
-		pr_info("block %d is bad\n", ebnum);
-	return ret;
-}
-
-static int scan_for_bad_eraseblocks(void)
-{
-	int i, bad = 0;
-
-	bbt = kzalloc(ebcnt, GFP_KERNEL);
-	if (!bbt) {
-		pr_err("error: cannot allocate memory\n");
-		return -ENOMEM;
-	}
-
-	pr_info("scanning for bad eraseblocks\n");
-	for (i = 0; i < ebcnt; ++i) {
-		bbt[i] = is_block_bad(i) ? 1 : 0;
-		if (bbt[i])
-			bad += 1;
-		cond_resched();
-	}
-	pr_info("scanned %d eraseblocks, %d are bad\n", i, bad);
-	return 0;
-}
-
 static int __init mtd_pagetest_init(void)
 {
 	int err = 0;
@@ -516,36 +373,28 @@
 	err = -ENOMEM;
 	bufsize = pgsize * 2;
 	writebuf = kmalloc(mtd->erasesize, GFP_KERNEL);
-	if (!writebuf) {
-		pr_err("error: cannot allocate memory\n");
+	if (!writebuf)
 		goto out;
-	}
 	twopages = kmalloc(bufsize, GFP_KERNEL);
-	if (!twopages) {
-		pr_err("error: cannot allocate memory\n");
+	if (!twopages)
 		goto out;
-	}
 	boundary = kmalloc(bufsize, GFP_KERNEL);
-	if (!boundary) {
-		pr_err("error: cannot allocate memory\n");
+	if (!boundary)
 		goto out;
-	}
 
-	err = scan_for_bad_eraseblocks();
+	bbt = kzalloc(ebcnt, GFP_KERNEL);
+	if (!bbt)
+		goto out;
+	err = mtdtest_scan_for_bad_eraseblocks(mtd, bbt, 0, ebcnt);
 	if (err)
 		goto out;
 
 	/* Erase all eraseblocks */
 	pr_info("erasing whole device\n");
-	for (i = 0; i < ebcnt; ++i) {
-		if (bbt[i])
-			continue;
-		err = erase_eraseblock(i);
-		if (err)
-			goto out;
-		cond_resched();
-	}
-	pr_info("erased %u eraseblocks\n", i);
+	err = mtdtest_erase_good_eraseblocks(mtd, bbt, 0, ebcnt);
+	if (err)
+		goto out;
+	pr_info("erased %u eraseblocks\n", ebcnt);
 
 	/* Write all eraseblocks */
 	prandom_seed_state(&rnd_state, 1);
diff --git a/drivers/mtd/tests/mtd_readtest.c b/drivers/mtd/tests/readtest.c
similarity index 83%
rename from drivers/mtd/tests/mtd_readtest.c
rename to drivers/mtd/tests/readtest.c
index 266de04..626e66d 100644
--- a/drivers/mtd/tests/mtd_readtest.c
+++ b/drivers/mtd/tests/readtest.c
@@ -29,6 +29,8 @@
 #include <linux/slab.h>
 #include <linux/sched.h>
 
+#include "mtd_test.h"
+
 static int dev = -EINVAL;
 module_param(dev, int, S_IRUGO);
 MODULE_PARM_DESC(dev, "MTD device number to use");
@@ -44,7 +46,6 @@
 
 static int read_eraseblock_by_page(int ebnum)
 {
-	size_t read;
 	int i, ret, err = 0;
 	loff_t addr = ebnum * mtd->erasesize;
 	void *buf = iobuf;
@@ -52,16 +53,10 @@
 
 	for (i = 0; i < pgcnt; i++) {
 		memset(buf, 0 , pgsize);
-		ret = mtd_read(mtd, addr, pgsize, &read, buf);
-		if (ret == -EUCLEAN)
-			ret = 0;
-		if (ret || read != pgsize) {
-			pr_err("error: read failed at %#llx\n",
-			       (long long)addr);
+		ret = mtdtest_read(mtd, addr, pgsize, buf);
+		if (ret) {
 			if (!err)
 				err = ret;
-			if (!err)
-				err = -EINVAL;
 		}
 		if (mtd->oobsize) {
 			struct mtd_oob_ops ops;
@@ -127,41 +122,6 @@
 		}
 }
 
-static int is_block_bad(int ebnum)
-{
-	loff_t addr = ebnum * mtd->erasesize;
-	int ret;
-
-	ret = mtd_block_isbad(mtd, addr);
-	if (ret)
-		pr_info("block %d is bad\n", ebnum);
-	return ret;
-}
-
-static int scan_for_bad_eraseblocks(void)
-{
-	int i, bad = 0;
-
-	bbt = kzalloc(ebcnt, GFP_KERNEL);
-	if (!bbt) {
-		pr_err("error: cannot allocate memory\n");
-		return -ENOMEM;
-	}
-
-	if (!mtd_can_have_bb(mtd))
-		return 0;
-
-	pr_info("scanning for bad eraseblocks\n");
-	for (i = 0; i < ebcnt; ++i) {
-		bbt[i] = is_block_bad(i) ? 1 : 0;
-		if (bbt[i])
-			bad += 1;
-		cond_resched();
-	}
-	pr_info("scanned %d eraseblocks, %d are bad\n", i, bad);
-	return 0;
-}
-
 static int __init mtd_readtest_init(void)
 {
 	uint64_t tmp;
@@ -204,17 +164,16 @@
 
 	err = -ENOMEM;
 	iobuf = kmalloc(mtd->erasesize, GFP_KERNEL);
-	if (!iobuf) {
-		pr_err("error: cannot allocate memory\n");
+	if (!iobuf)
 		goto out;
-	}
 	iobuf1 = kmalloc(mtd->erasesize, GFP_KERNEL);
-	if (!iobuf1) {
-		pr_err("error: cannot allocate memory\n");
+	if (!iobuf1)
 		goto out;
-	}
 
-	err = scan_for_bad_eraseblocks();
+	bbt = kzalloc(ebcnt, GFP_KERNEL);
+	if (!bbt)
+		goto out;
+	err = mtdtest_scan_for_bad_eraseblocks(mtd, bbt, 0, ebcnt);
 	if (err)
 		goto out;
 
diff --git a/drivers/mtd/tests/mtd_speedtest.c b/drivers/mtd/tests/speedtest.c
similarity index 69%
rename from drivers/mtd/tests/mtd_speedtest.c
rename to drivers/mtd/tests/speedtest.c
index a6ce9c1..87ff6a2 100644
--- a/drivers/mtd/tests/mtd_speedtest.c
+++ b/drivers/mtd/tests/speedtest.c
@@ -30,6 +30,8 @@
 #include <linux/sched.h>
 #include <linux/random.h>
 
+#include "mtd_test.h"
+
 static int dev = -EINVAL;
 module_param(dev, int, S_IRUGO);
 MODULE_PARM_DESC(dev, "MTD device number to use");
@@ -49,33 +51,6 @@
 static int goodebcnt;
 static struct timeval start, finish;
 
-
-static int erase_eraseblock(int ebnum)
-{
-	int err;
-	struct erase_info ei;
-	loff_t addr = ebnum * mtd->erasesize;
-
-	memset(&ei, 0, sizeof(struct erase_info));
-	ei.mtd  = mtd;
-	ei.addr = addr;
-	ei.len  = mtd->erasesize;
-
-	err = mtd_erase(mtd, &ei);
-	if (err) {
-		pr_err("error %d while erasing EB %d\n", err, ebnum);
-		return err;
-	}
-
-	if (ei.state == MTD_ERASE_FAILED) {
-		pr_err("some erase error occurred at EB %d\n",
-		       ebnum);
-		return -EIO;
-	}
-
-	return 0;
-}
-
 static int multiblock_erase(int ebnum, int blocks)
 {
 	int err;
@@ -103,54 +78,23 @@
 	return 0;
 }
 
-static int erase_whole_device(void)
-{
-	int err;
-	unsigned int i;
-
-	for (i = 0; i < ebcnt; ++i) {
-		if (bbt[i])
-			continue;
-		err = erase_eraseblock(i);
-		if (err)
-			return err;
-		cond_resched();
-	}
-	return 0;
-}
-
 static int write_eraseblock(int ebnum)
 {
-	size_t written;
-	int err = 0;
 	loff_t addr = ebnum * mtd->erasesize;
 
-	err = mtd_write(mtd, addr, mtd->erasesize, &written, iobuf);
-	if (err || written != mtd->erasesize) {
-		pr_err("error: write failed at %#llx\n", addr);
-		if (!err)
-			err = -EINVAL;
-	}
-
-	return err;
+	return mtdtest_write(mtd, addr, mtd->erasesize, iobuf);
 }
 
 static int write_eraseblock_by_page(int ebnum)
 {
-	size_t written;
 	int i, err = 0;
 	loff_t addr = ebnum * mtd->erasesize;
 	void *buf = iobuf;
 
 	for (i = 0; i < pgcnt; i++) {
-		err = mtd_write(mtd, addr, pgsize, &written, buf);
-		if (err || written != pgsize) {
-			pr_err("error: write failed at %#llx\n",
-			       addr);
-			if (!err)
-				err = -EINVAL;
+		err = mtdtest_write(mtd, addr, pgsize, buf);
+		if (err)
 			break;
-		}
 		addr += pgsize;
 		buf += pgsize;
 	}
@@ -160,74 +104,41 @@
 
 static int write_eraseblock_by_2pages(int ebnum)
 {
-	size_t written, sz = pgsize * 2;
+	size_t sz = pgsize * 2;
 	int i, n = pgcnt / 2, err = 0;
 	loff_t addr = ebnum * mtd->erasesize;
 	void *buf = iobuf;
 
 	for (i = 0; i < n; i++) {
-		err = mtd_write(mtd, addr, sz, &written, buf);
-		if (err || written != sz) {
-			pr_err("error: write failed at %#llx\n",
-			       addr);
-			if (!err)
-				err = -EINVAL;
+		err = mtdtest_write(mtd, addr, sz, buf);
+		if (err)
 			return err;
-		}
 		addr += sz;
 		buf += sz;
 	}
-	if (pgcnt % 2) {
-		err = mtd_write(mtd, addr, pgsize, &written, buf);
-		if (err || written != pgsize) {
-			pr_err("error: write failed at %#llx\n",
-			       addr);
-			if (!err)
-				err = -EINVAL;
-		}
-	}
+	if (pgcnt % 2)
+		err = mtdtest_write(mtd, addr, pgsize, buf);
 
 	return err;
 }
 
 static int read_eraseblock(int ebnum)
 {
-	size_t read;
-	int err = 0;
 	loff_t addr = ebnum * mtd->erasesize;
 
-	err = mtd_read(mtd, addr, mtd->erasesize, &read, iobuf);
-	/* Ignore corrected ECC errors */
-	if (mtd_is_bitflip(err))
-		err = 0;
-	if (err || read != mtd->erasesize) {
-		pr_err("error: read failed at %#llx\n", addr);
-		if (!err)
-			err = -EINVAL;
-	}
-
-	return err;
+	return mtdtest_read(mtd, addr, mtd->erasesize, iobuf);
 }
 
 static int read_eraseblock_by_page(int ebnum)
 {
-	size_t read;
 	int i, err = 0;
 	loff_t addr = ebnum * mtd->erasesize;
 	void *buf = iobuf;
 
 	for (i = 0; i < pgcnt; i++) {
-		err = mtd_read(mtd, addr, pgsize, &read, buf);
-		/* Ignore corrected ECC errors */
-		if (mtd_is_bitflip(err))
-			err = 0;
-		if (err || read != pgsize) {
-			pr_err("error: read failed at %#llx\n",
-			       addr);
-			if (!err)
-				err = -EINVAL;
+		err = mtdtest_read(mtd, addr, pgsize, buf);
+		if (err)
 			break;
-		}
 		addr += pgsize;
 		buf += pgsize;
 	}
@@ -237,53 +148,24 @@
 
 static int read_eraseblock_by_2pages(int ebnum)
 {
-	size_t read, sz = pgsize * 2;
+	size_t sz = pgsize * 2;
 	int i, n = pgcnt / 2, err = 0;
 	loff_t addr = ebnum * mtd->erasesize;
 	void *buf = iobuf;
 
 	for (i = 0; i < n; i++) {
-		err = mtd_read(mtd, addr, sz, &read, buf);
-		/* Ignore corrected ECC errors */
-		if (mtd_is_bitflip(err))
-			err = 0;
-		if (err || read != sz) {
-			pr_err("error: read failed at %#llx\n",
-			       addr);
-			if (!err)
-				err = -EINVAL;
+		err = mtdtest_read(mtd, addr, sz, buf);
+		if (err)
 			return err;
-		}
 		addr += sz;
 		buf += sz;
 	}
-	if (pgcnt % 2) {
-		err = mtd_read(mtd, addr, pgsize, &read, buf);
-		/* Ignore corrected ECC errors */
-		if (mtd_is_bitflip(err))
-			err = 0;
-		if (err || read != pgsize) {
-			pr_err("error: read failed at %#llx\n",
-			       addr);
-			if (!err)
-				err = -EINVAL;
-		}
-	}
+	if (pgcnt % 2)
+		err = mtdtest_read(mtd, addr, pgsize, buf);
 
 	return err;
 }
 
-static int is_block_bad(int ebnum)
-{
-	loff_t addr = ebnum * mtd->erasesize;
-	int ret;
-
-	ret = mtd_block_isbad(mtd, addr);
-	if (ret)
-		pr_info("block %d is bad\n", ebnum);
-	return ret;
-}
-
 static inline void start_timing(void)
 {
 	do_gettimeofday(&start);
@@ -308,32 +190,6 @@
 	return k;
 }
 
-static int scan_for_bad_eraseblocks(void)
-{
-	int i, bad = 0;
-
-	bbt = kzalloc(ebcnt, GFP_KERNEL);
-	if (!bbt) {
-		pr_err("error: cannot allocate memory\n");
-		return -ENOMEM;
-	}
-
-	if (!mtd_can_have_bb(mtd))
-		goto out;
-
-	pr_info("scanning for bad eraseblocks\n");
-	for (i = 0; i < ebcnt; ++i) {
-		bbt[i] = is_block_bad(i) ? 1 : 0;
-		if (bbt[i])
-			bad += 1;
-		cond_resched();
-	}
-	pr_info("scanned %d eraseblocks, %d are bad\n", i, bad);
-out:
-	goodebcnt = ebcnt - bad;
-	return 0;
-}
-
 static int __init mtd_speedtest_init(void)
 {
 	int err, i, blocks, j, k;
@@ -384,18 +240,23 @@
 
 	err = -ENOMEM;
 	iobuf = kmalloc(mtd->erasesize, GFP_KERNEL);
-	if (!iobuf) {
-		pr_err("error: cannot allocate memory\n");
+	if (!iobuf)
 		goto out;
-	}
 
 	prandom_bytes(iobuf, mtd->erasesize);
 
-	err = scan_for_bad_eraseblocks();
+	bbt = kzalloc(ebcnt, GFP_KERNEL);
+	if (!bbt)
+		goto out;
+	err = mtdtest_scan_for_bad_eraseblocks(mtd, bbt, 0, ebcnt);
 	if (err)
 		goto out;
+	for (i = 0; i < ebcnt; i++) {
+		if (!bbt[i])
+			goodebcnt++;
+	}
 
-	err = erase_whole_device();
+	err = mtdtest_erase_good_eraseblocks(mtd, bbt, 0, ebcnt);
 	if (err)
 		goto out;
 
@@ -429,7 +290,7 @@
 	speed = calc_speed();
 	pr_info("eraseblock read speed is %ld KiB/s\n", speed);
 
-	err = erase_whole_device();
+	err = mtdtest_erase_good_eraseblocks(mtd, bbt, 0, ebcnt);
 	if (err)
 		goto out;
 
@@ -463,7 +324,7 @@
 	speed = calc_speed();
 	pr_info("page read speed is %ld KiB/s\n", speed);
 
-	err = erase_whole_device();
+	err = mtdtest_erase_good_eraseblocks(mtd, bbt, 0, ebcnt);
 	if (err)
 		goto out;
 
@@ -500,14 +361,9 @@
 	/* Erase all eraseblocks */
 	pr_info("Testing erase speed\n");
 	start_timing();
-	for (i = 0; i < ebcnt; ++i) {
-		if (bbt[i])
-			continue;
-		err = erase_eraseblock(i);
-		if (err)
-			goto out;
-		cond_resched();
-	}
+	err = mtdtest_erase_good_eraseblocks(mtd, bbt, 0, ebcnt);
+	if (err)
+		goto out;
 	stop_timing();
 	speed = calc_speed();
 	pr_info("erase speed is %ld KiB/s\n", speed);
diff --git a/drivers/mtd/tests/mtd_stresstest.c b/drivers/mtd/tests/stresstest.c
similarity index 74%
rename from drivers/mtd/tests/mtd_stresstest.c
rename to drivers/mtd/tests/stresstest.c
index 787f539..c9d42cc 100644
--- a/drivers/mtd/tests/mtd_stresstest.c
+++ b/drivers/mtd/tests/stresstest.c
@@ -31,6 +31,8 @@
 #include <linux/vmalloc.h>
 #include <linux/random.h>
 
+#include "mtd_test.h"
+
 static int dev = -EINVAL;
 module_param(dev, int, S_IRUGO);
 MODULE_PARM_DESC(dev, "MTD device number to use");
@@ -81,49 +83,11 @@
 	return len;
 }
 
-static int erase_eraseblock(int ebnum)
-{
-	int err;
-	struct erase_info ei;
-	loff_t addr = ebnum * mtd->erasesize;
-
-	memset(&ei, 0, sizeof(struct erase_info));
-	ei.mtd  = mtd;
-	ei.addr = addr;
-	ei.len  = mtd->erasesize;
-
-	err = mtd_erase(mtd, &ei);
-	if (unlikely(err)) {
-		pr_err("error %d while erasing EB %d\n", err, ebnum);
-		return err;
-	}
-
-	if (unlikely(ei.state == MTD_ERASE_FAILED)) {
-		pr_err("some erase error occurred at EB %d\n",
-		       ebnum);
-		return -EIO;
-	}
-
-	return 0;
-}
-
-static int is_block_bad(int ebnum)
-{
-	loff_t addr = ebnum * mtd->erasesize;
-	int ret;
-
-	ret = mtd_block_isbad(mtd, addr);
-	if (ret)
-		pr_info("block %d is bad\n", ebnum);
-	return ret;
-}
-
 static int do_read(void)
 {
-	size_t read;
 	int eb = rand_eb();
 	int offs = rand_offs();
-	int len = rand_len(offs), err;
+	int len = rand_len(offs);
 	loff_t addr;
 
 	if (bbt[eb + 1]) {
@@ -133,28 +97,17 @@
 			len = mtd->erasesize - offs;
 	}
 	addr = eb * mtd->erasesize + offs;
-	err = mtd_read(mtd, addr, len, &read, readbuf);
-	if (mtd_is_bitflip(err))
-		err = 0;
-	if (unlikely(err || read != len)) {
-		pr_err("error: read failed at 0x%llx\n",
-		       (long long)addr);
-		if (!err)
-			err = -EINVAL;
-		return err;
-	}
-	return 0;
+	return mtdtest_read(mtd, addr, len, readbuf);
 }
 
 static int do_write(void)
 {
 	int eb = rand_eb(), offs, err, len;
-	size_t written;
 	loff_t addr;
 
 	offs = offsets[eb];
 	if (offs >= mtd->erasesize) {
-		err = erase_eraseblock(eb);
+		err = mtdtest_erase_eraseblock(mtd, eb);
 		if (err)
 			return err;
 		offs = offsets[eb] = 0;
@@ -165,21 +118,16 @@
 		if (bbt[eb + 1])
 			len = mtd->erasesize - offs;
 		else {
-			err = erase_eraseblock(eb + 1);
+			err = mtdtest_erase_eraseblock(mtd, eb + 1);
 			if (err)
 				return err;
 			offsets[eb + 1] = 0;
 		}
 	}
 	addr = eb * mtd->erasesize + offs;
-	err = mtd_write(mtd, addr, len, &written, writebuf);
-	if (unlikely(err || written != len)) {
-		pr_err("error: write failed at 0x%llx\n",
-		       (long long)addr);
-		if (!err)
-			err = -EINVAL;
+	err = mtdtest_write(mtd, addr, len, writebuf);
+	if (unlikely(err))
 		return err;
-	}
 	offs += len;
 	while (offs > mtd->erasesize) {
 		offsets[eb++] = mtd->erasesize;
@@ -197,30 +145,6 @@
 		return do_write();
 }
 
-static int scan_for_bad_eraseblocks(void)
-{
-	int i, bad = 0;
-
-	bbt = kzalloc(ebcnt, GFP_KERNEL);
-	if (!bbt) {
-		pr_err("error: cannot allocate memory\n");
-		return -ENOMEM;
-	}
-
-	if (!mtd_can_have_bb(mtd))
-		return 0;
-
-	pr_info("scanning for bad eraseblocks\n");
-	for (i = 0; i < ebcnt; ++i) {
-		bbt[i] = is_block_bad(i) ? 1 : 0;
-		if (bbt[i])
-			bad += 1;
-		cond_resched();
-	}
-	pr_info("scanned %d eraseblocks, %d are bad\n", i, bad);
-	return 0;
-}
-
 static int __init mtd_stresstest_init(void)
 {
 	int err;
@@ -276,15 +200,16 @@
 	readbuf = vmalloc(bufsize);
 	writebuf = vmalloc(bufsize);
 	offsets = kmalloc(ebcnt * sizeof(int), GFP_KERNEL);
-	if (!readbuf || !writebuf || !offsets) {
-		pr_err("error: cannot allocate memory\n");
+	if (!readbuf || !writebuf || !offsets)
 		goto out;
-	}
 	for (i = 0; i < ebcnt; i++)
 		offsets[i] = mtd->erasesize;
 	prandom_bytes(writebuf, bufsize);
 
-	err = scan_for_bad_eraseblocks();
+	bbt = kzalloc(ebcnt, GFP_KERNEL);
+	if (!bbt)
+		goto out;
+	err = mtdtest_scan_for_bad_eraseblocks(mtd, bbt, 0, ebcnt);
 	if (err)
 		goto out;
 
diff --git a/drivers/mtd/tests/mtd_subpagetest.c b/drivers/mtd/tests/subpagetest.c
similarity index 86%
rename from drivers/mtd/tests/mtd_subpagetest.c
rename to drivers/mtd/tests/subpagetest.c
index aade56f..e2c0adf 100644
--- a/drivers/mtd/tests/mtd_subpagetest.c
+++ b/drivers/mtd/tests/subpagetest.c
@@ -30,6 +30,8 @@
 #include <linux/sched.h>
 #include <linux/random.h>
 
+#include "mtd_test.h"
+
 static int dev = -EINVAL;
 module_param(dev, int, S_IRUGO);
 MODULE_PARM_DESC(dev, "MTD device number to use");
@@ -51,50 +53,6 @@
 	memset(buf, 0, len);
 }
 
-static int erase_eraseblock(int ebnum)
-{
-	int err;
-	struct erase_info ei;
-	loff_t addr = ebnum * mtd->erasesize;
-
-	memset(&ei, 0, sizeof(struct erase_info));
-	ei.mtd  = mtd;
-	ei.addr = addr;
-	ei.len  = mtd->erasesize;
-
-	err = mtd_erase(mtd, &ei);
-	if (err) {
-		pr_err("error %d while erasing EB %d\n", err, ebnum);
-		return err;
-	}
-
-	if (ei.state == MTD_ERASE_FAILED) {
-		pr_err("some erase error occurred at EB %d\n",
-		       ebnum);
-		return -EIO;
-	}
-
-	return 0;
-}
-
-static int erase_whole_device(void)
-{
-	int err;
-	unsigned int i;
-
-	pr_info("erasing whole device\n");
-	for (i = 0; i < ebcnt; ++i) {
-		if (bbt[i])
-			continue;
-		err = erase_eraseblock(i);
-		if (err)
-			return err;
-		cond_resched();
-	}
-	pr_info("erased %u eraseblocks\n", i);
-	return 0;
-}
-
 static int write_eraseblock(int ebnum)
 {
 	size_t written;
@@ -317,38 +275,6 @@
 	return 0;
 }
 
-static int is_block_bad(int ebnum)
-{
-	loff_t addr = ebnum * mtd->erasesize;
-	int ret;
-
-	ret = mtd_block_isbad(mtd, addr);
-	if (ret)
-		pr_info("block %d is bad\n", ebnum);
-	return ret;
-}
-
-static int scan_for_bad_eraseblocks(void)
-{
-	int i, bad = 0;
-
-	bbt = kzalloc(ebcnt, GFP_KERNEL);
-	if (!bbt) {
-		pr_err("error: cannot allocate memory\n");
-		return -ENOMEM;
-	}
-
-	pr_info("scanning for bad eraseblocks\n");
-	for (i = 0; i < ebcnt; ++i) {
-		bbt[i] = is_block_bad(i) ? 1 : 0;
-		if (bbt[i])
-			bad += 1;
-		cond_resched();
-	}
-	pr_info("scanned %d eraseblocks, %d are bad\n", i, bad);
-	return 0;
-}
-
 static int __init mtd_subpagetest_init(void)
 {
 	int err = 0;
@@ -393,21 +319,20 @@
 	err = -ENOMEM;
 	bufsize = subpgsize * 32;
 	writebuf = kmalloc(bufsize, GFP_KERNEL);
-	if (!writebuf) {
-		pr_info("error: cannot allocate memory\n");
+	if (!writebuf)
 		goto out;
-	}
 	readbuf = kmalloc(bufsize, GFP_KERNEL);
-	if (!readbuf) {
-		pr_info("error: cannot allocate memory\n");
+	if (!readbuf)
 		goto out;
-	}
+	bbt = kzalloc(ebcnt, GFP_KERNEL);
+	if (!bbt)
+		goto out;
 
-	err = scan_for_bad_eraseblocks();
+	err = mtdtest_scan_for_bad_eraseblocks(mtd, bbt, 0, ebcnt);
 	if (err)
 		goto out;
 
-	err = erase_whole_device();
+	err = mtdtest_erase_good_eraseblocks(mtd, bbt, 0, ebcnt);
 	if (err)
 		goto out;
 
@@ -439,7 +364,7 @@
 	}
 	pr_info("verified %u eraseblocks\n", i);
 
-	err = erase_whole_device();
+	err = mtdtest_erase_good_eraseblocks(mtd, bbt, 0, ebcnt);
 	if (err)
 		goto out;
 
@@ -477,7 +402,7 @@
 	}
 	pr_info("verified %u eraseblocks\n", i);
 
-	err = erase_whole_device();
+	err = mtdtest_erase_good_eraseblocks(mtd, bbt, 0, ebcnt);
 	if (err)
 		goto out;
 
diff --git a/drivers/mtd/tests/mtd_torturetest.c b/drivers/mtd/tests/torturetest.c
similarity index 90%
rename from drivers/mtd/tests/mtd_torturetest.c
rename to drivers/mtd/tests/torturetest.c
index 3a9f6a6..eeab969 100644
--- a/drivers/mtd/tests/mtd_torturetest.c
+++ b/drivers/mtd/tests/torturetest.c
@@ -32,6 +32,7 @@
 #include <linux/mtd/mtd.h>
 #include <linux/slab.h>
 #include <linux/sched.h>
+#include "mtd_test.h"
 
 #define RETRIES 3
 
@@ -93,35 +94,6 @@
 }
 
 /*
- * Erase eraseblock number @ebnum.
- */
-static inline int erase_eraseblock(int ebnum)
-{
-	int err;
-	struct erase_info ei;
-	loff_t addr = ebnum * mtd->erasesize;
-
-	memset(&ei, 0, sizeof(struct erase_info));
-	ei.mtd  = mtd;
-	ei.addr = addr;
-	ei.len  = mtd->erasesize;
-
-	err = mtd_erase(mtd, &ei);
-	if (err) {
-		pr_err("error %d while erasing EB %d\n", err, ebnum);
-		return err;
-	}
-
-	if (ei.state == MTD_ERASE_FAILED) {
-		pr_err("some erase error occurred at EB %d\n",
-		       ebnum);
-		return -EIO;
-	}
-
-	return 0;
-}
-
-/*
  * Check that the contents of eraseblock number @enbum is equivalent to the
  * @buf buffer.
  */
@@ -208,7 +180,7 @@
 static int __init tort_init(void)
 {
 	int err = 0, i, infinite = !cycles_count;
-	int *bad_ebs;
+	unsigned char *bad_ebs;
 
 	printk(KERN_INFO "\n");
 	printk(KERN_INFO "=================================================\n");
@@ -265,7 +237,7 @@
 	if (!check_buf)
 		goto out_patt_FF;
 
-	bad_ebs = kcalloc(ebcnt, sizeof(*bad_ebs), GFP_KERNEL);
+	bad_ebs = kzalloc(ebcnt, GFP_KERNEL);
 	if (!bad_ebs)
 		goto out_check_buf;
 
@@ -283,40 +255,16 @@
 		}
 	}
 
-	/*
-	 * Check if there is a bad eraseblock among those we are going to test.
-	 */
-	if (mtd_can_have_bb(mtd)) {
-		for (i = eb; i < eb + ebcnt; i++) {
-			err = mtd_block_isbad(mtd, (loff_t)i * mtd->erasesize);
-
-			if (err < 0) {
-				pr_info("block_isbad() returned %d "
-				       "for EB %d\n", err, i);
-				goto out;
-			}
-
-			if (err) {
-				pr_err("EB %d is bad. Skip it.\n", i);
-				bad_ebs[i - eb] = 1;
-			}
-		}
-	}
+	err = mtdtest_scan_for_bad_eraseblocks(mtd, bad_ebs, eb, ebcnt);
+	if (err)
+		goto out;
 
 	start_timing();
 	while (1) {
 		int i;
 		void *patt;
 
-		/* Erase all eraseblocks */
-		for (i = eb; i < eb + ebcnt; i++) {
-			if (bad_ebs[i - eb])
-				continue;
-			err = erase_eraseblock(i);
-			if (err)
-				goto out;
-			cond_resched();
-		}
+		mtdtest_erase_good_eraseblocks(mtd, bad_ebs, eb, ebcnt);
 
 		/* Check if the eraseblocks contain only 0xFF bytes */
 		if (check) {
diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig
index 78cc760..9d2009a 100644
--- a/drivers/of/Kconfig
+++ b/drivers/of/Kconfig
@@ -74,4 +74,10 @@
 	depends on MTD
 	def_bool y
 
+config OF_RESERVED_MEM
+	depends on OF_FLATTREE && (DMA_CMA || (HAVE_GENERIC_DMA_COHERENT && HAVE_MEMBLOCK))
+	def_bool y
+	help
+	  Initialization code for DMA reserved memory
+
 endmenu # OF
diff --git a/drivers/of/Makefile b/drivers/of/Makefile
index efd0510..ed9660a 100644
--- a/drivers/of/Makefile
+++ b/drivers/of/Makefile
@@ -9,3 +9,4 @@
 obj-$(CONFIG_OF_PCI)	+= of_pci.o
 obj-$(CONFIG_OF_PCI_IRQ)  += of_pci_irq.o
 obj-$(CONFIG_OF_MTD)	+= of_mtd.o
+obj-$(CONFIG_OF_RESERVED_MEM) += of_reserved_mem.o
diff --git a/drivers/of/base.c b/drivers/of/base.c
index e486e41..865d3f6 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -1176,65 +1176,10 @@
 }
 EXPORT_SYMBOL_GPL(of_property_count_strings);
 
-/**
- * of_parse_phandle - Resolve a phandle property to a device_node pointer
- * @np: Pointer to device node holding phandle property
- * @phandle_name: Name of property holding a phandle value
- * @index: For properties holding a table of phandles, this is the index into
- *         the table
- *
- * Returns the device_node pointer with refcount incremented.  Use
- * of_node_put() on it when done.
- */
-struct device_node *of_parse_phandle(const struct device_node *np,
-				     const char *phandle_name, int index)
-{
-	const __be32 *phandle;
-	int size;
-
-	phandle = of_get_property(np, phandle_name, &size);
-	if ((!phandle) || (size < sizeof(*phandle) * (index + 1)))
-		return NULL;
-
-	return of_find_node_by_phandle(be32_to_cpup(phandle + index));
-}
-EXPORT_SYMBOL(of_parse_phandle);
-
-/**
- * of_parse_phandle_with_args() - Find a node pointed by phandle in a list
- * @np:		pointer to a device tree node containing a list
- * @list_name:	property name that contains a list
- * @cells_name:	property name that specifies phandles' arguments count
- * @index:	index of a phandle to parse out
- * @out_args:	optional pointer to output arguments structure (will be filled)
- *
- * This function is useful to parse lists of phandles and their arguments.
- * Returns 0 on success and fills out_args, on error returns appropriate
- * errno value.
- *
- * Caller is responsible to call of_node_put() on the returned out_args->node
- * pointer.
- *
- * Example:
- *
- * phandle1: node1 {
- * 	#list-cells = <2>;
- * }
- *
- * phandle2: node2 {
- * 	#list-cells = <1>;
- * }
- *
- * node3 {
- * 	list = <&phandle1 1 2 &phandle2 3>;
- * }
- *
- * To get a device_node of the `node2' node you may call this:
- * of_parse_phandle_with_args(node3, "list", "#list-cells", 1, &args);
- */
 static int __of_parse_phandle_with_args(const struct device_node *np,
 					const char *list_name,
-					const char *cells_name, int index,
+					const char *cells_name,
+					int cell_count, int index,
 					struct of_phandle_args *out_args)
 {
 	const __be32 *list, *list_end;
@@ -1262,19 +1207,32 @@
 		if (phandle) {
 			/*
 			 * Find the provider node and parse the #*-cells
-			 * property to determine the argument length
+			 * property to determine the argument length.
+			 *
+			 * This is not needed if the cell count is hard-coded
+			 * (i.e. cells_name not set, but cell_count is set),
+			 * except when we're going to return the found node
+			 * below.
 			 */
-			node = of_find_node_by_phandle(phandle);
-			if (!node) {
-				pr_err("%s: could not find phandle\n",
-					 np->full_name);
-				goto err;
+			if (cells_name || cur_index == index) {
+				node = of_find_node_by_phandle(phandle);
+				if (!node) {
+					pr_err("%s: could not find phandle\n",
+						np->full_name);
+					goto err;
+				}
 			}
-			if (of_property_read_u32(node, cells_name, &count)) {
-				pr_err("%s: could not get %s for %s\n",
-					 np->full_name, cells_name,
-					 node->full_name);
-				goto err;
+
+			if (cells_name) {
+				if (of_property_read_u32(node, cells_name,
+							 &count)) {
+					pr_err("%s: could not get %s for %s\n",
+						np->full_name, cells_name,
+						node->full_name);
+					goto err;
+				}
+			} else {
+				count = cell_count;
 			}
 
 			/*
@@ -1334,17 +1292,117 @@
 	return rc;
 }
 
+/**
+ * of_parse_phandle - Resolve a phandle property to a device_node pointer
+ * @np: Pointer to device node holding phandle property
+ * @phandle_name: Name of property holding a phandle value
+ * @index: For properties holding a table of phandles, this is the index into
+ *         the table
+ *
+ * Returns the device_node pointer with refcount incremented.  Use
+ * of_node_put() on it when done.
+ */
+struct device_node *of_parse_phandle(const struct device_node *np,
+				     const char *phandle_name, int index)
+{
+	struct of_phandle_args args;
+
+	if (index < 0)
+		return NULL;
+
+	if (__of_parse_phandle_with_args(np, phandle_name, NULL, 0,
+					 index, &args))
+		return NULL;
+
+	return args.np;
+}
+EXPORT_SYMBOL(of_parse_phandle);
+
+/**
+ * of_parse_phandle_with_args() - Find a node pointed by phandle in a list
+ * @np:		pointer to a device tree node containing a list
+ * @list_name:	property name that contains a list
+ * @cells_name:	property name that specifies phandles' arguments count
+ * @index:	index of a phandle to parse out
+ * @out_args:	optional pointer to output arguments structure (will be filled)
+ *
+ * This function is useful to parse lists of phandles and their arguments.
+ * Returns 0 on success and fills out_args, on error returns appropriate
+ * errno value.
+ *
+ * Caller is responsible to call of_node_put() on the returned out_args->node
+ * pointer.
+ *
+ * Example:
+ *
+ * phandle1: node1 {
+ * 	#list-cells = <2>;
+ * }
+ *
+ * phandle2: node2 {
+ * 	#list-cells = <1>;
+ * }
+ *
+ * node3 {
+ * 	list = <&phandle1 1 2 &phandle2 3>;
+ * }
+ *
+ * To get a device_node of the `node2' node you may call this:
+ * of_parse_phandle_with_args(node3, "list", "#list-cells", 1, &args);
+ */
 int of_parse_phandle_with_args(const struct device_node *np, const char *list_name,
 				const char *cells_name, int index,
 				struct of_phandle_args *out_args)
 {
 	if (index < 0)
 		return -EINVAL;
-	return __of_parse_phandle_with_args(np, list_name, cells_name, index, out_args);
+	return __of_parse_phandle_with_args(np, list_name, cells_name, 0,
+					    index, out_args);
 }
 EXPORT_SYMBOL(of_parse_phandle_with_args);
 
 /**
+ * of_parse_phandle_with_fixed_args() - Find a node pointed by phandle in a list
+ * @np:		pointer to a device tree node containing a list
+ * @list_name:	property name that contains a list
+ * @cell_count: number of argument cells following the phandle
+ * @index:	index of a phandle to parse out
+ * @out_args:	optional pointer to output arguments structure (will be filled)
+ *
+ * This function is useful to parse lists of phandles and their arguments.
+ * Returns 0 on success and fills out_args, on error returns appropriate
+ * errno value.
+ *
+ * Caller is responsible to call of_node_put() on the returned out_args->node
+ * pointer.
+ *
+ * Example:
+ *
+ * phandle1: node1 {
+ * }
+ *
+ * phandle2: node2 {
+ * }
+ *
+ * node3 {
+ * 	list = <&phandle1 0 2 &phandle2 2 3>;
+ * }
+ *
+ * To get a device_node of the `node2' node you may call this:
+ * of_parse_phandle_with_fixed_args(node3, "list", 2, 1, &args);
+ */
+int of_parse_phandle_with_fixed_args(const struct device_node *np,
+				const char *list_name, int cell_count,
+				int index, struct of_phandle_args *out_args)
+{
+	if (index < 0)
+		return -EINVAL;
+	return __of_parse_phandle_with_args(np, list_name, NULL, cell_count,
+					   index, out_args);
+}
+EXPORT_SYMBOL(of_parse_phandle_with_fixed_args);
+
+/**
  * of_count_phandle_with_args() - Find the number of phandles references in a property
  * @np:		pointer to a device tree node containing a list
  * @list_name:	property name that contains a list
@@ -1362,7 +1420,8 @@
 int of_count_phandle_with_args(const struct device_node *np, const char *list_name,
 				const char *cells_name)
 {
-	return __of_parse_phandle_with_args(np, list_name, cells_name, -1, NULL);
+	return __of_parse_phandle_with_args(np, list_name, cells_name, 0, -1,
+					    NULL);
 }
 EXPORT_SYMBOL(of_count_phandle_with_args);
 
@@ -1734,6 +1793,7 @@
 		ap = dt_alloc(sizeof(*ap) + len + 1, 4);
 		if (!ap)
 			continue;
+		memset(ap, 0, sizeof(*ap) + len + 1);
 		ap->alias = start;
 		of_alias_add(ap, np, id, start, len);
 	}
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index b10ba00..229dd9d 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -11,12 +11,14 @@
 
 #include <linux/kernel.h>
 #include <linux/initrd.h>
+#include <linux/memblock.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_fdt.h>
 #include <linux/string.h>
 #include <linux/errno.h>
 #include <linux/slab.h>
+#include <linux/random.h>
 
 #include <asm/setup.h>  /* for COMMAND_LINE_SIZE */
 #ifdef CONFIG_PPC
@@ -125,13 +127,13 @@
 	return score;
 }
 
-static void *unflatten_dt_alloc(unsigned long *mem, unsigned long size,
+static void *unflatten_dt_alloc(void **mem, unsigned long size,
 				       unsigned long align)
 {
 	void *res;
 
-	*mem = ALIGN(*mem, align);
-	res = (void *)*mem;
+	*mem = PTR_ALIGN(*mem, align);
+	res = *mem;
 	*mem += size;
 
 	return res;
@@ -146,9 +148,9 @@
  * @allnextpp: pointer to ->allnext from last allocated device_node
  * @fpsize: Size of the node path up at the current depth.
  */
-static unsigned long unflatten_dt_node(struct boot_param_header *blob,
-				unsigned long mem,
-				unsigned long *p,
+static void * unflatten_dt_node(struct boot_param_header *blob,
+				void *mem,
+				void **p,
 				struct device_node *dad,
 				struct device_node ***allnextpp,
 				unsigned long fpsize)
@@ -161,15 +163,15 @@
 	int has_name = 0;
 	int new_format = 0;
 
-	tag = be32_to_cpup((__be32 *)(*p));
+	tag = be32_to_cpup(*p);
 	if (tag != OF_DT_BEGIN_NODE) {
 		pr_err("Weird tag at start of node: %x\n", tag);
 		return mem;
 	}
 	*p += 4;
-	pathp = (char *)*p;
+	pathp = *p;
 	l = allocl = strlen(pathp) + 1;
-	*p = ALIGN(*p + l, 4);
+	*p = PTR_ALIGN(*p + l, 4);
 
 	/* version 0x10 has a more compact unit name here instead of the full
 	 * path. we accumulate the full path size using "fpsize", we'll rebuild
@@ -201,7 +203,6 @@
 				__alignof__(struct device_node));
 	if (allnextpp) {
 		char *fn;
-		memset(np, 0, sizeof(*np));
 		np->full_name = fn = ((char *)np) + sizeof(*np);
 		if (new_format) {
 			/* rebuild full path for new format */
@@ -239,7 +240,7 @@
 		u32 sz, noff;
 		char *pname;
 
-		tag = be32_to_cpup((__be32 *)(*p));
+		tag = be32_to_cpup(*p);
 		if (tag == OF_DT_NOP) {
 			*p += 4;
 			continue;
@@ -247,11 +248,11 @@
 		if (tag != OF_DT_PROP)
 			break;
 		*p += 4;
-		sz = be32_to_cpup((__be32 *)(*p));
-		noff = be32_to_cpup((__be32 *)((*p) + 4));
+		sz = be32_to_cpup(*p);
+		noff = be32_to_cpup(*p + 4);
 		*p += 8;
 		if (be32_to_cpu(blob->version) < 0x10)
-			*p = ALIGN(*p, sz >= 8 ? 8 : 4);
+			*p = PTR_ALIGN(*p, sz >= 8 ? 8 : 4);
 
 		pname = of_fdt_get_string(blob, noff);
 		if (pname == NULL) {
@@ -281,11 +282,11 @@
 				np->phandle = be32_to_cpup((__be32 *)*p);
 			pp->name = pname;
 			pp->length = sz;
-			pp->value = (void *)*p;
+			pp->value = *p;
 			*prev_pp = pp;
 			prev_pp = &pp->next;
 		}
-		*p = ALIGN((*p) + sz, 4);
+		*p = PTR_ALIGN((*p) + sz, 4);
 	}
 	/* with version 0x10 we may not have the name property, recreate
 	 * it here from the unit name if absent
@@ -334,7 +335,7 @@
 		else
 			mem = unflatten_dt_node(blob, mem, p, np, allnextpp,
 						fpsize);
-		tag = be32_to_cpup((__be32 *)(*p));
+		tag = be32_to_cpup(*p);
 	}
 	if (tag != OF_DT_END_NODE) {
 		pr_err("Weird tag at end of node: %x\n", tag);
@@ -360,7 +361,8 @@
 			     struct device_node **mynodes,
 			     void * (*dt_alloc)(u64 size, u64 align))
 {
-	unsigned long start, mem, size;
+	unsigned long size;
+	void *start, *mem;
 	struct device_node **allnextp = mynodes;
 
 	pr_debug(" -> unflatten_device_tree()\n");
@@ -381,32 +383,28 @@
 	}
 
 	/* First pass, scan for size */
-	start = ((unsigned long)blob) +
-		be32_to_cpu(blob->off_dt_struct);
-	size = unflatten_dt_node(blob, 0, &start, NULL, NULL, 0);
-	size = (size | 3) + 1;
+	start = ((void *)blob) + be32_to_cpu(blob->off_dt_struct);
+	size = (unsigned long)unflatten_dt_node(blob, 0, &start, NULL, NULL, 0);
+	size = ALIGN(size, 4);
 
 	pr_debug("  size is %lx, allocating...\n", size);
 
 	/* Allocate memory for the expanded device tree */
-	mem = (unsigned long)
-		dt_alloc(size + 4, __alignof__(struct device_node));
+	mem = dt_alloc(size + 4, __alignof__(struct device_node));
+	memset(mem, 0, size);
 
-	memset((void *)mem, 0, size);
+	*(__be32 *)(mem + size) = cpu_to_be32(0xdeadbeef);
 
-	((__be32 *)mem)[size / 4] = cpu_to_be32(0xdeadbeef);
-
-	pr_debug("  unflattening %lx...\n", mem);
+	pr_debug("  unflattening %p...\n", mem);
 
 	/* Second pass, do actual unflattening */
-	start = ((unsigned long)blob) +
-		be32_to_cpu(blob->off_dt_struct);
+	start = ((void *)blob) + be32_to_cpu(blob->off_dt_struct);
 	unflatten_dt_node(blob, mem, &start, NULL, &allnextp, 0);
-	if (be32_to_cpup((__be32 *)start) != OF_DT_END)
-		pr_warning("Weird tag at end of tree: %08x\n", *((u32 *)start));
-	if (be32_to_cpu(((__be32 *)mem)[size / 4]) != 0xdeadbeef)
+	if (be32_to_cpup(start) != OF_DT_END)
+		pr_warning("Weird tag at end of tree: %08x\n", be32_to_cpup(start));
+	if (be32_to_cpup(mem + size) != 0xdeadbeef)
 		pr_warning("End of tree marker overwritten: %08x\n",
-			   be32_to_cpu(((__be32 *)mem)[size / 4]));
+			   be32_to_cpup(mem + size));
 	*allnextp = NULL;
 
 	pr_debug(" <- unflatten_device_tree()\n");
@@ -545,6 +543,82 @@
 	return of_fdt_match(initial_boot_params, node, compat);
 }
 
+struct fdt_scan_status {
+	const char *name;
+	int namelen;
+	int depth;
+	int found;
+	int (*iterator)(unsigned long node, const char *uname, int depth, void *data);
+	void *data;
+};
+
+/**
+ * fdt_scan_node_by_path - iterator for of_scan_flat_dt_by_path function
+ */
+static int __init fdt_scan_node_by_path(unsigned long node, const char *uname,
+					int depth, void *data)
+{
+	struct fdt_scan_status *st = data;
+
+	/*
+	 * if scan at the requested fdt node has been completed,
+	 * return -ENXIO to abort further scanning
+	 */
+	if (depth <= st->depth)
+		return -ENXIO;
+
+	/* requested fdt node has been found, so call iterator function */
+	if (st->found)
+		return st->iterator(node, uname, depth, st->data);
+
+	/* check if scanning automata is entering next level of fdt nodes */
+	if (depth == st->depth + 1 &&
+	    strncmp(st->name, uname, st->namelen) == 0 &&
+	    uname[st->namelen] == 0) {
+		st->depth += 1;
+		if (st->name[st->namelen] == 0) {
+			st->found = 1;
+		} else {
+			const char *next = st->name + st->namelen + 1;
+			st->name = next;
+			st->namelen = strcspn(next, "/");
+		}
+		return 0;
+	}
+
+	/* scan next fdt node */
+	return 0;
+}
+
+/**
+ * of_scan_flat_dt_by_path - scan flattened tree blob and call callback on each
+ *			     child of the given path.
+ * @path: path to start searching for children
+ * @it: callback function
+ * @data: context data pointer
+ *
+ * This function is used to scan the flattened device-tree starting from the
+ * node given by path. It is used to extract information (like reserved
+ * memory), which is required on ealy boot before we can unflatten the tree.
+ */
+int __init of_scan_flat_dt_by_path(const char *path,
+	int (*it)(unsigned long node, const char *name, int depth, void *data),
+	void *data)
+{
+	struct fdt_scan_status st = {path, 0, -1, 0, it, data};
+	int ret = 0;
+
+	if (initial_boot_params)
+                ret = of_scan_flat_dt(fdt_scan_node_by_path, &st);
+
+	if (!st.found)
+		return -ENOENT;
+	else if (ret == -ENXIO)	/* scan has been completed */
+		return 0;
+	else
+		return ret;
+}
+
 #ifdef CONFIG_BLK_DEV_INITRD
 /**
  * early_init_dt_check_for_initrd - Decode initrd location from flat tree
@@ -552,7 +626,8 @@
  */
 void __init early_init_dt_check_for_initrd(unsigned long node)
 {
-	unsigned long start, end, len;
+	u64 start, end;
+	unsigned long len;
 	__be32 *prop;
 
 	pr_debug("Looking for initrd properties... ");
@@ -560,15 +635,16 @@
 	prop = of_get_flat_dt_prop(node, "linux,initrd-start", &len);
 	if (!prop)
 		return;
-	start = of_read_ulong(prop, len/4);
+	start = of_read_number(prop, len/4);
 
 	prop = of_get_flat_dt_prop(node, "linux,initrd-end", &len);
 	if (!prop)
 		return;
-	end = of_read_ulong(prop, len/4);
+	end = of_read_number(prop, len/4);
 
 	early_init_dt_setup_initrd_arch(start, end);
-	pr_debug("initrd_start=0x%lx  initrd_end=0x%lx\n", start, end);
+	pr_debug("initrd_start=0x%llx  initrd_end=0x%llx\n",
+		 (unsigned long long)start, (unsigned long long)end);
 }
 #else
 inline void early_init_dt_check_for_initrd(unsigned long node)
@@ -698,6 +774,17 @@
 	return 1;
 }
 
+#ifdef CONFIG_HAVE_MEMBLOCK
+/*
+ * called from unflatten_device_tree() to bootstrap devicetree itself
+ * Architectures can override this definition if memblock isn't used
+ */
+void * __init __weak early_init_dt_alloc_memory_arch(u64 size, u64 align)
+{
+	return __va(memblock_alloc(size, align));
+}
+#endif
+
 /**
  * unflatten_device_tree - create tree of device_nodes from flat blob
  *
@@ -716,3 +803,14 @@
 }
 
 #endif /* CONFIG_OF_EARLY_FLATTREE */
+
+/* Feed entire flattened device tree into the random pool */
+static int __init add_fdt_randomness(void)
+{
+	if (initial_boot_params)
+		add_device_randomness(initial_boot_params,
+				be32_to_cpu(initial_boot_params->totalsize));
+
+	return 0;
+}
+core_initcall(add_fdt_randomness);
diff --git a/drivers/of/irq.c b/drivers/of/irq.c
index 1264923..1752988 100644
--- a/drivers/of/irq.c
+++ b/drivers/of/irq.c
@@ -28,7 +28,7 @@
 
 /**
  * irq_of_parse_and_map - Parse and map an interrupt into linux virq space
- * @device: Device node of the device whose interrupt is to be mapped
+ * @dev: Device node of the device whose interrupt is to be mapped
  * @index: Index of the interrupt to map
  *
  * This function is a wrapper that chains of_irq_map_one() and
diff --git a/drivers/of/of_net.c b/drivers/of/of_net.c
index ea174c8..8f9be2e 100644
--- a/drivers/of/of_net.c
+++ b/drivers/of/of_net.c
@@ -39,7 +39,7 @@
  * The function gets phy interface string from property 'phy-mode',
  * and return its index in phy_modes table, or errno in error case.
  */
-const int of_get_phy_mode(struct device_node *np)
+int of_get_phy_mode(struct device_node *np)
 {
 	const char *pm;
 	int err, i;
diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c
new file mode 100644
index 0000000..a754b84
--- /dev/null
+++ b/drivers/of/of_reserved_mem.c
@@ -0,0 +1,175 @@
+/*
+ * Device tree based initialization code for reserved memory.
+ *
+ * Copyright (c) 2013 Samsung Electronics Co., Ltd.
+ *		http://www.samsung.com
+ * Author: Marek Szyprowski <m.szyprowski@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License or (at your optional) any later version of the license.
+ */
+
+#include <asm/dma-contiguous.h>
+
+#include <linux/memblock.h>
+#include <linux/err.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/of_platform.h>
+#include <linux/mm.h>
+#include <linux/sizes.h>
+#include <linux/mm_types.h>
+#include <linux/dma-contiguous.h>
+#include <linux/dma-mapping.h>
+#include <linux/of_reserved_mem.h>
+
+#define MAX_RESERVED_REGIONS	16
+struct reserved_mem {
+	phys_addr_t		base;
+	unsigned long		size;
+	struct cma		*cma;
+	char			name[32];
+};
+static struct reserved_mem reserved_mem[MAX_RESERVED_REGIONS];
+static int reserved_mem_count;
+
+static int __init fdt_scan_reserved_mem(unsigned long node, const char *uname,
+					int depth, void *data)
+{
+	struct reserved_mem *rmem = &reserved_mem[reserved_mem_count];
+	phys_addr_t base, size;
+	int is_cma, is_reserved;
+	unsigned long len;
+	const char *status;
+	__be32 *prop;
+
+	is_cma = IS_ENABLED(CONFIG_DMA_CMA) &&
+	       of_flat_dt_is_compatible(node, "linux,contiguous-memory-region");
+	is_reserved = of_flat_dt_is_compatible(node, "reserved-memory-region");
+
+	if (!is_reserved && !is_cma) {
+		/* ignore node and scan next one */
+		return 0;
+	}
+
+	status = of_get_flat_dt_prop(node, "status", &len);
+	if (status && strcmp(status, "okay") != 0) {
+		/* ignore disabled node nad scan next one */
+		return 0;
+	}
+
+	prop = of_get_flat_dt_prop(node, "reg", &len);
+	if (!prop || (len < (dt_root_size_cells + dt_root_addr_cells) *
+			     sizeof(__be32))) {
+		pr_err("Reserved mem: node %s, incorrect \"reg\" property\n",
+		       uname);
+		/* ignore node and scan next one */
+		return 0;
+	}
+	base = dt_mem_next_cell(dt_root_addr_cells, &prop);
+	size = dt_mem_next_cell(dt_root_size_cells, &prop);
+
+	if (!size) {
+		/* ignore node and scan next one */
+		return 0;
+	}
+
+	pr_info("Reserved mem: found %s, memory base %lx, size %ld MiB\n",
+		uname, (unsigned long)base, (unsigned long)size / SZ_1M);
+
+	if (reserved_mem_count == ARRAY_SIZE(reserved_mem))
+		return -ENOSPC;
+
+	rmem->base = base;
+	rmem->size = size;
+	strlcpy(rmem->name, uname, sizeof(rmem->name));
+
+	if (is_cma) {
+		struct cma *cma;
+		if (dma_contiguous_reserve_area(size, base, 0, &cma) == 0) {
+			rmem->cma = cma;
+			reserved_mem_count++;
+			if (of_get_flat_dt_prop(node,
+						"linux,default-contiguous-region",
+						NULL))
+				dma_contiguous_set_default(cma);
+		}
+	} else if (is_reserved) {
+		if (memblock_remove(base, size) == 0)
+			reserved_mem_count++;
+		else
+			pr_err("Failed to reserve memory for %s\n", uname);
+	}
+
+	return 0;
+}
+
+static struct reserved_mem *get_dma_memory_region(struct device *dev)
+{
+	struct device_node *node;
+	const char *name;
+	int i;
+
+	node = of_parse_phandle(dev->of_node, "memory-region", 0);
+	if (!node)
+		return NULL;
+
+	name = kbasename(node->full_name);
+	for (i = 0; i < reserved_mem_count; i++)
+		if (strcmp(name, reserved_mem[i].name) == 0)
+			return &reserved_mem[i];
+	return NULL;
+}
+
+/**
+ * of_reserved_mem_device_init() - assign reserved memory region to given device
+ *
+ * This function assign memory region pointed by "memory-region" device tree
+ * property to the given device.
+ */
+void of_reserved_mem_device_init(struct device *dev)
+{
+	struct reserved_mem *region = get_dma_memory_region(dev);
+	if (!region)
+		return;
+
+	if (region->cma) {
+		dev_set_cma_area(dev, region->cma);
+		pr_info("Assigned CMA %s to %s device\n", region->name,
+			dev_name(dev));
+	} else {
+		if (dma_declare_coherent_memory(dev, region->base, region->base,
+		    region->size, DMA_MEMORY_MAP | DMA_MEMORY_EXCLUSIVE) != 0)
+			pr_info("Declared reserved memory %s to %s device\n",
+				region->name, dev_name(dev));
+	}
+}
+
+/**
+ * of_reserved_mem_device_release() - release reserved memory device structures
+ *
+ * This function releases structures allocated for memory region handling for
+ * the given device.
+ */
+void of_reserved_mem_device_release(struct device *dev)
+{
+	struct reserved_mem *region = get_dma_memory_region(dev);
+	if (!region && !region->cma)
+		dma_release_declared_memory(dev);
+}
+
+/**
+ * early_init_dt_scan_reserved_mem() - create reserved memory regions
+ *
+ * This function grabs memory from early allocator for device exclusive use
+ * defined in device tree structures. It should be called by arch specific code
+ * once the early allocator (memblock) has been activated and all other
+ * subsystems have already allocated/reserved memory.
+ */
+void __init early_init_dt_scan_reserved_mem(void)
+{
+	of_scan_flat_dt_by_path("/memory/reserved-memory",
+				fdt_scan_reserved_mem, NULL);
+}
diff --git a/drivers/of/platform.c b/drivers/of/platform.c
index e0a6514..9b439ac 100644
--- a/drivers/of/platform.c
+++ b/drivers/of/platform.c
@@ -21,6 +21,7 @@
 #include <linux/of_device.h>
 #include <linux/of_irq.h>
 #include <linux/of_platform.h>
+#include <linux/of_reserved_mem.h>
 #include <linux/platform_device.h>
 
 const struct of_device_id of_default_bus_match_table[] = {
@@ -196,7 +197,7 @@
  * Returns pointer to created platform device, or NULL if a device was not
  * registered.  Unavailable devices will not get registered.
  */
-struct platform_device *of_platform_device_create_pdata(
+static struct platform_device *of_platform_device_create_pdata(
 					struct device_node *np,
 					const char *bus_id,
 					void *platform_data,
@@ -218,6 +219,8 @@
 	dev->dev.bus = &platform_bus_type;
 	dev->dev.platform_data = platform_data;
 
+	of_reserved_mem_device_init(&dev->dev);
+
 	/* We do not fill the DMA ops for platform devices by default.
 	 * This is currently the responsibility of the platform code
 	 * to do such, possibly using a device notifier
@@ -225,6 +228,7 @@
 
 	if (of_device_add(dev) != 0) {
 		platform_device_put(dev);
+		of_reserved_mem_device_release(&dev->dev);
 		return NULL;
 	}
 
@@ -264,8 +268,11 @@
 		return NULL;
 
 	dev = amba_device_alloc(NULL, 0, 0);
-	if (!dev)
+	if (!dev) {
+		pr_err("%s(): amba_device_alloc() failed for %s\n",
+		       __func__, node->full_name);
 		return NULL;
+	}
 
 	/* setup generic device info */
 	dev->dev.coherent_dma_mask = ~0;
@@ -290,12 +297,18 @@
 		dev->irq[i] = irq_of_parse_and_map(node, i);
 
 	ret = of_address_to_resource(node, 0, &dev->res);
-	if (ret)
+	if (ret) {
+		pr_err("%s(): of_address_to_resource() failed (%d) for %s\n",
+		       __func__, ret, node->full_name);
 		goto err_free;
+	}
 
 	ret = amba_device_add(dev, &iomem_resource);
-	if (ret)
+	if (ret) {
+		pr_err("%s(): amba_device_add() failed (%d) for %s\n",
+		       __func__, ret, node->full_name);
 		goto err_free;
+	}
 
 	return dev;
 
@@ -374,6 +387,10 @@
 	}
 
 	if (of_device_is_compatible(bus, "arm,primecell")) {
+		/*
+		 * Don't return an error here to keep compatibility with older
+		 * device tree files.
+		 */
 		of_amba_device_create(bus, bus_id, platform_data, parent);
 		return 0;
 	}
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index cef6002..6ab71b9 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -13,6 +13,7 @@
 
 #include <linux/device.h>
 #include <linux/eventfd.h>
+#include <linux/file.h>
 #include <linux/interrupt.h>
 #include <linux/iommu.h>
 #include <linux/module.h>
@@ -227,6 +228,110 @@
 	return 0;
 }
 
+static int vfio_pci_count_devs(struct pci_dev *pdev, void *data)
+{
+	(*(int *)data)++;
+	return 0;
+}
+
+struct vfio_pci_fill_info {
+	int max;
+	int cur;
+	struct vfio_pci_dependent_device *devices;
+};
+
+static int vfio_pci_fill_devs(struct pci_dev *pdev, void *data)
+{
+	struct vfio_pci_fill_info *fill = data;
+	struct iommu_group *iommu_group;
+
+	if (fill->cur == fill->max)
+		return -EAGAIN; /* Something changed, try again */
+
+	iommu_group = iommu_group_get(&pdev->dev);
+	if (!iommu_group)
+		return -EPERM; /* Cannot reset non-isolated devices */
+
+	fill->devices[fill->cur].group_id = iommu_group_id(iommu_group);
+	fill->devices[fill->cur].segment = pci_domain_nr(pdev->bus);
+	fill->devices[fill->cur].bus = pdev->bus->number;
+	fill->devices[fill->cur].devfn = pdev->devfn;
+	fill->cur++;
+	iommu_group_put(iommu_group);
+	return 0;
+}
+
+struct vfio_pci_group_entry {
+	struct vfio_group *group;
+	int id;
+};
+
+struct vfio_pci_group_info {
+	int count;
+	struct vfio_pci_group_entry *groups;
+};
+
+static int vfio_pci_validate_devs(struct pci_dev *pdev, void *data)
+{
+	struct vfio_pci_group_info *info = data;
+	struct iommu_group *group;
+	int id, i;
+
+	group = iommu_group_get(&pdev->dev);
+	if (!group)
+		return -EPERM;
+
+	id = iommu_group_id(group);
+
+	for (i = 0; i < info->count; i++)
+		if (info->groups[i].id == id)
+			break;
+
+	iommu_group_put(group);
+
+	return (i == info->count) ? -EINVAL : 0;
+}
+
+static bool vfio_pci_dev_below_slot(struct pci_dev *pdev, struct pci_slot *slot)
+{
+	for (; pdev; pdev = pdev->bus->self)
+		if (pdev->bus == slot->bus)
+			return (pdev->slot == slot);
+	return false;
+}
+
+struct vfio_pci_walk_info {
+	int (*fn)(struct pci_dev *, void *data);
+	void *data;
+	struct pci_dev *pdev;
+	bool slot;
+	int ret;
+};
+
+static int vfio_pci_walk_wrapper(struct pci_dev *pdev, void *data)
+{
+	struct vfio_pci_walk_info *walk = data;
+
+	if (!walk->slot || vfio_pci_dev_below_slot(pdev, walk->pdev->slot))
+		walk->ret = walk->fn(pdev, walk->data);
+
+	return walk->ret;
+}
+
+static int vfio_pci_for_each_slot_or_bus(struct pci_dev *pdev,
+					 int (*fn)(struct pci_dev *,
+						   void *data), void *data,
+					 bool slot)
+{
+	struct vfio_pci_walk_info walk = {
+		.fn = fn, .data = data, .pdev = pdev, .slot = slot, .ret = 0,
+	};
+
+	pci_walk_bus(pdev->bus, vfio_pci_walk_wrapper, &walk);
+
+	return walk.ret;
+}
+
 static long vfio_pci_ioctl(void *device_data,
 			   unsigned int cmd, unsigned long arg)
 {
@@ -407,10 +512,189 @@
 
 		return ret;
 
-	} else if (cmd == VFIO_DEVICE_RESET)
+	} else if (cmd == VFIO_DEVICE_RESET) {
 		return vdev->reset_works ?
 			pci_reset_function(vdev->pdev) : -EINVAL;
 
+	} else if (cmd == VFIO_DEVICE_GET_PCI_HOT_RESET_INFO) {
+		struct vfio_pci_hot_reset_info hdr;
+		struct vfio_pci_fill_info fill = { 0 };
+		struct vfio_pci_dependent_device *devices = NULL;
+		bool slot = false;
+		int ret = 0;
+
+		minsz = offsetofend(struct vfio_pci_hot_reset_info, count);
+
+		if (copy_from_user(&hdr, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (hdr.argsz < minsz)
+			return -EINVAL;
+
+		hdr.flags = 0;
+
+		/* Can we do a slot or bus reset or neither? */
+		if (!pci_probe_reset_slot(vdev->pdev->slot))
+			slot = true;
+		else if (pci_probe_reset_bus(vdev->pdev->bus))
+			return -ENODEV;
+
+		/* How many devices are affected? */
+		ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
+						    vfio_pci_count_devs,
+						    &fill.max, slot);
+		if (ret)
+			return ret;
+
+		WARN_ON(!fill.max); /* Should always be at least one */
+
+		/*
+		 * If there's enough space, fill it now, otherwise return
+		 * -ENOSPC and the number of devices affected.
+		 */
+		if (hdr.argsz < sizeof(hdr) + (fill.max * sizeof(*devices))) {
+			ret = -ENOSPC;
+			hdr.count = fill.max;
+			goto reset_info_exit;
+		}
+
+		devices = kcalloc(fill.max, sizeof(*devices), GFP_KERNEL);
+		if (!devices)
+			return -ENOMEM;
+
+		fill.devices = devices;
+
+		ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
+						    vfio_pci_fill_devs,
+						    &fill, slot);
+
+		/*
+		 * If a device was removed between counting and filling,
+		 * we may come up short of fill.max.  If a device was
+		 * added, we'll have a return of -EAGAIN above.
+		 */
+		if (!ret)
+			hdr.count = fill.cur;
+
+reset_info_exit:
+		if (copy_to_user((void __user *)arg, &hdr, minsz))
+			ret = -EFAULT;
+
+		if (!ret) {
+			if (copy_to_user((void __user *)(arg + minsz), devices,
+					 hdr.count * sizeof(*devices)))
+				ret = -EFAULT;
+		}
+
+		kfree(devices);
+		return ret;
+
+	} else if (cmd == VFIO_DEVICE_PCI_HOT_RESET) {
+		struct vfio_pci_hot_reset hdr;
+		int32_t *group_fds;
+		struct vfio_pci_group_entry *groups;
+		struct vfio_pci_group_info info;
+		bool slot = false;
+		int i, count = 0, ret = 0;
+
+		minsz = offsetofend(struct vfio_pci_hot_reset, count);
+
+		if (copy_from_user(&hdr, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (hdr.argsz < minsz || hdr.flags)
+			return -EINVAL;
+
+		/* Can we do a slot or bus reset or neither? */
+		if (!pci_probe_reset_slot(vdev->pdev->slot))
+			slot = true;
+		else if (pci_probe_reset_bus(vdev->pdev->bus))
+			return -ENODEV;
+
+		/*
+		 * We can't let userspace give us an arbitrarily large
+		 * buffer to copy, so verify how many we think there
+		 * could be.  Note groups can have multiple devices so
+		 * one group per device is the max.
+		 */
+		ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
+						    vfio_pci_count_devs,
+						    &count, slot);
+		if (ret)
+			return ret;
+
+		/* Somewhere between 1 and count is OK */
+		if (!hdr.count || hdr.count > count)
+			return -EINVAL;
+
+		group_fds = kcalloc(hdr.count, sizeof(*group_fds), GFP_KERNEL);
+		groups = kcalloc(hdr.count, sizeof(*groups), GFP_KERNEL);
+		if (!group_fds || !groups) {
+			kfree(group_fds);
+			kfree(groups);
+			return -ENOMEM;
+		}
+
+		if (copy_from_user(group_fds, (void __user *)(arg + minsz),
+				   hdr.count * sizeof(*group_fds))) {
+			kfree(group_fds);
+			kfree(groups);
+			return -EFAULT;
+		}
+
+		/*
+		 * For each group_fd, get the group through the vfio external
+		 * user interface and store the group and iommu ID.  This
+		 * ensures the group is held across the reset.
+		 */
+		for (i = 0; i < hdr.count; i++) {
+			struct vfio_group *group;
+			struct fd f = fdget(group_fds[i]);
+			if (!f.file) {
+				ret = -EBADF;
+				break;
+			}
+
+			group = vfio_group_get_external_user(f.file);
+			fdput(f);
+			if (IS_ERR(group)) {
+				ret = PTR_ERR(group);
+				break;
+			}
+
+			groups[i].group = group;
+			groups[i].id = vfio_external_user_iommu_id(group);
+		}
+
+		kfree(group_fds);
+
+		/* release reference to groups on error */
+		if (ret)
+			goto hot_reset_release;
+
+		info.count = hdr.count;
+		info.groups = groups;
+
+		/*
+		 * Test whether all the affected devices are contained
+		 * by the set of groups provided by the user.
+		 */
+		ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
+						    vfio_pci_validate_devs,
+						    &info, slot);
+		if (!ret)
+			/* User has access, do the reset */
+			ret = slot ? pci_reset_slot(vdev->pdev->slot) :
+				     pci_reset_bus(vdev->pdev->bus);
+
+hot_reset_release:
+		for (i--; i >= 0; i--)
+			vfio_group_put_external_user(groups[i].group);
+
+		kfree(groups);
+		return ret;
+	}
+
 	return -ENOTTY;
 }
 
diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c
index affa347..ffd0632 100644
--- a/drivers/vfio/pci/vfio_pci_config.c
+++ b/drivers/vfio/pci/vfio_pci_config.c
@@ -1012,6 +1012,7 @@
 static int vfio_cap_len(struct vfio_pci_device *vdev, u8 cap, u8 pos)
 {
 	struct pci_dev *pdev = vdev->pdev;
+	u32 dword;
 	u16 word;
 	u8 byte;
 	int ret;
@@ -1025,7 +1026,9 @@
 			return pcibios_err_to_errno(ret);
 
 		if (PCI_X_CMD_VERSION(word)) {
-			vdev->extended_caps = true;
+			/* Test for extended capabilities */
+			pci_read_config_dword(pdev, PCI_CFG_SPACE_SIZE, &dword);
+			vdev->extended_caps = (dword != 0);
 			return PCI_CAP_PCIX_SIZEOF_V2;
 		} else
 			return PCI_CAP_PCIX_SIZEOF_V0;
@@ -1037,9 +1040,11 @@
 
 		return byte;
 	case PCI_CAP_ID_EXP:
-		/* length based on version */
-		vdev->extended_caps = true;
+		/* Test for extended capabilities */
+		pci_read_config_dword(pdev, PCI_CFG_SPACE_SIZE, &dword);
+		vdev->extended_caps = (dword != 0);
 
+		/* length based on version */
 		if ((pcie_caps_reg(pdev) & PCI_EXP_FLAGS_VERS) == 1)
 			return PCI_CAP_EXP_ENDPOINT_SIZEOF_V1;
 		else
diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c
index 4bc704e..641bc87 100644
--- a/drivers/vfio/pci/vfio_pci_intrs.c
+++ b/drivers/vfio/pci/vfio_pci_intrs.c
@@ -130,8 +130,8 @@
 			 void (*thread)(struct vfio_pci_device *, void *),
 			 void *data, struct virqfd **pvirqfd, int fd)
 {
-	struct file *file = NULL;
-	struct eventfd_ctx *ctx = NULL;
+	struct fd irqfd;
+	struct eventfd_ctx *ctx;
 	struct virqfd *virqfd;
 	int ret = 0;
 	unsigned int events;
@@ -149,16 +149,16 @@
 	INIT_WORK(&virqfd->shutdown, virqfd_shutdown);
 	INIT_WORK(&virqfd->inject, virqfd_inject);
 
-	file = eventfd_fget(fd);
-	if (IS_ERR(file)) {
-		ret = PTR_ERR(file);
-		goto fail;
+	irqfd = fdget(fd);
+	if (!irqfd.file) {
+		ret = -EBADF;
+		goto err_fd;
 	}
 
-	ctx = eventfd_ctx_fileget(file);
+	ctx = eventfd_ctx_fileget(irqfd.file);
 	if (IS_ERR(ctx)) {
 		ret = PTR_ERR(ctx);
-		goto fail;
+		goto err_ctx;
 	}
 
 	virqfd->eventfd = ctx;
@@ -174,7 +174,7 @@
 	if (*pvirqfd) {
 		spin_unlock_irq(&vdev->irqlock);
 		ret = -EBUSY;
-		goto fail;
+		goto err_busy;
 	}
 	*pvirqfd = virqfd;
 
@@ -187,7 +187,7 @@
 	init_waitqueue_func_entry(&virqfd->wait, virqfd_wakeup);
 	init_poll_funcptr(&virqfd->pt, virqfd_ptable_queue_proc);
 
-	events = file->f_op->poll(file, &virqfd->pt);
+	events = irqfd.file->f_op->poll(irqfd.file, &virqfd->pt);
 
 	/*
 	 * Check if there was an event already pending on the eventfd
@@ -202,17 +202,14 @@
 	 * Do not drop the file until the irqfd is fully initialized,
 	 * otherwise we might race against the POLLHUP.
 	 */
-	fput(file);
+	fdput(irqfd);
 
 	return 0;
-
-fail:
-	if (ctx && !IS_ERR(ctx))
-		eventfd_ctx_put(ctx);
-
-	if (file && !IS_ERR(file))
-		fput(file);
-
+err_busy:
+	eventfd_ctx_put(ctx);
+err_ctx:
+	fdput(irqfd);
+err_fd:
 	kfree(virqfd);
 
 	return ret;
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 842f450..1eab4ac 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -1109,7 +1109,7 @@
 		 * We can't use anon_inode_getfd() because we need to modify
 		 * the f_mode flags directly to allow more than just ioctls
 		 */
-		ret = get_unused_fd();
+		ret = get_unused_fd_flags(O_CLOEXEC);
 		if (ret < 0) {
 			device->ops->release(device->device_data);
 			break;
@@ -1353,6 +1353,68 @@
 };
 
 /**
+ * External user API, exported by symbols to be linked dynamically.
+ *
+ * The protocol includes:
+ *  1. do normal VFIO init operation:
+ *	- opening a new container;
+ *	- attaching group(s) to it;
+ *	- setting an IOMMU driver for a container.
+ * When IOMMU is set for a container, all groups in it are
+ * considered ready to use by an external user.
+ *
+ * 2. User space passes a group fd to an external user.
+ * The external user calls vfio_group_get_external_user()
+ * to verify that:
+ *	- the group is initialized;
+ *	- IOMMU is set for it.
+ * If both checks passed, vfio_group_get_external_user()
+ * increments the container user counter to prevent
+ * the VFIO group from disposal before KVM exits.
+ *
+ * 3. The external user calls vfio_external_user_iommu_id()
+ * to know an IOMMU ID.
+ *
+ * 4. When the external KVM finishes, it calls
+ * vfio_group_put_external_user() to release the VFIO group.
+ * This call decrements the container user counter.
+ */
+struct vfio_group *vfio_group_get_external_user(struct file *filep)
+{
+	struct vfio_group *group = filep->private_data;
+
+	if (filep->f_op != &vfio_group_fops)
+		return ERR_PTR(-EINVAL);
+
+	if (!atomic_inc_not_zero(&group->container_users))
+		return ERR_PTR(-EINVAL);
+
+	if (!group->container->iommu_driver ||
+			!vfio_group_viable(group)) {
+		atomic_dec(&group->container_users);
+		return ERR_PTR(-EINVAL);
+	}
+
+	vfio_group_get(group);
+
+	return group;
+}
+EXPORT_SYMBOL_GPL(vfio_group_get_external_user);
+
+void vfio_group_put_external_user(struct vfio_group *group)
+{
+	vfio_group_put(group);
+	vfio_group_try_dissolve_container(group);
+}
+EXPORT_SYMBOL_GPL(vfio_group_put_external_user);
+
+int vfio_external_user_iommu_id(struct vfio_group *group)
+{
+	return iommu_group_id(group->iommu_group);
+}
+EXPORT_SYMBOL_GPL(vfio_external_user_iommu_id);
+
+/**
  * Module/class support
  */
 static char *vfio_devnode(struct device *dev, umode_t *mode)
diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index 1aba255..98917fc 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -766,7 +766,7 @@
 	kfree(vp_dev);
 }
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 static int virtio_pci_freeze(struct device *dev)
 {
 	struct pci_dev *pci_dev = to_pci_dev(dev);
@@ -824,7 +824,7 @@
 	.id_table	= virtio_pci_id_table,
 	.probe		= virtio_pci_probe,
 	.remove		= virtio_pci_remove,
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
 	.driver.pm	= &virtio_pci_pm_ops,
 #endif
 };
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 0b74d31..646337d 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -751,10 +751,6 @@
 	_enter("{%x:%u},{%s},%ho",
 	       dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name, mode);
 
-	ret = -ENAMETOOLONG;
-	if (dentry->d_name.len >= AFSNAMEMAX)
-		goto error;
-
 	key = afs_request_key(dvnode->volume->cell);
 	if (IS_ERR(key)) {
 		ret = PTR_ERR(key);
@@ -816,10 +812,6 @@
 	_enter("{%x:%u},{%s}",
 	       dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name);
 
-	ret = -ENAMETOOLONG;
-	if (dentry->d_name.len >= AFSNAMEMAX)
-		goto error;
-
 	key = afs_request_key(dvnode->volume->cell);
 	if (IS_ERR(key)) {
 		ret = PTR_ERR(key);
@@ -936,10 +928,6 @@
 	_enter("{%x:%u},{%s},%ho,",
 	       dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name, mode);
 
-	ret = -ENAMETOOLONG;
-	if (dentry->d_name.len >= AFSNAMEMAX)
-		goto error;
-
 	key = afs_request_key(dvnode->volume->cell);
 	if (IS_ERR(key)) {
 		ret = PTR_ERR(key);
@@ -1005,10 +993,6 @@
 	       dvnode->fid.vid, dvnode->fid.vnode,
 	       dentry->d_name.name);
 
-	ret = -ENAMETOOLONG;
-	if (dentry->d_name.len >= AFSNAMEMAX)
-		goto error;
-
 	key = afs_request_key(dvnode->volume->cell);
 	if (IS_ERR(key)) {
 		ret = PTR_ERR(key);
@@ -1053,10 +1037,6 @@
 	       dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name,
 	       content);
 
-	ret = -ENAMETOOLONG;
-	if (dentry->d_name.len >= AFSNAMEMAX)
-		goto error;
-
 	ret = -EINVAL;
 	if (strlen(content) >= AFSPATHMAX)
 		goto error;
@@ -1127,10 +1107,6 @@
 	       new_dvnode->fid.vid, new_dvnode->fid.vnode,
 	       new_dentry->d_name.name);
 
-	ret = -ENAMETOOLONG;
-	if (new_dentry->d_name.len >= AFSNAMEMAX)
-		goto error;
-
 	key = afs_request_key(orig_dvnode->volume->cell);
 	if (IS_ERR(key)) {
 		ret = PTR_ERR(key);
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 743c7c2..0f00da3 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -183,13 +183,14 @@
 	return 0;
 }
 
+/* Find the topmost mount satisfying test() */
 static int find_autofs_mount(const char *pathname,
 			     struct path *res,
 			     int test(struct path *path, void *data),
 			     void *data)
 {
 	struct path path;
-	int err = kern_path(pathname, 0, &path);
+	int err = kern_path_mountpoint(AT_FDCWD, pathname, &path, 0);
 	if (err)
 		return err;
 	err = -ENOENT;
@@ -197,10 +198,9 @@
 		if (path.dentry->d_sb->s_magic == AUTOFS_SUPER_MAGIC) {
 			if (test(&path, data)) {
 				path_get(&path);
-				if (!err) /* already found some */
-					path_put(res);
 				*res = path;
 				err = 0;
+				break;
 			}
 		}
 		if (!follow_up(&path))
@@ -486,12 +486,11 @@
  * mount if there is one or 0 if it isn't a mountpoint.
  *
  * If we aren't supplied with a file descriptor then we
- * lookup the nameidata of the path and check if it is the
- * root of a mount. If a type is given we are looking for
- * a particular autofs mount and if we don't find a match
- * we return fail. If the located nameidata path is the
- * root of a mount we return 1 along with the super magic
- * of the mount or 0 otherwise.
+ * lookup the path and check if it is the root of a mount.
+ * If a type is given we are looking for a particular autofs
+ * mount and if we don't find a match we return fail. If the
+ * located path is the root of a mount we return 1 along with
+ * the super magic of the mount or 0 otherwise.
  *
  * In both cases the the device number (as returned by
  * new_encode_dev()) is also returned.
@@ -519,9 +518,11 @@
 
 	if (!fp || param->ioctlfd == -1) {
 		if (autofs_type_any(type))
-			err = kern_path(name, LOOKUP_FOLLOW, &path);
+			err = kern_path_mountpoint(AT_FDCWD,
+						   name, &path, LOOKUP_FOLLOW);
 		else
-			err = find_autofs_mount(name, &path, test_by_type, &type);
+			err = find_autofs_mount(name, &path,
+						test_by_type, &type);
 		if (err)
 			goto out;
 		devid = new_encode_dev(path.dentry->d_sb->s_dev);
diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c
index d4c1206..43eb559 100644
--- a/fs/cachefiles/interface.c
+++ b/fs/cachefiles/interface.c
@@ -378,6 +378,31 @@
 }
 
 /*
+ * check if the backing cache is updated to FS-Cache
+ * - called by FS-Cache when evaluates if need to invalidate the cache
+ */
+static bool cachefiles_check_consistency(struct fscache_operation *op)
+{
+	struct cachefiles_object *object;
+	struct cachefiles_cache *cache;
+	const struct cred *saved_cred;
+	int ret;
+
+	_enter("{OBJ%x}", op->object->debug_id);
+
+	object = container_of(op->object, struct cachefiles_object, fscache);
+	cache = container_of(object->fscache.cache,
+			     struct cachefiles_cache, cache);
+
+	cachefiles_begin_secure(cache, &saved_cred);
+	ret = cachefiles_check_auxdata(object);
+	cachefiles_end_secure(cache, saved_cred);
+
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/*
  * notification the attributes on an object have changed
  * - called with reads/writes excluded by FS-Cache
  */
@@ -522,4 +547,5 @@
 	.write_page		= cachefiles_write_page,
 	.uncache_page		= cachefiles_uncache_page,
 	.dissociate_pages	= cachefiles_dissociate_pages,
+	.check_consistency	= cachefiles_check_consistency,
 };
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
index 4938251..5349473 100644
--- a/fs/cachefiles/internal.h
+++ b/fs/cachefiles/internal.h
@@ -235,6 +235,7 @@
 				       struct cachefiles_xattr *auxdata);
 extern int cachefiles_update_object_xattr(struct cachefiles_object *object,
 					  struct cachefiles_xattr *auxdata);
+extern int cachefiles_check_auxdata(struct cachefiles_object *object);
 extern int cachefiles_check_object_xattr(struct cachefiles_object *object,
 					 struct cachefiles_xattr *auxdata);
 extern int cachefiles_remove_object_xattr(struct cachefiles_cache *cache,
diff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c
index 2476e51..34c88b8 100644
--- a/fs/cachefiles/xattr.c
+++ b/fs/cachefiles/xattr.c
@@ -157,6 +157,42 @@
 }
 
 /*
+ * check the consistency between the backing cache and the FS-Cache cookie
+ */
+int cachefiles_check_auxdata(struct cachefiles_object *object)
+{
+	struct cachefiles_xattr *auxbuf;
+	struct dentry *dentry = object->dentry;
+	unsigned int dlen;
+	int ret;
+
+	ASSERT(dentry);
+	ASSERT(dentry->d_inode);
+	ASSERT(object->fscache.cookie->def->check_aux);
+
+	auxbuf = kmalloc(sizeof(struct cachefiles_xattr) + 512, GFP_KERNEL);
+	if (!auxbuf)
+		return -ENOMEM;
+
+	auxbuf->len = vfs_getxattr(dentry, cachefiles_xattr_cache,
+				   &auxbuf->type, 512 + 1);
+	if (auxbuf->len < 1)
+		return -ESTALE;
+
+	if (auxbuf->type != object->fscache.cookie->def->type)
+		return -ESTALE;
+
+	dlen = auxbuf->len - 1;
+	ret = fscache_check_aux(&object->fscache, &auxbuf->data, dlen);
+
+	kfree(auxbuf);
+	if (ret != FSCACHE_CHECKAUX_OKAY)
+		return -ESTALE;
+
+	return 0;
+}
+
+/*
  * check the state xattr on a cache file
  * - return -ESTALE if the object should be deleted
  */
diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig
index 49bc782..ac9a2ef 100644
--- a/fs/ceph/Kconfig
+++ b/fs/ceph/Kconfig
@@ -16,3 +16,12 @@
 
 	  If unsure, say N.
 
+if CEPH_FS
+config CEPH_FSCACHE
+	bool "Enable Ceph client caching support"
+	depends on CEPH_FS=m && FSCACHE || CEPH_FS=y && FSCACHE=y
+	help
+	  Choose Y here to enable persistent, read-only local
+	  caching support for Ceph clients using FS-Cache
+
+endif
diff --git a/fs/ceph/Makefile b/fs/ceph/Makefile
index bd35212..32e3010 100644
--- a/fs/ceph/Makefile
+++ b/fs/ceph/Makefile
@@ -9,3 +9,4 @@
 	mds_client.o mdsmap.o strings.o ceph_frag.o \
 	debugfs.o
 
+ceph-$(CONFIG_CEPH_FSCACHE) += cache.o
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 5318a3b..6df8bd4 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -11,6 +11,7 @@
 
 #include "super.h"
 #include "mds_client.h"
+#include "cache.h"
 #include <linux/ceph/osd_client.h>
 
 /*
@@ -70,15 +71,16 @@
 	struct address_space *mapping = page->mapping;
 	struct inode *inode;
 	struct ceph_inode_info *ci;
-	int undo = 0;
 	struct ceph_snap_context *snapc;
+	int ret;
 
 	if (unlikely(!mapping))
 		return !TestSetPageDirty(page);
 
-	if (TestSetPageDirty(page)) {
+	if (PageDirty(page)) {
 		dout("%p set_page_dirty %p idx %lu -- already dirty\n",
 		     mapping->host, page, page->index);
+		BUG_ON(!PagePrivate(page));
 		return 0;
 	}
 
@@ -107,35 +109,19 @@
 	     snapc, snapc->seq, snapc->num_snaps);
 	spin_unlock(&ci->i_ceph_lock);
 
-	/* now adjust page */
-	spin_lock_irq(&mapping->tree_lock);
-	if (page->mapping) {	/* Race with truncate? */
-		WARN_ON_ONCE(!PageUptodate(page));
-		account_page_dirtied(page, page->mapping);
-		radix_tree_tag_set(&mapping->page_tree,
-				page_index(page), PAGECACHE_TAG_DIRTY);
+	/*
+	 * Reference snap context in page->private.  Also set
+	 * PagePrivate so that we get invalidatepage callback.
+	 */
+	BUG_ON(PagePrivate(page));
+	page->private = (unsigned long)snapc;
+	SetPagePrivate(page);
 
-		/*
-		 * Reference snap context in page->private.  Also set
-		 * PagePrivate so that we get invalidatepage callback.
-		 */
-		page->private = (unsigned long)snapc;
-		SetPagePrivate(page);
-	} else {
-		dout("ANON set_page_dirty %p (raced truncate?)\n", page);
-		undo = 1;
-	}
+	ret = __set_page_dirty_nobuffers(page);
+	WARN_ON(!PageLocked(page));
+	WARN_ON(!page->mapping);
 
-	spin_unlock_irq(&mapping->tree_lock);
-
-	if (undo)
-		/* whoops, we failed to dirty the page */
-		ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
-
-	__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
-
-	BUG_ON(!PageDirty(page));
-	return 1;
+	return ret;
 }
 
 /*
@@ -150,11 +136,19 @@
 	struct ceph_inode_info *ci;
 	struct ceph_snap_context *snapc = page_snap_context(page);
 
-	BUG_ON(!PageLocked(page));
-	BUG_ON(!PagePrivate(page));
-	BUG_ON(!page->mapping);
-
 	inode = page->mapping->host;
+	ci = ceph_inode(inode);
+
+	if (offset != 0 || length != PAGE_CACHE_SIZE) {
+		dout("%p invalidatepage %p idx %lu partial dirty page %u~%u\n",
+		     inode, page, page->index, offset, length);
+		return;
+	}
+
+	ceph_invalidate_fscache_page(inode, page);
+
+	if (!PagePrivate(page))
+		return;
 
 	/*
 	 * We can get non-dirty pages here due to races between
@@ -164,31 +158,28 @@
 	if (!PageDirty(page))
 		pr_err("%p invalidatepage %p page not dirty\n", inode, page);
 
-	if (offset == 0 && length == PAGE_CACHE_SIZE)
-		ClearPageChecked(page);
+	ClearPageChecked(page);
 
-	ci = ceph_inode(inode);
-	if (offset == 0 && length == PAGE_CACHE_SIZE) {
-		dout("%p invalidatepage %p idx %lu full dirty page\n",
-		     inode, page, page->index);
-		ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
-		ceph_put_snap_context(snapc);
-		page->private = 0;
-		ClearPagePrivate(page);
-	} else {
-		dout("%p invalidatepage %p idx %lu partial dirty page %u(%u)\n",
-		     inode, page, page->index, offset, length);
-	}
+	dout("%p invalidatepage %p idx %lu full dirty page\n",
+	     inode, page, page->index);
+
+	ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
+	ceph_put_snap_context(snapc);
+	page->private = 0;
+	ClearPagePrivate(page);
 }
 
-/* just a sanity check */
 static int ceph_releasepage(struct page *page, gfp_t g)
 {
 	struct inode *inode = page->mapping ? page->mapping->host : NULL;
 	dout("%p releasepage %p idx %lu\n", inode, page, page->index);
 	WARN_ON(PageDirty(page));
-	WARN_ON(PagePrivate(page));
-	return 0;
+
+	/* Can we release the page from the cache? */
+	if (!ceph_release_fscache_page(page, g))
+		return 0;
+
+	return !PagePrivate(page);
 }
 
 /*
@@ -198,11 +189,16 @@
 {
 	struct inode *inode = file_inode(filp);
 	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct ceph_osd_client *osdc = 
+	struct ceph_osd_client *osdc =
 		&ceph_inode_to_client(inode)->client->osdc;
 	int err = 0;
 	u64 len = PAGE_CACHE_SIZE;
 
+	err = ceph_readpage_from_fscache(inode, page);
+
+	if (err == 0)
+		goto out;
+
 	dout("readpage inode %p file %p page %p index %lu\n",
 	     inode, filp, page, page->index);
 	err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout,
@@ -220,6 +216,9 @@
 	}
 	SetPageUptodate(page);
 
+	if (err == 0)
+		ceph_readpage_to_fscache(inode, page);
+
 out:
 	return err < 0 ? err : 0;
 }
@@ -262,6 +261,7 @@
 		     page->index);
 		flush_dcache_page(page);
 		SetPageUptodate(page);
+		ceph_readpage_to_fscache(inode, page);
 		unlock_page(page);
 		page_cache_release(page);
 		bytes -= PAGE_CACHE_SIZE;
@@ -331,11 +331,12 @@
 		page = list_entry(page_list->prev, struct page, lru);
 		BUG_ON(PageLocked(page));
 		list_del(&page->lru);
-		
+
  		dout("start_read %p adding %p idx %lu\n", inode, page,
 		     page->index);
 		if (add_to_page_cache_lru(page, &inode->i_data, page->index,
 					  GFP_NOFS)) {
+			ceph_fscache_uncache_page(inode, page);
 			page_cache_release(page);
 			dout("start_read %p add_to_page_cache failed %p\n",
 			     inode, page);
@@ -378,6 +379,12 @@
 	int rc = 0;
 	int max = 0;
 
+	rc = ceph_readpages_from_fscache(mapping->host, mapping, page_list,
+					 &nr_pages);
+
+	if (rc == 0)
+		goto out;
+
 	if (fsc->mount_options->rsize >= PAGE_CACHE_SIZE)
 		max = (fsc->mount_options->rsize + PAGE_CACHE_SIZE - 1)
 			>> PAGE_SHIFT;
@@ -392,6 +399,8 @@
 		BUG_ON(rc == 0);
 	}
 out:
+	ceph_fscache_readpages_cancel(inode, page_list);
+
 	dout("readpages %p file %p ret %d\n", inode, file, rc);
 	return rc;
 }
@@ -497,6 +506,8 @@
 	    CONGESTION_ON_THRESH(fsc->mount_options->congestion_kb))
 		set_bdi_congested(&fsc->backing_dev_info, BLK_RW_ASYNC);
 
+	ceph_readpage_to_fscache(inode, page);
+
 	set_page_writeback(page);
 	err = ceph_osdc_writepages(osdc, ceph_vino(inode),
 				   &ci->i_layout, snapc,
@@ -552,7 +563,6 @@
 	pagevec_release(&pvec);
 }
 
-
 /*
  * async writeback completion handler.
  *
diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c
new file mode 100644
index 0000000..6bfe65e
--- /dev/null
+++ b/fs/ceph/cache.c
@@ -0,0 +1,398 @@
+/*
+ * Ceph cache definitions.
+ *
+ *  Copyright (C) 2013 by Adfin Solutions, Inc. All Rights Reserved.
+ *  Written by Milosz Tanski (milosz@adfin.com)
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2
+ *  as published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to:
+ *  Free Software Foundation
+ *  51 Franklin Street, Fifth Floor
+ *  Boston, MA  02111-1301  USA
+ *
+ */
+
+#include "super.h"
+#include "cache.h"
+
+struct ceph_aux_inode {
+	struct timespec	mtime;
+	loff_t          size;
+};
+
+struct fscache_netfs ceph_cache_netfs = {
+	.name		= "ceph",
+	.version	= 0,
+};
+
+static uint16_t ceph_fscache_session_get_key(const void *cookie_netfs_data,
+					     void *buffer, uint16_t maxbuf)
+{
+	const struct ceph_fs_client* fsc = cookie_netfs_data;
+	uint16_t klen;
+
+	klen = sizeof(fsc->client->fsid);
+	if (klen > maxbuf)
+		return 0;
+
+	memcpy(buffer, &fsc->client->fsid, klen);
+	return klen;
+}
+
+static const struct fscache_cookie_def ceph_fscache_fsid_object_def = {
+	.name		= "CEPH.fsid",
+	.type		= FSCACHE_COOKIE_TYPE_INDEX,
+	.get_key	= ceph_fscache_session_get_key,
+};
+
+int ceph_fscache_register(void)
+{
+	return fscache_register_netfs(&ceph_cache_netfs);
+}
+
+void ceph_fscache_unregister(void)
+{
+	fscache_unregister_netfs(&ceph_cache_netfs);
+}
+
+int ceph_fscache_register_fs(struct ceph_fs_client* fsc)
+{
+	fsc->fscache = fscache_acquire_cookie(ceph_cache_netfs.primary_index,
+					      &ceph_fscache_fsid_object_def,
+					      fsc);
+
+	if (fsc->fscache == NULL) {
+		pr_err("Unable to resgister fsid: %p fscache cookie", fsc);
+		return 0;
+	}
+
+	fsc->revalidate_wq = alloc_workqueue("ceph-revalidate", 0, 1);
+	if (fsc->revalidate_wq == NULL)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static uint16_t ceph_fscache_inode_get_key(const void *cookie_netfs_data,
+					   void *buffer, uint16_t maxbuf)
+{
+	const struct ceph_inode_info* ci = cookie_netfs_data;
+	uint16_t klen;
+
+	/* use ceph virtual inode (id + snaphot) */
+	klen = sizeof(ci->i_vino);
+	if (klen > maxbuf)
+		return 0;
+
+	memcpy(buffer, &ci->i_vino, klen);
+	return klen;
+}
+
+static uint16_t ceph_fscache_inode_get_aux(const void *cookie_netfs_data,
+					   void *buffer, uint16_t bufmax)
+{
+	struct ceph_aux_inode aux;
+	const struct ceph_inode_info* ci = cookie_netfs_data;
+	const struct inode* inode = &ci->vfs_inode;
+
+	memset(&aux, 0, sizeof(aux));
+	aux.mtime = inode->i_mtime;
+	aux.size = inode->i_size;
+
+	memcpy(buffer, &aux, sizeof(aux));
+
+	return sizeof(aux);
+}
+
+static void ceph_fscache_inode_get_attr(const void *cookie_netfs_data,
+					uint64_t *size)
+{
+	const struct ceph_inode_info* ci = cookie_netfs_data;
+	const struct inode* inode = &ci->vfs_inode;
+
+	*size = inode->i_size;
+}
+
+static enum fscache_checkaux ceph_fscache_inode_check_aux(
+	void *cookie_netfs_data, const void *data, uint16_t dlen)
+{
+	struct ceph_aux_inode aux;
+	struct ceph_inode_info* ci = cookie_netfs_data;
+	struct inode* inode = &ci->vfs_inode;
+
+	if (dlen != sizeof(aux))
+		return FSCACHE_CHECKAUX_OBSOLETE;
+
+	memset(&aux, 0, sizeof(aux));
+	aux.mtime = inode->i_mtime;
+	aux.size = inode->i_size;
+
+	if (memcmp(data, &aux, sizeof(aux)) != 0)
+		return FSCACHE_CHECKAUX_OBSOLETE;
+
+	dout("ceph inode 0x%p cached okay", ci);
+	return FSCACHE_CHECKAUX_OKAY;
+}
+
+static void ceph_fscache_inode_now_uncached(void* cookie_netfs_data)
+{
+	struct ceph_inode_info* ci = cookie_netfs_data;
+	struct pagevec pvec;
+	pgoff_t first;
+	int loop, nr_pages;
+
+	pagevec_init(&pvec, 0);
+	first = 0;
+
+	dout("ceph inode 0x%p now uncached", ci);
+
+	while (1) {
+		nr_pages = pagevec_lookup(&pvec, ci->vfs_inode.i_mapping, first,
+					  PAGEVEC_SIZE - pagevec_count(&pvec));
+
+		if (!nr_pages)
+			break;
+
+		for (loop = 0; loop < nr_pages; loop++)
+			ClearPageFsCache(pvec.pages[loop]);
+
+		first = pvec.pages[nr_pages - 1]->index + 1;
+
+		pvec.nr = nr_pages;
+		pagevec_release(&pvec);
+		cond_resched();
+	}
+}
+
+static const struct fscache_cookie_def ceph_fscache_inode_object_def = {
+	.name		= "CEPH.inode",
+	.type		= FSCACHE_COOKIE_TYPE_DATAFILE,
+	.get_key	= ceph_fscache_inode_get_key,
+	.get_attr	= ceph_fscache_inode_get_attr,
+	.get_aux	= ceph_fscache_inode_get_aux,
+	.check_aux	= ceph_fscache_inode_check_aux,
+	.now_uncached	= ceph_fscache_inode_now_uncached,
+};
+
+void ceph_fscache_register_inode_cookie(struct ceph_fs_client* fsc,
+					struct ceph_inode_info* ci)
+{
+	struct inode* inode = &ci->vfs_inode;
+
+	/* No caching for filesystem */
+	if (fsc->fscache == NULL)
+		return;
+
+	/* Only cache for regular files that are read only */
+	if ((ci->vfs_inode.i_mode & S_IFREG) == 0)
+		return;
+
+	/* Avoid multiple racing open requests */
+	mutex_lock(&inode->i_mutex);
+
+	if (ci->fscache)
+		goto done;
+
+	ci->fscache = fscache_acquire_cookie(fsc->fscache,
+					     &ceph_fscache_inode_object_def,
+					     ci);
+done:
+	mutex_unlock(&inode->i_mutex);
+
+}
+
+void ceph_fscache_unregister_inode_cookie(struct ceph_inode_info* ci)
+{
+	struct fscache_cookie* cookie;
+
+	if ((cookie = ci->fscache) == NULL)
+		return;
+
+	ci->fscache = NULL;
+
+	fscache_uncache_all_inode_pages(cookie, &ci->vfs_inode);
+	fscache_relinquish_cookie(cookie, 0);
+}
+
+static void ceph_vfs_readpage_complete(struct page *page, void *data, int error)
+{
+	if (!error)
+		SetPageUptodate(page);
+}
+
+static void ceph_vfs_readpage_complete_unlock(struct page *page, void *data, int error)
+{
+	if (!error)
+		SetPageUptodate(page);
+
+	unlock_page(page);
+}
+
+static inline int cache_valid(struct ceph_inode_info *ci)
+{
+	return ((ceph_caps_issued(ci) & CEPH_CAP_FILE_CACHE) &&
+		(ci->i_fscache_gen == ci->i_rdcache_gen));
+}
+
+
+/* Atempt to read from the fscache,
+ *
+ * This function is called from the readpage_nounlock context. DO NOT attempt to
+ * unlock the page here (or in the callback).
+ */
+int ceph_readpage_from_fscache(struct inode *inode, struct page *page)
+{
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	int ret;
+
+	if (!cache_valid(ci))
+		return -ENOBUFS;
+
+	ret = fscache_read_or_alloc_page(ci->fscache, page,
+					 ceph_vfs_readpage_complete, NULL,
+					 GFP_KERNEL);
+
+	switch (ret) {
+		case 0: /* Page found */
+			dout("page read submitted\n");
+			return 0;
+		case -ENOBUFS: /* Pages were not found, and can't be */
+		case -ENODATA: /* Pages were not found */
+			dout("page/inode not in cache\n");
+			return ret;
+		default:
+			dout("%s: unknown error ret = %i\n", __func__, ret);
+			return ret;
+	}
+}
+
+int ceph_readpages_from_fscache(struct inode *inode,
+				  struct address_space *mapping,
+				  struct list_head *pages,
+				  unsigned *nr_pages)
+{
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	int ret;
+
+	if (!cache_valid(ci))
+		return -ENOBUFS;
+
+	ret = fscache_read_or_alloc_pages(ci->fscache, mapping, pages, nr_pages,
+					  ceph_vfs_readpage_complete_unlock,
+					  NULL, mapping_gfp_mask(mapping));
+
+	switch (ret) {
+		case 0: /* All pages found */
+			dout("all-page read submitted\n");
+			return 0;
+		case -ENOBUFS: /* Some pages were not found, and can't be */
+		case -ENODATA: /* some pages were not found */
+			dout("page/inode not in cache\n");
+			return ret;
+		default:
+			dout("%s: unknown error ret = %i\n", __func__, ret);
+			return ret;
+	}
+}
+
+void ceph_readpage_to_fscache(struct inode *inode, struct page *page)
+{
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	int ret;
+
+	if (!PageFsCache(page))
+		return;
+
+	if (!cache_valid(ci))
+		return;
+
+	ret = fscache_write_page(ci->fscache, page, GFP_KERNEL);
+	if (ret)
+		 fscache_uncache_page(ci->fscache, page);
+}
+
+void ceph_invalidate_fscache_page(struct inode* inode, struct page *page)
+{
+	struct ceph_inode_info *ci = ceph_inode(inode);
+
+	fscache_wait_on_page_write(ci->fscache, page);
+	fscache_uncache_page(ci->fscache, page);
+}
+
+void ceph_fscache_unregister_fs(struct ceph_fs_client* fsc)
+{
+	if (fsc->revalidate_wq)
+		destroy_workqueue(fsc->revalidate_wq);
+
+	fscache_relinquish_cookie(fsc->fscache, 0);
+	fsc->fscache = NULL;
+}
+
+static void ceph_revalidate_work(struct work_struct *work)
+{
+	int issued;
+	u32 orig_gen;
+	struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info,
+						  i_revalidate_work);
+	struct inode *inode = &ci->vfs_inode;
+
+	spin_lock(&ci->i_ceph_lock);
+	issued = __ceph_caps_issued(ci, NULL);
+	orig_gen = ci->i_rdcache_gen;
+	spin_unlock(&ci->i_ceph_lock);
+
+	if (!(issued & CEPH_CAP_FILE_CACHE)) {
+		dout("revalidate_work lost cache before validation %p\n",
+		     inode);
+		goto out;
+	}
+
+	if (!fscache_check_consistency(ci->fscache))
+		fscache_invalidate(ci->fscache);
+
+	spin_lock(&ci->i_ceph_lock);
+	/* Update the new valid generation (backwards sanity check too) */
+	if (orig_gen > ci->i_fscache_gen) {
+		ci->i_fscache_gen = orig_gen;
+	}
+	spin_unlock(&ci->i_ceph_lock);
+
+out:
+	iput(&ci->vfs_inode);
+}
+
+void ceph_queue_revalidate(struct inode *inode)
+{
+	struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb);
+	struct ceph_inode_info *ci = ceph_inode(inode);
+
+	if (fsc->revalidate_wq == NULL || ci->fscache == NULL)
+		return;
+
+	ihold(inode);
+
+	if (queue_work(ceph_sb_to_client(inode->i_sb)->revalidate_wq,
+		       &ci->i_revalidate_work)) {
+		dout("ceph_queue_revalidate %p\n", inode);
+	} else {
+		dout("ceph_queue_revalidate %p failed\n)", inode);
+		iput(inode);
+	}
+}
+
+void ceph_fscache_inode_init(struct ceph_inode_info *ci)
+{
+	ci->fscache = NULL;
+	/* The first load is verifed cookie open time */
+	ci->i_fscache_gen = 1;
+	INIT_WORK(&ci->i_revalidate_work, ceph_revalidate_work);
+}
diff --git a/fs/ceph/cache.h b/fs/ceph/cache.h
new file mode 100644
index 0000000..ba94940
--- /dev/null
+++ b/fs/ceph/cache.h
@@ -0,0 +1,159 @@
+/*
+ * Ceph cache definitions.
+ *
+ *  Copyright (C) 2013 by Adfin Solutions, Inc. All Rights Reserved.
+ *  Written by Milosz Tanski (milosz@adfin.com)
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2
+ *  as published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to:
+ *  Free Software Foundation
+ *  51 Franklin Street, Fifth Floor
+ *  Boston, MA  02111-1301  USA
+ *
+ */
+
+#ifndef _CEPH_CACHE_H
+#define _CEPH_CACHE_H
+
+#ifdef CONFIG_CEPH_FSCACHE
+
+extern struct fscache_netfs ceph_cache_netfs;
+
+int ceph_fscache_register(void);
+void ceph_fscache_unregister(void);
+
+int ceph_fscache_register_fs(struct ceph_fs_client* fsc);
+void ceph_fscache_unregister_fs(struct ceph_fs_client* fsc);
+
+void ceph_fscache_inode_init(struct ceph_inode_info *ci);
+void ceph_fscache_register_inode_cookie(struct ceph_fs_client* fsc,
+					struct ceph_inode_info* ci);
+void ceph_fscache_unregister_inode_cookie(struct ceph_inode_info* ci);
+
+int ceph_readpage_from_fscache(struct inode *inode, struct page *page);
+int ceph_readpages_from_fscache(struct inode *inode,
+				struct address_space *mapping,
+				struct list_head *pages,
+				unsigned *nr_pages);
+void ceph_readpage_to_fscache(struct inode *inode, struct page *page);
+void ceph_invalidate_fscache_page(struct inode* inode, struct page *page);
+void ceph_queue_revalidate(struct inode *inode);
+
+static inline void ceph_fscache_invalidate(struct inode *inode)
+{
+	fscache_invalidate(ceph_inode(inode)->fscache);
+}
+
+static inline void ceph_fscache_uncache_page(struct inode *inode,
+					     struct page *page)
+{
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	return fscache_uncache_page(ci->fscache, page);
+}
+
+static inline int ceph_release_fscache_page(struct page *page, gfp_t gfp)
+{
+	struct inode* inode = page->mapping->host;
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	return fscache_maybe_release_page(ci->fscache, page, gfp);
+}
+
+static inline void ceph_fscache_readpages_cancel(struct inode *inode,
+						 struct list_head *pages)
+{
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	return fscache_readpages_cancel(ci->fscache, pages);
+}
+
+#else
+
+static inline int ceph_fscache_register(void)
+{
+	return 0;
+}
+
+static inline void ceph_fscache_unregister(void)
+{
+}
+
+static inline int ceph_fscache_register_fs(struct ceph_fs_client* fsc)
+{
+	return 0;
+}
+
+static inline void ceph_fscache_unregister_fs(struct ceph_fs_client* fsc)
+{
+}
+
+static inline void ceph_fscache_inode_init(struct ceph_inode_info *ci)
+{
+}
+
+static inline void ceph_fscache_register_inode_cookie(struct ceph_fs_client* parent_fsc,
+						      struct ceph_inode_info* ci)
+{
+}
+
+static inline void ceph_fscache_uncache_page(struct inode *inode,
+					     struct page *pages)
+{
+}
+
+static inline int ceph_readpage_from_fscache(struct inode* inode,
+					     struct page *page)
+{
+	return -ENOBUFS;
+}
+
+static inline int ceph_readpages_from_fscache(struct inode *inode,
+					      struct address_space *mapping,
+					      struct list_head *pages,
+					      unsigned *nr_pages)
+{
+	return -ENOBUFS;
+}
+
+static inline void ceph_readpage_to_fscache(struct inode *inode,
+					    struct page *page)
+{
+}
+
+static inline void ceph_fscache_invalidate(struct inode *inode)
+{
+}
+
+static inline void ceph_invalidate_fscache_page(struct inode *inode,
+						struct page *page)
+{
+}
+
+static inline void ceph_fscache_unregister_inode_cookie(struct ceph_inode_info* ci)
+{
+}
+
+static inline int ceph_release_fscache_page(struct page *page, gfp_t gfp)
+{
+	return 1;
+}
+
+static inline void ceph_fscache_readpages_cancel(struct inode *inode,
+						 struct list_head *pages)
+{
+}
+
+static inline void ceph_queue_revalidate(struct inode *inode)
+{
+}
+
+#endif
+
+#endif
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 25442b4..13976c3 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -10,6 +10,7 @@
 
 #include "super.h"
 #include "mds_client.h"
+#include "cache.h"
 #include <linux/ceph/decode.h>
 #include <linux/ceph/messenger.h>
 
@@ -479,8 +480,9 @@
 	 * i_rdcache_gen.
 	 */
 	if ((issued & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) &&
-	    (had & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0)
+	    (had & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0) {
 		ci->i_rdcache_gen++;
+	}
 
 	/*
 	 * if we are newly issued FILE_SHARED, mark dir not complete; we
@@ -2072,19 +2074,17 @@
 	/* finish pending truncate */
 	while (ci->i_truncate_pending) {
 		spin_unlock(&ci->i_ceph_lock);
-		if (!(need & CEPH_CAP_FILE_WR))
-			mutex_lock(&inode->i_mutex);
 		__ceph_do_pending_vmtruncate(inode);
-		if (!(need & CEPH_CAP_FILE_WR))
-			mutex_unlock(&inode->i_mutex);
 		spin_lock(&ci->i_ceph_lock);
 	}
 
-	if (need & CEPH_CAP_FILE_WR) {
+	have = __ceph_caps_issued(ci, &implemented);
+
+	if (have & need & CEPH_CAP_FILE_WR) {
 		if (endoff >= 0 && endoff > (loff_t)ci->i_max_size) {
 			dout("get_cap_refs %p endoff %llu > maxsize %llu\n",
 			     inode, endoff, ci->i_max_size);
-			if (endoff > ci->i_wanted_max_size) {
+			if (endoff > ci->i_requested_max_size) {
 				*check_max = 1;
 				ret = 1;
 			}
@@ -2099,7 +2099,6 @@
 			goto out;
 		}
 	}
-	have = __ceph_caps_issued(ci, &implemented);
 
 	if ((have & need) == need) {
 		/*
@@ -2141,14 +2140,17 @@
 
 	/* do we need to explicitly request a larger max_size? */
 	spin_lock(&ci->i_ceph_lock);
-	if ((endoff >= ci->i_max_size ||
-	     endoff > (inode->i_size << 1)) &&
-	    endoff > ci->i_wanted_max_size) {
+	if (endoff >= ci->i_max_size && endoff > ci->i_wanted_max_size) {
 		dout("write %p at large endoff %llu, req max_size\n",
 		     inode, endoff);
 		ci->i_wanted_max_size = endoff;
-		check = 1;
 	}
+	/* duplicate ceph_check_caps()'s logic */
+	if (ci->i_auth_cap &&
+	    (ci->i_auth_cap->issued & CEPH_CAP_FILE_WR) &&
+	    ci->i_wanted_max_size > ci->i_max_size &&
+	    ci->i_wanted_max_size > ci->i_requested_max_size)
+		check = 1;
 	spin_unlock(&ci->i_ceph_lock);
 	if (check)
 		ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
@@ -2334,6 +2336,38 @@
 }
 
 /*
+ * Invalidate unlinked inode's aliases, so we can drop the inode ASAP.
+ */
+static void invalidate_aliases(struct inode *inode)
+{
+	struct dentry *dn, *prev = NULL;
+
+	dout("invalidate_aliases inode %p\n", inode);
+	d_prune_aliases(inode);
+	/*
+	 * For non-directory inode, d_find_alias() only returns
+	 * connected dentry. After calling d_invalidate(), the
+	 * dentry become disconnected.
+	 *
+	 * For directory inode, d_find_alias() can return
+	 * disconnected dentry. But directory inode should have
+	 * one alias at most.
+	 */
+	while ((dn = d_find_alias(inode))) {
+		if (dn == prev) {
+			dput(dn);
+			break;
+		}
+		d_invalidate(dn);
+		if (prev)
+			dput(prev);
+		prev = dn;
+	}
+	if (prev)
+		dput(prev);
+}
+
+/*
  * Handle a cap GRANT message from the MDS.  (Note that a GRANT may
  * actually be a revocation if it specifies a smaller cap set.)
  *
@@ -2361,8 +2395,9 @@
 	int check_caps = 0;
 	int wake = 0;
 	int writeback = 0;
-	int revoked_rdcache = 0;
 	int queue_invalidate = 0;
+	int deleted_inode = 0;
+	int queue_revalidate = 0;
 
 	dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n",
 	     inode, cap, mds, seq, ceph_cap_string(newcaps));
@@ -2377,9 +2412,7 @@
 	if (((cap->issued & ~newcaps) & CEPH_CAP_FILE_CACHE) &&
 	    (newcaps & CEPH_CAP_FILE_LAZYIO) == 0 &&
 	    !ci->i_wrbuffer_ref) {
-		if (try_nonblocking_invalidate(inode) == 0) {
-			revoked_rdcache = 1;
-		} else {
+		if (try_nonblocking_invalidate(inode)) {
 			/* there were locked pages.. invalidate later
 			   in a separate thread. */
 			if (ci->i_rdcache_revoking != ci->i_rdcache_gen) {
@@ -2387,6 +2420,8 @@
 				ci->i_rdcache_revoking = ci->i_rdcache_gen;
 			}
 		}
+
+		ceph_fscache_invalidate(inode);
 	}
 
 	/* side effects now are allowed */
@@ -2407,8 +2442,12 @@
 		     from_kgid(&init_user_ns, inode->i_gid));
 	}
 
-	if ((issued & CEPH_CAP_LINK_EXCL) == 0)
+	if ((issued & CEPH_CAP_LINK_EXCL) == 0) {
 		set_nlink(inode, le32_to_cpu(grant->nlink));
+		if (inode->i_nlink == 0 &&
+		    (newcaps & (CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL)))
+			deleted_inode = 1;
+	}
 
 	if ((issued & CEPH_CAP_XATTR_EXCL) == 0 && grant->xattr_len) {
 		int len = le32_to_cpu(grant->xattr_len);
@@ -2424,6 +2463,11 @@
 		}
 	}
 
+	/* Do we need to revalidate our fscache cookie. Don't bother on the
+	 * first cache cap as we already validate at cookie creation time. */
+	if ((issued & CEPH_CAP_FILE_CACHE) && ci->i_rdcache_gen > 1)
+		queue_revalidate = 1;
+
 	/* size/ctime/mtime/atime? */
 	ceph_fill_file_size(inode, issued,
 			    le32_to_cpu(grant->truncate_seq),
@@ -2508,6 +2552,7 @@
 	BUG_ON(cap->issued & ~cap->implemented);
 
 	spin_unlock(&ci->i_ceph_lock);
+
 	if (writeback)
 		/*
 		 * queue inode for writeback: we can't actually call
@@ -2517,6 +2562,10 @@
 		ceph_queue_writeback(inode);
 	if (queue_invalidate)
 		ceph_queue_invalidate(inode);
+	if (deleted_inode)
+		invalidate_aliases(inode);
+	if (queue_revalidate)
+		ceph_queue_revalidate(inode);
 	if (wake)
 		wake_up_all(&ci->i_cap_wq);
 
@@ -2673,8 +2722,10 @@
 					  truncate_seq, truncate_size, size);
 	spin_unlock(&ci->i_ceph_lock);
 
-	if (queue_trunc)
+	if (queue_trunc) {
 		ceph_queue_vmtruncate(inode);
+		ceph_fscache_invalidate(inode);
+	}
 }
 
 /*
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index a40ceda..868b61d 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -793,6 +793,8 @@
 	req->r_locked_dir = dir;
 	req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
 	req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
+	/* release LINK_SHARED on source inode (mds will lock it) */
+	req->r_old_inode_drop = CEPH_CAP_LINK_SHARED;
 	err = ceph_mdsc_do_request(mdsc, dir, req);
 	if (err) {
 		d_drop(dentry);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 2ddf061..3de8982 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -8,9 +8,11 @@
 #include <linux/namei.h>
 #include <linux/writeback.h>
 #include <linux/aio.h>
+#include <linux/falloc.h>
 
 #include "super.h"
 #include "mds_client.h"
+#include "cache.h"
 
 /*
  * Ceph file operations
@@ -68,9 +70,23 @@
 {
 	struct ceph_file_info *cf;
 	int ret = 0;
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb);
+	struct ceph_mds_client *mdsc = fsc->mdsc;
 
 	switch (inode->i_mode & S_IFMT) {
 	case S_IFREG:
+		/* First file open request creates the cookie, we want to keep
+		 * this cookie around for the filetime of the inode as not to
+		 * have to worry about fscache register / revoke / operation
+		 * races.
+		 *
+		 * Also, if we know the operation is going to invalidate data
+		 * (non readonly) just nuke the cache right away.
+		 */
+		ceph_fscache_register_inode_cookie(mdsc->fsc, ci);
+		if ((fmode & CEPH_FILE_MODE_WR))
+			ceph_fscache_invalidate(inode);
 	case S_IFDIR:
 		dout("init_file %p %p 0%o (regular)\n", inode, file,
 		     inode->i_mode);
@@ -181,6 +197,7 @@
 		spin_unlock(&ci->i_ceph_lock);
 		return ceph_init_file(inode, file, fmode);
 	}
+
 	spin_unlock(&ci->i_ceph_lock);
 
 	dout("open fmode %d wants %s\n", fmode, ceph_cap_string(wanted));
@@ -191,6 +208,7 @@
 	}
 	req->r_inode = inode;
 	ihold(inode);
+
 	req->r_num_caps = 1;
 	if (flags & (O_CREAT|O_TRUNC))
 		parent_inode = ceph_get_dentry_parent_inode(file->f_dentry);
@@ -313,9 +331,9 @@
 {
 	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
 	struct ceph_inode_info *ci = ceph_inode(inode);
-	u64 pos, this_len;
+	u64 pos, this_len, left;
 	int io_align, page_align;
-	int left, pages_left;
+	int pages_left;
 	int read;
 	struct page **page_pos;
 	int ret;
@@ -346,47 +364,40 @@
 		ret = 0;
 	hit_stripe = this_len < left;
 	was_short = ret >= 0 && ret < this_len;
-	dout("striped_read %llu~%u (read %u) got %d%s%s\n", pos, left, read,
+	dout("striped_read %llu~%llu (read %u) got %d%s%s\n", pos, left, read,
 	     ret, hit_stripe ? " HITSTRIPE" : "", was_short ? " SHORT" : "");
 
-	if (ret > 0) {
-		int didpages = (page_align + ret) >> PAGE_CACHE_SHIFT;
-
-		if (read < pos - off) {
-			dout(" zero gap %llu to %llu\n", off + read, pos);
-			ceph_zero_page_vector_range(page_align + read,
-						    pos - off - read, pages);
+	if (ret >= 0) {
+		int didpages;
+		if (was_short && (pos + ret < inode->i_size)) {
+			u64 tmp = min(this_len - ret,
+					inode->i_size - pos - ret);
+			dout(" zero gap %llu to %llu\n",
+				pos + ret, pos + ret + tmp);
+			ceph_zero_page_vector_range(page_align + read + ret,
+							tmp, pages);
+			ret += tmp;
 		}
+
+		didpages = (page_align + ret) >> PAGE_CACHE_SHIFT;
 		pos += ret;
 		read = pos - off;
 		left -= ret;
 		page_pos += didpages;
 		pages_left -= didpages;
 
-		/* hit stripe? */
-		if (left && hit_stripe)
+		/* hit stripe and need continue*/
+		if (left && hit_stripe && pos < inode->i_size)
 			goto more;
 	}
 
-	if (was_short) {
+	if (read > 0) {
+		ret = read;
 		/* did we bounce off eof? */
 		if (pos + left > inode->i_size)
 			*checkeof = 1;
-
-		/* zero trailing bytes (inside i_size) */
-		if (left > 0 && pos < inode->i_size) {
-			if (pos + left > inode->i_size)
-				left = inode->i_size - pos;
-
-			dout("zero tail %d\n", left);
-			ceph_zero_page_vector_range(page_align + read, left,
-						    pages);
-			read += left;
-		}
 	}
 
-	if (ret >= 0)
-		ret = read;
 	dout("striped_read returns %d\n", ret);
 	return ret;
 }
@@ -618,6 +629,8 @@
 		if (check_caps)
 			ceph_check_caps(ceph_inode(inode), CHECK_CAPS_AUTHONLY,
 					NULL);
+	} else if (ret != -EOLDSNAPC && written > 0) {
+		ret = written;
 	}
 	return ret;
 }
@@ -659,7 +672,6 @@
 
 	if ((got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0 ||
 	    (iocb->ki_filp->f_flags & O_DIRECT) ||
-	    (inode->i_sb->s_flags & MS_SYNCHRONOUS) ||
 	    (fi->flags & CEPH_F_SYNC))
 		/* hmm, this isn't really async... */
 		ret = ceph_sync_read(filp, base, len, ppos, &checkeof);
@@ -711,13 +723,11 @@
 		&ceph_sb_to_client(inode->i_sb)->client->osdc;
 	ssize_t count, written = 0;
 	int err, want, got;
-	bool hold_mutex;
 
 	if (ceph_snap(inode) != CEPH_NOSNAP)
 		return -EROFS;
 
 	mutex_lock(&inode->i_mutex);
-	hold_mutex = true;
 
 	err = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ);
 	if (err)
@@ -763,18 +773,31 @@
 
 	if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 ||
 	    (iocb->ki_filp->f_flags & O_DIRECT) ||
-	    (inode->i_sb->s_flags & MS_SYNCHRONOUS) ||
 	    (fi->flags & CEPH_F_SYNC)) {
 		mutex_unlock(&inode->i_mutex);
 		written = ceph_sync_write(file, iov->iov_base, count,
 					  pos, &iocb->ki_pos);
+		if (written == -EOLDSNAPC) {
+			dout("aio_write %p %llx.%llx %llu~%u"
+				"got EOLDSNAPC, retrying\n",
+				inode, ceph_vinop(inode),
+				pos, (unsigned)iov->iov_len);
+			mutex_lock(&inode->i_mutex);
+			goto retry_snap;
+		}
 	} else {
+		/*
+		 * No need to acquire the i_truncate_mutex. Because
+		 * the MDS revokes Fwb caps before sending truncate
+		 * message to us. We can't get Fwb cap while there
+		 * are pending vmtruncate. So write and vmtruncate
+		 * can not run at the same time
+		 */
 		written = generic_file_buffered_write(iocb, iov, nr_segs,
 						      pos, &iocb->ki_pos,
 						      count, 0);
 		mutex_unlock(&inode->i_mutex);
 	}
-	hold_mutex = false;
 
 	if (written >= 0) {
 		int dirty;
@@ -798,18 +821,12 @@
 			written = err;
 	}
 
-	if (written == -EOLDSNAPC) {
-		dout("aio_write %p %llx.%llx %llu~%u got EOLDSNAPC, retrying\n",
-		     inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len);
-		mutex_lock(&inode->i_mutex);
-		hold_mutex = true;
-		goto retry_snap;
-	}
-out:
-	if (hold_mutex)
-		mutex_unlock(&inode->i_mutex);
-	current->backing_dev_info = NULL;
+	goto out_unlocked;
 
+out:
+	mutex_unlock(&inode->i_mutex);
+out_unlocked:
+	current->backing_dev_info = NULL;
 	return written ? written : err;
 }
 
@@ -822,7 +839,6 @@
 	int ret;
 
 	mutex_lock(&inode->i_mutex);
-	__ceph_do_pending_vmtruncate(inode);
 
 	if (whence == SEEK_END || whence == SEEK_DATA || whence == SEEK_HOLE) {
 		ret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE);
@@ -871,6 +887,204 @@
 	return offset;
 }
 
+static inline void ceph_zero_partial_page(
+	struct inode *inode, loff_t offset, unsigned size)
+{
+	struct page *page;
+	pgoff_t index = offset >> PAGE_CACHE_SHIFT;
+
+	page = find_lock_page(inode->i_mapping, index);
+	if (page) {
+		wait_on_page_writeback(page);
+		zero_user(page, offset & (PAGE_CACHE_SIZE - 1), size);
+		unlock_page(page);
+		page_cache_release(page);
+	}
+}
+
+static void ceph_zero_pagecache_range(struct inode *inode, loff_t offset,
+				      loff_t length)
+{
+	loff_t nearly = round_up(offset, PAGE_CACHE_SIZE);
+	if (offset < nearly) {
+		loff_t size = nearly - offset;
+		if (length < size)
+			size = length;
+		ceph_zero_partial_page(inode, offset, size);
+		offset += size;
+		length -= size;
+	}
+	if (length >= PAGE_CACHE_SIZE) {
+		loff_t size = round_down(length, PAGE_CACHE_SIZE);
+		truncate_pagecache_range(inode, offset, offset + size - 1);
+		offset += size;
+		length -= size;
+	}
+	if (length)
+		ceph_zero_partial_page(inode, offset, length);
+}
+
+static int ceph_zero_partial_object(struct inode *inode,
+				    loff_t offset, loff_t *length)
+{
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+	struct ceph_osd_request *req;
+	int ret = 0;
+	loff_t zero = 0;
+	int op;
+
+	if (!length) {
+		op = offset ? CEPH_OSD_OP_DELETE : CEPH_OSD_OP_TRUNCATE;
+		length = &zero;
+	} else {
+		op = CEPH_OSD_OP_ZERO;
+	}
+
+	req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
+					ceph_vino(inode),
+					offset, length,
+					1, op,
+					CEPH_OSD_FLAG_WRITE |
+					CEPH_OSD_FLAG_ONDISK,
+					NULL, 0, 0, false);
+	if (IS_ERR(req)) {
+		ret = PTR_ERR(req);
+		goto out;
+	}
+
+	ceph_osdc_build_request(req, offset, NULL, ceph_vino(inode).snap,
+				&inode->i_mtime);
+
+	ret = ceph_osdc_start_request(&fsc->client->osdc, req, false);
+	if (!ret) {
+		ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
+		if (ret == -ENOENT)
+			ret = 0;
+	}
+	ceph_osdc_put_request(req);
+
+out:
+	return ret;
+}
+
+static int ceph_zero_objects(struct inode *inode, loff_t offset, loff_t length)
+{
+	int ret = 0;
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	s32 stripe_unit = ceph_file_layout_su(ci->i_layout);
+	s32 stripe_count = ceph_file_layout_stripe_count(ci->i_layout);
+	s32 object_size = ceph_file_layout_object_size(ci->i_layout);
+	u64 object_set_size = object_size * stripe_count;
+	u64 nearly, t;
+
+	/* round offset up to next period boundary */
+	nearly = offset + object_set_size - 1;
+	t = nearly;
+	nearly -= do_div(t, object_set_size);
+
+	while (length && offset < nearly) {
+		loff_t size = length;
+		ret = ceph_zero_partial_object(inode, offset, &size);
+		if (ret < 0)
+			return ret;
+		offset += size;
+		length -= size;
+	}
+	while (length >= object_set_size) {
+		int i;
+		loff_t pos = offset;
+		for (i = 0; i < stripe_count; ++i) {
+			ret = ceph_zero_partial_object(inode, pos, NULL);
+			if (ret < 0)
+				return ret;
+			pos += stripe_unit;
+		}
+		offset += object_set_size;
+		length -= object_set_size;
+	}
+	while (length) {
+		loff_t size = length;
+		ret = ceph_zero_partial_object(inode, offset, &size);
+		if (ret < 0)
+			return ret;
+		offset += size;
+		length -= size;
+	}
+	return ret;
+}
+
+static long ceph_fallocate(struct file *file, int mode,
+				loff_t offset, loff_t length)
+{
+	struct ceph_file_info *fi = file->private_data;
+	struct inode *inode = file->f_dentry->d_inode;
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	struct ceph_osd_client *osdc =
+		&ceph_inode_to_client(inode)->client->osdc;
+	int want, got = 0;
+	int dirty;
+	int ret = 0;
+	loff_t endoff = 0;
+	loff_t size;
+
+	if (!S_ISREG(inode->i_mode))
+		return -EOPNOTSUPP;
+
+	if (IS_SWAPFILE(inode))
+		return -ETXTBSY;
+
+	mutex_lock(&inode->i_mutex);
+
+	if (ceph_snap(inode) != CEPH_NOSNAP) {
+		ret = -EROFS;
+		goto unlock;
+	}
+
+	if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL) &&
+		!(mode & FALLOC_FL_PUNCH_HOLE)) {
+		ret = -ENOSPC;
+		goto unlock;
+	}
+
+	size = i_size_read(inode);
+	if (!(mode & FALLOC_FL_KEEP_SIZE))
+		endoff = offset + length;
+
+	if (fi->fmode & CEPH_FILE_MODE_LAZY)
+		want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO;
+	else
+		want = CEPH_CAP_FILE_BUFFER;
+
+	ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, endoff);
+	if (ret < 0)
+		goto unlock;
+
+	if (mode & FALLOC_FL_PUNCH_HOLE) {
+		if (offset < size)
+			ceph_zero_pagecache_range(inode, offset, length);
+		ret = ceph_zero_objects(inode, offset, length);
+	} else if (endoff > size) {
+		truncate_pagecache_range(inode, size, -1);
+		if (ceph_inode_set_size(inode, endoff))
+			ceph_check_caps(ceph_inode(inode),
+				CHECK_CAPS_AUTHONLY, NULL);
+	}
+
+	if (!ret) {
+		spin_lock(&ci->i_ceph_lock);
+		dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
+		spin_unlock(&ci->i_ceph_lock);
+		if (dirty)
+			__mark_inode_dirty(inode, dirty);
+	}
+
+	ceph_put_cap_refs(ci, got);
+unlock:
+	mutex_unlock(&inode->i_mutex);
+	return ret;
+}
+
 const struct file_operations ceph_file_fops = {
 	.open = ceph_open,
 	.release = ceph_release,
@@ -887,5 +1101,6 @@
 	.splice_write = generic_file_splice_write,
 	.unlocked_ioctl = ceph_ioctl,
 	.compat_ioctl	= ceph_ioctl,
+	.fallocate	= ceph_fallocate,
 };
 
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index f3a2abf..8549a48 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -12,6 +12,7 @@
 
 #include "super.h"
 #include "mds_client.h"
+#include "cache.h"
 #include <linux/ceph/decode.h>
 
 /*
@@ -344,6 +345,7 @@
 	for (i = 0; i < CEPH_FILE_MODE_NUM; i++)
 		ci->i_nr_by_mode[i] = 0;
 
+	mutex_init(&ci->i_truncate_mutex);
 	ci->i_truncate_seq = 0;
 	ci->i_truncate_size = 0;
 	ci->i_truncate_pending = 0;
@@ -377,6 +379,8 @@
 
 	INIT_WORK(&ci->i_vmtruncate_work, ceph_vmtruncate_work);
 
+	ceph_fscache_inode_init(ci);
+
 	return &ci->vfs_inode;
 }
 
@@ -396,6 +400,8 @@
 
 	dout("destroy_inode %p ino %llx.%llx\n", inode, ceph_vinop(inode));
 
+	ceph_fscache_unregister_inode_cookie(ci);
+
 	ceph_queue_caps_release(inode);
 
 	/*
@@ -430,7 +436,6 @@
 	call_rcu(&inode->i_rcu, ceph_i_callback);
 }
 
-
 /*
  * Helpers to fill in size, ctime, mtime, and atime.  We have to be
  * careful because either the client or MDS may have more up to date
@@ -455,16 +460,20 @@
 			dout("truncate_seq %u -> %u\n",
 			     ci->i_truncate_seq, truncate_seq);
 			ci->i_truncate_seq = truncate_seq;
+
+			/* the MDS should have revoked these caps */
+			WARN_ON_ONCE(issued & (CEPH_CAP_FILE_EXCL |
+					       CEPH_CAP_FILE_RD |
+					       CEPH_CAP_FILE_WR |
+					       CEPH_CAP_FILE_LAZYIO));
 			/*
 			 * If we hold relevant caps, or in the case where we're
 			 * not the only client referencing this file and we
 			 * don't hold those caps, then we need to check whether
 			 * the file is either opened or mmaped
 			 */
-			if ((issued & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_RD|
-				       CEPH_CAP_FILE_WR|CEPH_CAP_FILE_BUFFER|
-				       CEPH_CAP_FILE_EXCL|
-				       CEPH_CAP_FILE_LAZYIO)) ||
+			if ((issued & (CEPH_CAP_FILE_CACHE|
+				       CEPH_CAP_FILE_BUFFER)) ||
 			    mapping_mapped(inode->i_mapping) ||
 			    __ceph_caps_file_wanted(ci)) {
 				ci->i_truncate_pending++;
@@ -478,6 +487,10 @@
 		     truncate_size);
 		ci->i_truncate_size = truncate_size;
 	}
+
+	if (queue_trunc)
+		ceph_fscache_invalidate(inode);
+
 	return queue_trunc;
 }
 
@@ -1066,7 +1079,7 @@
 			 * complete.
 			 */
 			ceph_set_dentry_offset(req->r_old_dentry);
-			dout("dn %p gets new offset %lld\n", req->r_old_dentry, 
+			dout("dn %p gets new offset %lld\n", req->r_old_dentry,
 			     ceph_dentry(req->r_old_dentry)->offset);
 
 			dn = req->r_old_dentry;  /* use old_dentry */
@@ -1419,18 +1432,20 @@
 	u32 orig_gen;
 	int check = 0;
 
+	mutex_lock(&ci->i_truncate_mutex);
 	spin_lock(&ci->i_ceph_lock);
 	dout("invalidate_pages %p gen %d revoking %d\n", inode,
 	     ci->i_rdcache_gen, ci->i_rdcache_revoking);
 	if (ci->i_rdcache_revoking != ci->i_rdcache_gen) {
 		/* nevermind! */
 		spin_unlock(&ci->i_ceph_lock);
+		mutex_unlock(&ci->i_truncate_mutex);
 		goto out;
 	}
 	orig_gen = ci->i_rdcache_gen;
 	spin_unlock(&ci->i_ceph_lock);
 
-	truncate_inode_pages(&inode->i_data, 0);
+	truncate_inode_pages(inode->i_mapping, 0);
 
 	spin_lock(&ci->i_ceph_lock);
 	if (orig_gen == ci->i_rdcache_gen &&
@@ -1445,6 +1460,7 @@
 		     ci->i_rdcache_revoking);
 	}
 	spin_unlock(&ci->i_ceph_lock);
+	mutex_unlock(&ci->i_truncate_mutex);
 
 	if (check)
 		ceph_check_caps(ci, 0, NULL);
@@ -1465,9 +1481,7 @@
 	struct inode *inode = &ci->vfs_inode;
 
 	dout("vmtruncate_work %p\n", inode);
-	mutex_lock(&inode->i_mutex);
 	__ceph_do_pending_vmtruncate(inode);
-	mutex_unlock(&inode->i_mutex);
 	iput(inode);
 }
 
@@ -1480,6 +1494,7 @@
 	struct ceph_inode_info *ci = ceph_inode(inode);
 
 	ihold(inode);
+
 	if (queue_work(ceph_sb_to_client(inode->i_sb)->trunc_wq,
 		       &ci->i_vmtruncate_work)) {
 		dout("ceph_queue_vmtruncate %p\n", inode);
@@ -1500,11 +1515,13 @@
 	u64 to;
 	int wrbuffer_refs, finish = 0;
 
+	mutex_lock(&ci->i_truncate_mutex);
 retry:
 	spin_lock(&ci->i_ceph_lock);
 	if (ci->i_truncate_pending == 0) {
 		dout("__do_pending_vmtruncate %p none pending\n", inode);
 		spin_unlock(&ci->i_ceph_lock);
+		mutex_unlock(&ci->i_truncate_mutex);
 		return;
 	}
 
@@ -1521,6 +1538,9 @@
 		goto retry;
 	}
 
+	/* there should be no reader or writer */
+	WARN_ON_ONCE(ci->i_rd_ref || ci->i_wr_ref);
+
 	to = ci->i_truncate_size;
 	wrbuffer_refs = ci->i_wrbuffer_ref;
 	dout("__do_pending_vmtruncate %p (%d) to %lld\n", inode,
@@ -1538,13 +1558,14 @@
 	if (!finish)
 		goto retry;
 
+	mutex_unlock(&ci->i_truncate_mutex);
+
 	if (wrbuffer_refs == 0)
 		ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
 
 	wake_up_all(&ci->i_cap_wq);
 }
 
-
 /*
  * symlinks
  */
@@ -1586,8 +1607,6 @@
 	if (ceph_snap(inode) != CEPH_NOSNAP)
 		return -EROFS;
 
-	__ceph_do_pending_vmtruncate(inode);
-
 	err = inode_change_ok(inode, attr);
 	if (err != 0)
 		return err;
@@ -1768,7 +1787,8 @@
 	     ceph_cap_string(dirtied), mask);
 
 	ceph_mdsc_put_request(req);
-	__ceph_do_pending_vmtruncate(inode);
+	if (mask & CEPH_SETATTR_SIZE)
+		__ceph_do_pending_vmtruncate(inode);
 	return err;
 out:
 	spin_unlock(&ci->i_ceph_lock);
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c
index e0b4ef3..669622f 100644
--- a/fs/ceph/ioctl.c
+++ b/fs/ceph/ioctl.c
@@ -196,8 +196,10 @@
 	r = ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, len,
 					  &dl.object_no, &dl.object_offset,
 					  &olen);
-	if (r < 0)
+	if (r < 0) {
+		up_read(&osdc->map_sem);
 		return -EIO;
+	}
 	dl.file_offset -= dl.object_offset;
 	dl.object_size = ceph_file_layout_object_size(ci->i_layout);
 	dl.block_size = ceph_file_layout_su(ci->i_layout);
@@ -209,8 +211,12 @@
 	snprintf(dl.object_name, sizeof(dl.object_name), "%llx.%08llx",
 		 ceph_ino(inode), dl.object_no);
 
-	ceph_calc_ceph_pg(&pgid, dl.object_name, osdc->osdmap,
-		ceph_file_layout_pg_pool(ci->i_layout));
+	r = ceph_calc_ceph_pg(&pgid, dl.object_name, osdc->osdmap,
+				ceph_file_layout_pg_pool(ci->i_layout));
+	if (r < 0) {
+		up_read(&osdc->map_sem);
+		return r;
+	}
 
 	dl.osd = ceph_calc_pg_primary(osdc->osdmap, pgid);
 	if (dl.osd >= 0) {
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 187bf21..b7bda5d 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -414,6 +414,9 @@
 {
 	struct ceph_mds_session *s;
 
+	if (mds >= mdsc->mdsmap->m_max_mds)
+		return ERR_PTR(-EINVAL);
+
 	s = kzalloc(sizeof(*s), GFP_NOFS);
 	if (!s)
 		return ERR_PTR(-ENOMEM);
@@ -1028,6 +1031,37 @@
 {
 	dout("remove_session_caps on %p\n", session);
 	iterate_session_caps(session, remove_session_caps_cb, NULL);
+
+	spin_lock(&session->s_cap_lock);
+	if (session->s_nr_caps > 0) {
+		struct super_block *sb = session->s_mdsc->fsc->sb;
+		struct inode *inode;
+		struct ceph_cap *cap, *prev = NULL;
+		struct ceph_vino vino;
+		/*
+		 * iterate_session_caps() skips inodes that are being
+		 * deleted, we need to wait until deletions are complete.
+		 * __wait_on_freeing_inode() is designed for the job,
+		 * but it is not exported, so use lookup inode function
+		 * to access it.
+		 */
+		while (!list_empty(&session->s_caps)) {
+			cap = list_entry(session->s_caps.next,
+					 struct ceph_cap, session_caps);
+			if (cap == prev)
+				break;
+			prev = cap;
+			vino = cap->ci->i_vino;
+			spin_unlock(&session->s_cap_lock);
+
+			inode = ceph_find_inode(sb, vino);
+			iput(inode);
+
+			spin_lock(&session->s_cap_lock);
+		}
+	}
+	spin_unlock(&session->s_cap_lock);
+
 	BUG_ON(session->s_nr_caps > 0);
 	BUG_ON(!list_empty(&session->s_cap_flushing));
 	cleanup_cap_releases(session);
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 6627b26..6a0951e 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -17,6 +17,7 @@
 
 #include "super.h"
 #include "mds_client.h"
+#include "cache.h"
 
 #include <linux/ceph/ceph_features.h>
 #include <linux/ceph/decode.h>
@@ -142,6 +143,8 @@
 	Opt_nodcache,
 	Opt_ino32,
 	Opt_noino32,
+	Opt_fscache,
+	Opt_nofscache
 };
 
 static match_table_t fsopt_tokens = {
@@ -167,6 +170,8 @@
 	{Opt_nodcache, "nodcache"},
 	{Opt_ino32, "ino32"},
 	{Opt_noino32, "noino32"},
+	{Opt_fscache, "fsc"},
+	{Opt_nofscache, "nofsc"},
 	{-1, NULL}
 };
 
@@ -260,6 +265,12 @@
 	case Opt_noino32:
 		fsopt->flags &= ~CEPH_MOUNT_OPT_INO32;
 		break;
+	case Opt_fscache:
+		fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE;
+		break;
+	case Opt_nofscache:
+		fsopt->flags &= ~CEPH_MOUNT_OPT_FSCACHE;
+		break;
 	default:
 		BUG_ON(token);
 	}
@@ -422,6 +433,10 @@
 		seq_puts(m, ",dcache");
 	else
 		seq_puts(m, ",nodcache");
+	if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE)
+		seq_puts(m, ",fsc");
+	else
+		seq_puts(m, ",nofsc");
 
 	if (fsopt->wsize)
 		seq_printf(m, ",wsize=%d", fsopt->wsize);
@@ -530,11 +545,18 @@
 	if (!fsc->wb_pagevec_pool)
 		goto fail_trunc_wq;
 
+	/* setup fscache */
+	if ((fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) &&
+	    (ceph_fscache_register_fs(fsc) != 0))
+		goto fail_fscache;
+
 	/* caps */
 	fsc->min_caps = fsopt->max_readdir;
 
 	return fsc;
 
+fail_fscache:
+	ceph_fscache_unregister_fs(fsc);
 fail_trunc_wq:
 	destroy_workqueue(fsc->trunc_wq);
 fail_pg_inv_wq:
@@ -554,6 +576,8 @@
 {
 	dout("destroy_fs_client %p\n", fsc);
 
+	ceph_fscache_unregister_fs(fsc);
+
 	destroy_workqueue(fsc->wb_wq);
 	destroy_workqueue(fsc->pg_inv_wq);
 	destroy_workqueue(fsc->trunc_wq);
@@ -588,6 +612,8 @@
 
 static int __init init_caches(void)
 {
+	int error = -ENOMEM;
+
 	ceph_inode_cachep = kmem_cache_create("ceph_inode_info",
 				      sizeof(struct ceph_inode_info),
 				      __alignof__(struct ceph_inode_info),
@@ -611,15 +637,17 @@
 	if (ceph_file_cachep == NULL)
 		goto bad_file;
 
-	return 0;
+	if ((error = ceph_fscache_register()))
+		goto bad_file;
 
+	return 0;
 bad_file:
 	kmem_cache_destroy(ceph_dentry_cachep);
 bad_dentry:
 	kmem_cache_destroy(ceph_cap_cachep);
 bad_cap:
 	kmem_cache_destroy(ceph_inode_cachep);
-	return -ENOMEM;
+	return error;
 }
 
 static void destroy_caches(void)
@@ -629,10 +657,13 @@
 	 * destroy cache.
 	 */
 	rcu_barrier();
+
 	kmem_cache_destroy(ceph_inode_cachep);
 	kmem_cache_destroy(ceph_cap_cachep);
 	kmem_cache_destroy(ceph_dentry_cachep);
 	kmem_cache_destroy(ceph_file_cachep);
+
+	ceph_fscache_unregister();
 }
 
 
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index cbded57..6014b0a 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -16,6 +16,10 @@
 
 #include <linux/ceph/libceph.h>
 
+#ifdef CONFIG_CEPH_FSCACHE
+#include <linux/fscache.h>
+#endif
+
 /* f_type in struct statfs */
 #define CEPH_SUPER_MAGIC 0x00c36400
 
@@ -29,6 +33,7 @@
 #define CEPH_MOUNT_OPT_NOASYNCREADDIR  (1<<7) /* no dcache readdir */
 #define CEPH_MOUNT_OPT_INO32           (1<<8) /* 32 bit inos */
 #define CEPH_MOUNT_OPT_DCACHE          (1<<9) /* use dcache for readdir etc */
+#define CEPH_MOUNT_OPT_FSCACHE         (1<<10) /* use fscache */
 
 #define CEPH_MOUNT_OPT_DEFAULT    (CEPH_MOUNT_OPT_RBYTES)
 
@@ -90,6 +95,11 @@
 	struct dentry *debugfs_bdi;
 	struct dentry *debugfs_mdsc, *debugfs_mdsmap;
 #endif
+
+#ifdef CONFIG_CEPH_FSCACHE
+	struct fscache_cookie *fscache;
+	struct workqueue_struct *revalidate_wq;
+#endif
 };
 
 
@@ -288,6 +298,7 @@
 
 	int i_nr_by_mode[CEPH_FILE_MODE_NUM];  /* open file counts */
 
+	struct mutex i_truncate_mutex;
 	u32 i_truncate_seq;        /* last truncate to smaller size */
 	u64 i_truncate_size;       /*  and the size we last truncated down to */
 	int i_truncate_pending;    /*  still need to call vmtruncate */
@@ -319,6 +330,12 @@
 
 	struct work_struct i_vmtruncate_work;
 
+#ifdef CONFIG_CEPH_FSCACHE
+	struct fscache_cookie *fscache;
+	u32 i_fscache_gen; /* sequence, for delayed fscache validate */
+	struct work_struct i_revalidate_work;
+#endif
+
 	struct inode vfs_inode; /* at end */
 };
 
diff --git a/fs/dcache.c b/fs/dcache.c
index ca8e9cd..4d9df3c 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -88,6 +88,35 @@
 
 static struct kmem_cache *dentry_cache __read_mostly;
 
+/**
+ * read_seqbegin_or_lock - begin a sequence number check or locking block
+ * lock: sequence lock
+ * seq : sequence number to be checked
+ *
+ * First try it once optimistically without taking the lock. If that fails,
+ * take the lock. The sequence number is also used as a marker for deciding
+ * whether to be a reader (even) or writer (odd).
+ * N.B. seq must be initialized to an even number to begin with.
+ */
+static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq)
+{
+	if (!(*seq & 1))	/* Even */
+		*seq = read_seqbegin(lock);
+	else			/* Odd */
+		write_seqlock(lock);
+}
+
+static inline int need_seqretry(seqlock_t *lock, int seq)
+{
+	return !(seq & 1) && read_seqretry(lock, seq);
+}
+
+static inline void done_seqretry(seqlock_t *lock, int seq)
+{
+	if (seq & 1)
+		write_sequnlock(lock);
+}
+
 /*
  * This is the single most critical data structure when it comes
  * to the dcache: the hashtable for lookups. Somebody should try
@@ -1012,7 +1041,7 @@
  * the parenthood after dropping the lock and check
  * that the sequence number still matches.
  */
-static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq)
+static struct dentry *try_to_ascend(struct dentry *old, unsigned seq)
 {
 	struct dentry *new = old->d_parent;
 
@@ -1026,7 +1055,7 @@
 	 */
 	if (new != old->d_parent ||
 		 (old->d_flags & DCACHE_DENTRY_KILLED) ||
-		 (!locked && read_seqretry(&rename_lock, seq))) {
+		 need_seqretry(&rename_lock, seq)) {
 		spin_unlock(&new->d_lock);
 		new = NULL;
 	}
@@ -1063,13 +1092,12 @@
 {
 	struct dentry *this_parent;
 	struct list_head *next;
-	unsigned seq;
-	int locked = 0;
+	unsigned seq = 0;
 	enum d_walk_ret ret;
 	bool retry = true;
 
-	seq = read_seqbegin(&rename_lock);
 again:
+	read_seqbegin_or_lock(&rename_lock, &seq);
 	this_parent = parent;
 	spin_lock(&this_parent->d_lock);
 
@@ -1123,13 +1151,13 @@
 	 */
 	if (this_parent != parent) {
 		struct dentry *child = this_parent;
-		this_parent = try_to_ascend(this_parent, locked, seq);
+		this_parent = try_to_ascend(this_parent, seq);
 		if (!this_parent)
 			goto rename_retry;
 		next = child->d_u.d_child.next;
 		goto resume;
 	}
-	if (!locked && read_seqretry(&rename_lock, seq)) {
+	if (need_seqretry(&rename_lock, seq)) {
 		spin_unlock(&this_parent->d_lock);
 		goto rename_retry;
 	}
@@ -1138,17 +1166,13 @@
 
 out_unlock:
 	spin_unlock(&this_parent->d_lock);
-	if (locked)
-		write_sequnlock(&rename_lock);
+	done_seqretry(&rename_lock, seq);
 	return;
 
 rename_retry:
 	if (!retry)
 		return;
-	if (locked)
-		goto again;
-	locked = 1;
-	write_seqlock(&rename_lock);
+	seq = 1;
 	goto again;
 }
 
@@ -2647,9 +2671,39 @@
 	return 0;
 }
 
+/**
+ * prepend_name - prepend a pathname in front of current buffer pointer
+ * buffer: buffer pointer
+ * buflen: allocated length of the buffer
+ * name:   name string and length qstr structure
+ *
+ * With RCU path tracing, it may race with d_move(). Use ACCESS_ONCE() to
+ * make sure that either the old or the new name pointer and length are
+ * fetched. However, there may be mismatch between length and pointer.
+ * The length cannot be trusted, we need to copy it byte-by-byte until
+ * the length is reached or a null byte is found. It also prepends "/" at
+ * the beginning of the name. The sequence number check at the caller will
+ * retry it again when a d_move() does happen. So any garbage in the buffer
+ * due to mismatched pointer and length will be discarded.
+ */
 static int prepend_name(char **buffer, int *buflen, struct qstr *name)
 {
-	return prepend(buffer, buflen, name->name, name->len);
+	const char *dname = ACCESS_ONCE(name->name);
+	u32 dlen = ACCESS_ONCE(name->len);
+	char *p;
+
+	if (*buflen < dlen + 1)
+		return -ENAMETOOLONG;
+	*buflen -= dlen + 1;
+	p = *buffer -= dlen + 1;
+	*p++ = '/';
+	while (dlen--) {
+		char c = *dname++;
+		if (!c)
+			break;
+		*p++ = c;
+	}
+	return 0;
 }
 
 /**
@@ -2659,7 +2713,14 @@
  * @buffer: pointer to the end of the buffer
  * @buflen: pointer to buffer length
  *
- * Caller holds the rename_lock.
+ * The function tries to write out the pathname without taking any lock other
+ * than the RCU read lock to make sure that dentries won't go away. It only
+ * checks the sequence number of the global rename_lock as any change in the
+ * dentry's d_seq will be preceded by changes in the rename_lock sequence
+ * number. If the sequence number had been change, it will restart the whole
+ * pathname back-tracing sequence again. It performs a total of 3 trials of
+ * lockless back-tracing sequences before falling back to take the
+ * rename_lock.
  */
 static int prepend_path(const struct path *path,
 			const struct path *root,
@@ -2668,54 +2729,66 @@
 	struct dentry *dentry = path->dentry;
 	struct vfsmount *vfsmnt = path->mnt;
 	struct mount *mnt = real_mount(vfsmnt);
-	bool slash = false;
 	int error = 0;
+	unsigned seq = 0;
+	char *bptr;
+	int blen;
 
+	rcu_read_lock();
+restart:
+	bptr = *buffer;
+	blen = *buflen;
+	read_seqbegin_or_lock(&rename_lock, &seq);
 	while (dentry != root->dentry || vfsmnt != root->mnt) {
 		struct dentry * parent;
 
 		if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
 			/* Global root? */
-			if (!mnt_has_parent(mnt))
-				goto global_root;
-			dentry = mnt->mnt_mountpoint;
-			mnt = mnt->mnt_parent;
-			vfsmnt = &mnt->mnt;
-			continue;
+			if (mnt_has_parent(mnt)) {
+				dentry = mnt->mnt_mountpoint;
+				mnt = mnt->mnt_parent;
+				vfsmnt = &mnt->mnt;
+				continue;
+			}
+			/*
+			 * Filesystems needing to implement special "root names"
+			 * should do so with ->d_dname()
+			 */
+			if (IS_ROOT(dentry) &&
+			   (dentry->d_name.len != 1 ||
+			    dentry->d_name.name[0] != '/')) {
+				WARN(1, "Root dentry has weird name <%.*s>\n",
+				     (int) dentry->d_name.len,
+				     dentry->d_name.name);
+			}
+			if (!error)
+				error = is_mounted(vfsmnt) ? 1 : 2;
+			break;
 		}
 		parent = dentry->d_parent;
 		prefetch(parent);
-		spin_lock(&dentry->d_lock);
-		error = prepend_name(buffer, buflen, &dentry->d_name);
-		spin_unlock(&dentry->d_lock);
-		if (!error)
-			error = prepend(buffer, buflen, "/", 1);
+		error = prepend_name(&bptr, &blen, &dentry->d_name);
 		if (error)
 			break;
 
-		slash = true;
 		dentry = parent;
 	}
-
-	if (!error && !slash)
-		error = prepend(buffer, buflen, "/", 1);
-
-	return error;
-
-global_root:
-	/*
-	 * Filesystems needing to implement special "root names"
-	 * should do so with ->d_dname()
-	 */
-	if (IS_ROOT(dentry) &&
-	    (dentry->d_name.len != 1 || dentry->d_name.name[0] != '/')) {
-		WARN(1, "Root dentry has weird name <%.*s>\n",
-		     (int) dentry->d_name.len, dentry->d_name.name);
+	if (!(seq & 1))
+		rcu_read_unlock();
+	if (need_seqretry(&rename_lock, seq)) {
+		seq = 1;
+		goto restart;
 	}
-	if (!slash)
-		error = prepend(buffer, buflen, "/", 1);
-	if (!error)
-		error = is_mounted(vfsmnt) ? 1 : 2;
+	done_seqretry(&rename_lock, seq);
+
+	if (error >= 0 && bptr == *buffer) {
+		if (--blen < 0)
+			error = -ENAMETOOLONG;
+		else
+			*--bptr = '/';
+	}
+	*buffer = bptr;
+	*buflen = blen;
 	return error;
 }
 
@@ -2744,9 +2817,7 @@
 
 	prepend(&res, &buflen, "\0", 1);
 	br_read_lock(&vfsmount_lock);
-	write_seqlock(&rename_lock);
 	error = prepend_path(path, root, &res, &buflen);
-	write_sequnlock(&rename_lock);
 	br_read_unlock(&vfsmount_lock);
 
 	if (error < 0)
@@ -2765,9 +2836,7 @@
 
 	prepend(&res, &buflen, "\0", 1);
 	br_read_lock(&vfsmount_lock);
-	write_seqlock(&rename_lock);
 	error = prepend_path(path, &root, &res, &buflen);
-	write_sequnlock(&rename_lock);
 	br_read_unlock(&vfsmount_lock);
 
 	if (error > 1)
@@ -2833,9 +2902,7 @@
 
 	get_fs_root(current->fs, &root);
 	br_read_lock(&vfsmount_lock);
-	write_seqlock(&rename_lock);
 	error = path_with_deleted(path, &root, &res, &buflen);
-	write_sequnlock(&rename_lock);
 	br_read_unlock(&vfsmount_lock);
 	if (error < 0)
 		res = ERR_PTR(error);
@@ -2870,10 +2937,10 @@
 	char *end = buffer + buflen;
 	/* these dentries are never renamed, so d_lock is not needed */
 	if (prepend(&end, &buflen, " (deleted)", 11) ||
-	    prepend_name(&end, &buflen, &dentry->d_name) ||
+	    prepend(&end, &buflen, dentry->d_name.name, dentry->d_name.len) ||
 	    prepend(&end, &buflen, "/", 1))  
 		end = ERR_PTR(-ENAMETOOLONG);
-	return end;  
+	return end;
 }
 
 /*
@@ -2881,30 +2948,42 @@
  */
 static char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
 {
-	char *end = buf + buflen;
-	char *retval;
+	char *end, *retval;
+	int len, seq = 0;
+	int error = 0;
 
-	prepend(&end, &buflen, "\0", 1);
+	rcu_read_lock();
+restart:
+	end = buf + buflen;
+	len = buflen;
+	prepend(&end, &len, "\0", 1);
 	if (buflen < 1)
 		goto Elong;
 	/* Get '/' right */
 	retval = end-1;
 	*retval = '/';
-
+	read_seqbegin_or_lock(&rename_lock, &seq);
 	while (!IS_ROOT(dentry)) {
 		struct dentry *parent = dentry->d_parent;
 		int error;
 
 		prefetch(parent);
-		spin_lock(&dentry->d_lock);
-		error = prepend_name(&end, &buflen, &dentry->d_name);
-		spin_unlock(&dentry->d_lock);
-		if (error != 0 || prepend(&end, &buflen, "/", 1) != 0)
-			goto Elong;
+		error = prepend_name(&end, &len, &dentry->d_name);
+		if (error)
+			break;
 
 		retval = end;
 		dentry = parent;
 	}
+	if (!(seq & 1))
+		rcu_read_unlock();
+	if (need_seqretry(&rename_lock, seq)) {
+		seq = 1;
+		goto restart;
+	}
+	done_seqretry(&rename_lock, seq);
+	if (error)
+		goto Elong;
 	return retval;
 Elong:
 	return ERR_PTR(-ENAMETOOLONG);
@@ -2912,13 +2991,7 @@
 
 char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen)
 {
-	char *retval;
-
-	write_seqlock(&rename_lock);
-	retval = __dentry_path(dentry, buf, buflen);
-	write_sequnlock(&rename_lock);
-
-	return retval;
+	return __dentry_path(dentry, buf, buflen);
 }
 EXPORT_SYMBOL(dentry_path_raw);
 
@@ -2927,7 +3000,6 @@
 	char *p = NULL;
 	char *retval;
 
-	write_seqlock(&rename_lock);
 	if (d_unlinked(dentry)) {
 		p = buf + buflen;
 		if (prepend(&p, &buflen, "//deleted", 10) != 0)
@@ -2935,7 +3007,6 @@
 		buflen++;
 	}
 	retval = __dentry_path(dentry, buf, buflen);
-	write_sequnlock(&rename_lock);
 	if (!IS_ERR(retval) && p)
 		*p = '/';	/* restore '/' overriden with '\0' */
 	return retval;
@@ -2974,7 +3045,6 @@
 
 	error = -ENOENT;
 	br_read_lock(&vfsmount_lock);
-	write_seqlock(&rename_lock);
 	if (!d_unlinked(pwd.dentry)) {
 		unsigned long len;
 		char *cwd = page + PAGE_SIZE;
@@ -2982,7 +3052,6 @@
 
 		prepend(&cwd, &buflen, "\0", 1);
 		error = prepend_path(&pwd, &root, &cwd, &buflen);
-		write_sequnlock(&rename_lock);
 		br_read_unlock(&vfsmount_lock);
 
 		if (error < 0)
@@ -3003,7 +3072,6 @@
 				error = -EFAULT;
 		}
 	} else {
-		write_sequnlock(&rename_lock);
 		br_read_unlock(&vfsmount_lock);
 	}
 
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 1782023..0e04142 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -544,6 +544,7 @@
  */
 static int sb_init_dio_done_wq(struct super_block *sb)
 {
+	struct workqueue_struct *old;
 	struct workqueue_struct *wq = alloc_workqueue("dio/%s",
 						      WQ_MEM_RECLAIM, 0,
 						      sb->s_id);
@@ -552,9 +553,9 @@
 	/*
 	 * This has to be atomic as more DIOs can race to create the workqueue
 	 */
-	cmpxchg(&sb->s_dio_done_wq, NULL, wq);
+	old = cmpxchg(&sb->s_dio_done_wq, NULL, wq);
 	/* Someone created workqueue before us? Free ours... */
-	if (wq != sb->s_dio_done_wq)
+	if (old)
 		destroy_workqueue(wq);
 	return 0;
 }
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 293bc2e..a235f00 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -231,7 +231,7 @@
 	int result = 0;
 
 	buf->sequence++;
-	if (buf->ino == ino) {
+	if (buf->ino == ino && len <= NAME_MAX) {
 		memcpy(buf->name, name, len);
 		buf->name[len] = '\0';
 		buf->found = 1;
diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
index 0e91a3c..318e843 100644
--- a/fs/fscache/cookie.c
+++ b/fs/fscache/cookie.c
@@ -558,3 +558,74 @@
 
 	_leave("");
 }
+
+/*
+ * check the consistency between the netfs inode and the backing cache
+ *
+ * NOTE: it only serves no-index type
+ */
+int __fscache_check_consistency(struct fscache_cookie *cookie)
+{
+	struct fscache_operation *op;
+	struct fscache_object *object;
+	int ret;
+
+	_enter("%p,", cookie);
+
+	ASSERTCMP(cookie->def->type, ==, FSCACHE_COOKIE_TYPE_DATAFILE);
+
+	if (fscache_wait_for_deferred_lookup(cookie) < 0)
+		return -ERESTARTSYS;
+
+	if (hlist_empty(&cookie->backing_objects))
+		return 0;
+
+	op = kzalloc(sizeof(*op), GFP_NOIO | __GFP_NOMEMALLOC | __GFP_NORETRY);
+	if (!op)
+		return -ENOMEM;
+
+	fscache_operation_init(op, NULL, NULL);
+	op->flags = FSCACHE_OP_MYTHREAD |
+		(1 << FSCACHE_OP_WAITING);
+
+	spin_lock(&cookie->lock);
+
+	if (hlist_empty(&cookie->backing_objects))
+		goto inconsistent;
+	object = hlist_entry(cookie->backing_objects.first,
+			     struct fscache_object, cookie_link);
+	if (test_bit(FSCACHE_IOERROR, &object->cache->flags))
+		goto inconsistent;
+
+	op->debug_id = atomic_inc_return(&fscache_op_debug_id);
+
+	atomic_inc(&cookie->n_active);
+	if (fscache_submit_op(object, op) < 0)
+		goto submit_failed;
+
+	/* the work queue now carries its own ref on the object */
+	spin_unlock(&cookie->lock);
+
+	ret = fscache_wait_for_operation_activation(object, op,
+						    NULL, NULL, NULL);
+	if (ret == 0) {
+		/* ask the cache to honour the operation */
+		ret = object->cache->ops->check_consistency(op);
+		fscache_op_complete(op, false);
+	} else if (ret == -ENOBUFS) {
+		ret = 0;
+	}
+
+	fscache_put_operation(op);
+	_leave(" = %d", ret);
+	return ret;
+
+submit_failed:
+	atomic_dec(&cookie->n_active);
+inconsistent:
+	spin_unlock(&cookie->lock);
+	kfree(op);
+	_leave(" = -ESTALE");
+	return -ESTALE;
+}
+EXPORT_SYMBOL(__fscache_check_consistency);
diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h
index 12d505b..4226f66 100644
--- a/fs/fscache/internal.h
+++ b/fs/fscache/internal.h
@@ -130,6 +130,12 @@
 /*
  * page.c
  */
+extern int fscache_wait_for_deferred_lookup(struct fscache_cookie *);
+extern int fscache_wait_for_operation_activation(struct fscache_object *,
+						 struct fscache_operation *,
+						 atomic_t *,
+						 atomic_t *,
+						 void (*)(struct fscache_operation *));
 extern void fscache_invalidate_writes(struct fscache_cookie *);
 
 /*
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index d479ab3..8702b73 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -278,7 +278,7 @@
 /*
  * wait for a deferred lookup to complete
  */
-static int fscache_wait_for_deferred_lookup(struct fscache_cookie *cookie)
+int fscache_wait_for_deferred_lookup(struct fscache_cookie *cookie)
 {
 	unsigned long jif;
 
@@ -322,42 +322,46 @@
 /*
  * wait for an object to become active (or dead)
  */
-static int fscache_wait_for_retrieval_activation(struct fscache_object *object,
-						 struct fscache_retrieval *op,
-						 atomic_t *stat_op_waits,
-						 atomic_t *stat_object_dead)
+int fscache_wait_for_operation_activation(struct fscache_object *object,
+					  struct fscache_operation *op,
+					  atomic_t *stat_op_waits,
+					  atomic_t *stat_object_dead,
+					  void (*do_cancel)(struct fscache_operation *))
 {
 	int ret;
 
-	if (!test_bit(FSCACHE_OP_WAITING, &op->op.flags))
+	if (!test_bit(FSCACHE_OP_WAITING, &op->flags))
 		goto check_if_dead;
 
 	_debug(">>> WT");
-	fscache_stat(stat_op_waits);
-	if (wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING,
+	if (stat_op_waits)
+		fscache_stat(stat_op_waits);
+	if (wait_on_bit(&op->flags, FSCACHE_OP_WAITING,
 			fscache_wait_bit_interruptible,
 			TASK_INTERRUPTIBLE) != 0) {
-		ret = fscache_cancel_op(&op->op, fscache_do_cancel_retrieval);
+		ret = fscache_cancel_op(op, do_cancel);
 		if (ret == 0)
 			return -ERESTARTSYS;
 
 		/* it's been removed from the pending queue by another party,
 		 * so we should get to run shortly */
-		wait_on_bit(&op->op.flags, FSCACHE_OP_WAITING,
+		wait_on_bit(&op->flags, FSCACHE_OP_WAITING,
 			    fscache_wait_bit, TASK_UNINTERRUPTIBLE);
 	}
 	_debug("<<< GO");
 
 check_if_dead:
-	if (op->op.state == FSCACHE_OP_ST_CANCELLED) {
-		fscache_stat(stat_object_dead);
+	if (op->state == FSCACHE_OP_ST_CANCELLED) {
+		if (stat_object_dead)
+			fscache_stat(stat_object_dead);
 		_leave(" = -ENOBUFS [cancelled]");
 		return -ENOBUFS;
 	}
 	if (unlikely(fscache_object_is_dead(object))) {
-		pr_err("%s() = -ENOBUFS [obj dead %d]\n", __func__, op->op.state);
-		fscache_cancel_op(&op->op, fscache_do_cancel_retrieval);
-		fscache_stat(stat_object_dead);
+		pr_err("%s() = -ENOBUFS [obj dead %d]\n", __func__, op->state);
+		fscache_cancel_op(op, do_cancel);
+		if (stat_object_dead)
+			fscache_stat(stat_object_dead);
 		return -ENOBUFS;
 	}
 	return 0;
@@ -432,10 +436,11 @@
 
 	/* we wait for the operation to become active, and then process it
 	 * *here*, in this thread, and not in the thread pool */
-	ret = fscache_wait_for_retrieval_activation(
-		object, op,
+	ret = fscache_wait_for_operation_activation(
+		object, &op->op,
 		__fscache_stat(&fscache_n_retrieval_op_waits),
-		__fscache_stat(&fscache_n_retrievals_object_dead));
+		__fscache_stat(&fscache_n_retrievals_object_dead),
+		fscache_do_cancel_retrieval);
 	if (ret < 0)
 		goto error;
 
@@ -557,10 +562,11 @@
 
 	/* we wait for the operation to become active, and then process it
 	 * *here*, in this thread, and not in the thread pool */
-	ret = fscache_wait_for_retrieval_activation(
-		object, op,
+	ret = fscache_wait_for_operation_activation(
+		object, &op->op,
 		__fscache_stat(&fscache_n_retrieval_op_waits),
-		__fscache_stat(&fscache_n_retrievals_object_dead));
+		__fscache_stat(&fscache_n_retrievals_object_dead),
+		fscache_do_cancel_retrieval);
 	if (ret < 0)
 		goto error;
 
@@ -658,10 +664,11 @@
 
 	fscache_stat(&fscache_n_alloc_ops);
 
-	ret = fscache_wait_for_retrieval_activation(
-		object, op,
+	ret = fscache_wait_for_operation_activation(
+		object, &op->op,
 		__fscache_stat(&fscache_n_alloc_op_waits),
-		__fscache_stat(&fscache_n_allocs_object_dead));
+		__fscache_stat(&fscache_n_allocs_object_dead),
+		fscache_do_cancel_retrieval);
 	if (ret < 0)
 		goto error;
 
@@ -694,6 +701,22 @@
 EXPORT_SYMBOL(__fscache_alloc_page);
 
 /*
+ * Unmark pages allocate in the readahead code path (via:
+ * fscache_readpages_or_alloc) after delegating to the base filesystem
+ */
+void __fscache_readpages_cancel(struct fscache_cookie *cookie,
+				struct list_head *pages)
+{
+	struct page *page;
+
+	list_for_each_entry(page, pages, lru) {
+		if (PageFsCache(page))
+			__fscache_uncache_page(cookie, page);
+	}
+}
+EXPORT_SYMBOL(__fscache_readpages_cancel);
+
+/*
  * release a write op reference
  */
 static void fscache_release_write_op(struct fscache_operation *_op)
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 1d55f94..ef74ad5 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1765,11 +1765,9 @@
 /* Look up request on processing list by unique ID */
 static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique)
 {
-	struct list_head *entry;
+	struct fuse_req *req;
 
-	list_for_each(entry, &fc->processing) {
-		struct fuse_req *req;
-		req = list_entry(entry, struct fuse_req, list);
+	list_for_each_entry(req, &fc->processing, list) {
 		if (req->in.h.unique == unique || req->intr_unique == unique)
 			return req;
 	}
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 0e6961a..3ac9108 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1177,6 +1177,8 @@
 			return -EIO;
 		if (reclen > nbytes)
 			break;
+		if (memchr(dirent->name, '/', dirent->namelen) != NULL)
+			return -EIO;
 
 		if (!dir_emit(ctx, dirent->name, dirent->namelen,
 			       dirent->ino, dirent->type))
@@ -1315,6 +1317,8 @@
 			return -EIO;
 		if (reclen > nbytes)
 			break;
+		if (memchr(dirent->name, '/', dirent->namelen) != NULL)
+			return -EIO;
 
 		if (!over) {
 			/* We fill entries into dstbuf only as much as
@@ -1585,6 +1589,7 @@
 		    struct file *file)
 {
 	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_inode *fi = get_fuse_inode(inode);
 	struct fuse_req *req;
 	struct fuse_setattr_in inarg;
 	struct fuse_attr_out outarg;
@@ -1612,8 +1617,10 @@
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
-	if (is_truncate)
+	if (is_truncate) {
 		fuse_set_nowrite(inode);
+		set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
+	}
 
 	memset(&inarg, 0, sizeof(inarg));
 	memset(&outarg, 0, sizeof(outarg));
@@ -1675,12 +1682,14 @@
 		invalidate_inode_pages2(inode->i_mapping);
 	}
 
+	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
 	return 0;
 
 error:
 	if (is_truncate)
 		fuse_release_nowrite(inode);
 
+	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
 	return err;
 }
 
@@ -1744,6 +1753,8 @@
 		fc->no_setxattr = 1;
 		err = -EOPNOTSUPP;
 	}
+	if (!err)
+		fuse_invalidate_attr(inode);
 	return err;
 }
 
@@ -1873,6 +1884,8 @@
 		fc->no_removexattr = 1;
 		err = -EOPNOTSUPP;
 	}
+	if (!err)
+		fuse_invalidate_attr(inode);
 	return err;
 }
 
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 5c121fe..d409dea 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -629,7 +629,8 @@
 	struct fuse_inode *fi = get_fuse_inode(inode);
 
 	spin_lock(&fc->lock);
-	if (attr_ver == fi->attr_version && size < inode->i_size) {
+	if (attr_ver == fi->attr_version && size < inode->i_size &&
+	    !test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state)) {
 		fi->attr_version = ++fc->attr_version;
 		i_size_write(inode, size);
 	}
@@ -1032,12 +1033,16 @@
 {
 	struct inode *inode = mapping->host;
 	struct fuse_conn *fc = get_fuse_conn(inode);
+	struct fuse_inode *fi = get_fuse_inode(inode);
 	int err = 0;
 	ssize_t res = 0;
 
 	if (is_bad_inode(inode))
 		return -EIO;
 
+	if (inode->i_size < pos + iov_iter_count(ii))
+		set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
+
 	do {
 		struct fuse_req *req;
 		ssize_t count;
@@ -1073,6 +1078,7 @@
 	if (res > 0)
 		fuse_write_update_size(inode, pos);
 
+	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
 	fuse_invalidate_attr(inode);
 
 	return res > 0 ? res : err;
@@ -1529,7 +1535,6 @@
 
 	inc_bdi_stat(mapping->backing_dev_info, BDI_WRITEBACK);
 	inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP);
-	end_page_writeback(page);
 
 	spin_lock(&fc->lock);
 	list_add(&req->writepages_entry, &fi->writepages);
@@ -1537,6 +1542,8 @@
 	fuse_flush_writepages(inode);
 	spin_unlock(&fc->lock);
 
+	end_page_writeback(page);
+
 	return 0;
 
 err_free:
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index fde7249..5ced199 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -115,6 +115,8 @@
 enum {
 	/** Advise readdirplus  */
 	FUSE_I_ADVISE_RDPLUS,
+	/** An operation changing file size is in progress  */
+	FUSE_I_SIZE_UNSTABLE,
 };
 
 struct fuse_conn;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 0b57859..e0fe703 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -201,7 +201,8 @@
 	struct timespec old_mtime;
 
 	spin_lock(&fc->lock);
-	if (attr_version != 0 && fi->attr_version > attr_version) {
+	if ((attr_version != 0 && fi->attr_version > attr_version) ||
+	    test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state)) {
 		spin_unlock(&fc->lock);
 		return;
 	}
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index ee48ad3..1f7d805 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -122,14 +122,13 @@
 }
 
 /**
- * gfs2_writeback_writepage - Write page for writeback mappings
+ * gfs2_writepage - Write page for writeback mappings
  * @page: The page
  * @wbc: The writeback control
  *
  */
 
-static int gfs2_writeback_writepage(struct page *page,
-				    struct writeback_control *wbc)
+static int gfs2_writepage(struct page *page, struct writeback_control *wbc)
 {
 	int ret;
 
@@ -141,32 +140,6 @@
 }
 
 /**
- * gfs2_ordered_writepage - Write page for ordered data files
- * @page: The page to write
- * @wbc: The writeback control
- *
- */
-
-static int gfs2_ordered_writepage(struct page *page,
-				  struct writeback_control *wbc)
-{
-	struct inode *inode = page->mapping->host;
-	struct gfs2_inode *ip = GFS2_I(inode);
-	int ret;
-
-	ret = gfs2_writepage_common(page, wbc);
-	if (ret <= 0)
-		return ret;
-
-	if (!page_has_buffers(page)) {
-		create_empty_buffers(page, inode->i_sb->s_blocksize,
-				     (1 << BH_Dirty)|(1 << BH_Uptodate));
-	}
-	gfs2_page_add_databufs(ip, page, 0, inode->i_sb->s_blocksize-1);
-	return block_write_full_page(page, gfs2_get_block_noalloc, wbc);
-}
-
-/**
  * __gfs2_jdata_writepage - The core of jdata writepage
  * @page: The page to write
  * @wbc: The writeback control
@@ -842,6 +815,8 @@
 	unsigned int from = pos & (PAGE_CACHE_SIZE - 1);
 	unsigned int to = from + len;
 	int ret;
+	struct gfs2_trans *tr = current->journal_info;
+	BUG_ON(!tr);
 
 	BUG_ON(gfs2_glock_is_locked_by_me(ip->i_gl) == NULL);
 
@@ -852,8 +827,6 @@
 		goto failed;
 	}
 
-	gfs2_trans_add_meta(ip->i_gl, dibh);
-
 	if (gfs2_is_stuffed(ip))
 		return gfs2_stuffed_write_end(inode, dibh, pos, len, copied, page);
 
@@ -861,6 +834,11 @@
 		gfs2_page_add_databufs(ip, page, from, to);
 
 	ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
+	if (tr->tr_num_buf_new)
+		__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
+	else
+		gfs2_trans_add_meta(ip->i_gl, dibh);
+
 
 	if (inode == sdp->sd_rindex) {
 		adjust_fs_space(inode);
@@ -1107,7 +1085,7 @@
 }
 
 static const struct address_space_operations gfs2_writeback_aops = {
-	.writepage = gfs2_writeback_writepage,
+	.writepage = gfs2_writepage,
 	.writepages = gfs2_writepages,
 	.readpage = gfs2_readpage,
 	.readpages = gfs2_readpages,
@@ -1123,7 +1101,7 @@
 };
 
 static const struct address_space_operations gfs2_ordered_aops = {
-	.writepage = gfs2_ordered_writepage,
+	.writepage = gfs2_writepage,
 	.writepages = gfs2_writepages,
 	.readpage = gfs2_readpage,
 	.readpages = gfs2_readpages,
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 72c3866..0621b46 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -650,7 +650,7 @@
 {
 	struct address_space *mapping = file->f_mapping;
 	struct inode *inode = mapping->host;
-	int sync_state = inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC);
+	int sync_state = inode->i_state & I_DIRTY;
 	struct gfs2_inode *ip = GFS2_I(inode);
 	int ret = 0, ret1 = 0;
 
@@ -660,6 +660,8 @@
 			return ret1;
 	}
 
+	if (!gfs2_is_jdata(ip))
+		sync_state &= ~I_DIRTY_PAGES;
 	if (datasync)
 		sync_state &= ~I_DIRTY_SYNC;
 
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 544a809..722329c 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1411,7 +1411,6 @@
 		if (demote_ok(gl))
 			handle_callback(gl, LM_ST_UNLOCKED, 0, false);
 		WARN_ON(!test_and_clear_bit(GLF_LOCK, &gl->gl_flags));
-		smp_mb__after_clear_bit();
 		if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
 			gfs2_glock_put_nolock(gl);
 		spin_unlock(&gl->gl_spin);
@@ -1488,7 +1487,7 @@
 
 	rcu_read_lock();
 	hlist_bl_for_each_entry_rcu(gl, pos, head, gl_list) {
-		if ((gl->gl_sbd == sdp) && atomic_read(&gl->gl_ref))
+		if ((gl->gl_sbd == sdp) && atomic_inc_not_zero(&gl->gl_ref))
 			examiner(gl);
 	}
 	rcu_read_unlock();
@@ -1508,18 +1507,17 @@
  * thaw_glock - thaw out a glock which has an unprocessed reply waiting
  * @gl: The glock to thaw
  *
- * N.B. When we freeze a glock, we leave a ref to the glock outstanding,
- * so this has to result in the ref count being dropped by one.
  */
 
 static void thaw_glock(struct gfs2_glock *gl)
 {
 	if (!test_and_clear_bit(GLF_FROZEN, &gl->gl_flags))
-		return;
+		goto out;
 	set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
-	gfs2_glock_hold(gl);
-	if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
+	if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) {
+out:
 		gfs2_glock_put(gl);
+	}
 }
 
 /**
@@ -1536,7 +1534,6 @@
 	if (gl->gl_state != LM_ST_UNLOCKED)
 		handle_callback(gl, LM_ST_UNLOCKED, 0, false);
 	spin_unlock(&gl->gl_spin);
-	gfs2_glock_hold(gl);
 	if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
 		gfs2_glock_put(gl);
 }
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 17c5b5d..010b9fb 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -579,6 +579,24 @@
 	return error;
 }
 
+/**
+ * gfs2_meta_sync - Sync all buffers associated with a glock
+ * @gl: The glock
+ *
+ */
+
+static void gfs2_meta_sync(struct gfs2_glock *gl)
+{
+	struct address_space *mapping = gfs2_glock2aspace(gl);
+	int error;
+
+	filemap_fdatawrite(mapping);
+	error = filemap_fdatawait(mapping);
+
+	if (error)
+		gfs2_io_error(gl->gl_sbd);
+}
+
 static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
 {
 	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 0da3906..9324150 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -98,24 +98,6 @@
 };
 
 /**
- * gfs2_meta_sync - Sync all buffers associated with a glock
- * @gl: The glock
- *
- */
-
-void gfs2_meta_sync(struct gfs2_glock *gl)
-{
-	struct address_space *mapping = gfs2_glock2aspace(gl);
-	int error;
-
-	filemap_fdatawrite(mapping);
-	error = filemap_fdatawait(mapping);
-
-	if (error)
-		gfs2_io_error(gl->gl_sbd);
-}
-
-/**
  * gfs2_getbuf - Get a buffer with a given address space
  * @gl: the glock
  * @blkno: the block number (filesystem scope)
diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h
index 0d4c843..4823b93 100644
--- a/fs/gfs2/meta_io.h
+++ b/fs/gfs2/meta_io.h
@@ -48,21 +48,17 @@
 		return inode->i_sb->s_fs_info;
 }
 
-void gfs2_meta_sync(struct gfs2_glock *gl);
-
-struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno);
-int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno,
-		   int flags, struct buffer_head **bhp);
-int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh);
-struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create);
-
-void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr,
-			      int meta);
-
-void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen);
-
-int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num,
-			      struct buffer_head **bhp);
+extern struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno);
+extern int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
+			  struct buffer_head **bhp);
+extern int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh);
+extern struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno,
+				       int create);
+extern void gfs2_remove_from_journal(struct buffer_head *bh,
+				     struct gfs2_trans *tr, int meta);
+extern void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen);
+extern int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num,
+				     struct buffer_head **bhp);
 
 static inline int gfs2_meta_inode_buffer(struct gfs2_inode *ip,
 					 struct buffer_head **bhp)
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 0262c19..19ff5e8 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -646,6 +646,48 @@
 	return error;
 }
 
+/**
+ * check_journal_clean - Make sure a journal is clean for a spectator mount
+ * @sdp: The GFS2 superblock
+ * @jd: The journal descriptor
+ *
+ * Returns: 0 if the journal is clean or locked, else an error
+ */
+static int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd)
+{
+	int error;
+	struct gfs2_holder j_gh;
+	struct gfs2_log_header_host head;
+	struct gfs2_inode *ip;
+
+	ip = GFS2_I(jd->jd_inode);
+	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_NOEXP |
+				   GL_EXACT | GL_NOCACHE, &j_gh);
+	if (error) {
+		fs_err(sdp, "Error locking journal for spectator mount.\n");
+		return -EPERM;
+	}
+	error = gfs2_jdesc_check(jd);
+	if (error) {
+		fs_err(sdp, "Error checking journal for spectator mount.\n");
+		goto out_unlock;
+	}
+	error = gfs2_find_jhead(jd, &head);
+	if (error) {
+		fs_err(sdp, "Error parsing journal for spectator mount.\n");
+		goto out_unlock;
+	}
+	if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
+		error = -EPERM;
+		fs_err(sdp, "jid=%u: Journal is dirty, so the first mounter "
+		       "must not be a spectator.\n", jd->jd_jid);
+	}
+
+out_unlock:
+	gfs2_glock_dq_uninit(&j_gh);
+	return error;
+}
+
 static int init_journal(struct gfs2_sbd *sdp, int undo)
 {
 	struct inode *master = sdp->sd_master_dir->d_inode;
@@ -732,8 +774,15 @@
 	if (sdp->sd_lockstruct.ls_first) {
 		unsigned int x;
 		for (x = 0; x < sdp->sd_journals; x++) {
-			error = gfs2_recover_journal(gfs2_jdesc_find(sdp, x),
-						     true);
+			struct gfs2_jdesc *jd = gfs2_jdesc_find(sdp, x);
+
+			if (sdp->sd_args.ar_spectator) {
+				error = check_journal_clean(sdp, jd);
+				if (error)
+					goto fail_jinode_gh;
+				continue;
+			}
+			error = gfs2_recover_journal(jd, true);
 			if (error) {
 				fs_err(sdp, "error recovering journal %u: %d\n",
 				       x, error);
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index cddb052..2543728 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -361,6 +361,13 @@
 	return 0;
 }
 
+static int hostfs_file_release(struct inode *inode, struct file *file)
+{
+	filemap_write_and_wait(inode->i_mapping);
+
+	return 0;
+}
+
 int hostfs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 {
 	struct inode *inode = file->f_mapping->host;
@@ -386,7 +393,7 @@
 	.write		= do_sync_write,
 	.mmap		= generic_file_mmap,
 	.open		= hostfs_file_open,
-	.release	= NULL,
+	.release	= hostfs_file_release,
 	.fsync		= hostfs_fsync,
 };
 
diff --git a/fs/internal.h b/fs/internal.h
index d208937..2be46ea 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -45,6 +45,9 @@
  * namei.c
  */
 extern int __inode_permission(struct inode *, int);
+extern int user_path_mountpoint_at(int, const char __user *, unsigned int, struct path *);
+extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
+			   const char *, unsigned int, struct path *);
 
 /*
  * namespace.c
diff --git a/fs/namei.c b/fs/namei.c
index 56e4f4d..409a441 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -574,9 +574,12 @@
 drop_dentry:
 	unlock_rcu_walk();
 	dput(dentry);
-	return -ECHILD;
+	goto drop_root_mnt;
 out:
 	unlock_rcu_walk();
+drop_root_mnt:
+	if (!(nd->flags & LOOKUP_ROOT))
+		nd->root.mnt = NULL;
 	return -ECHILD;
 }
 
@@ -2206,7 +2209,7 @@
 }
 
 /**
- * umount_lookup_last - look up last component for umount
+ * mountpoint_last - look up last component for umount
  * @nd:   pathwalk nameidata - currently pointing at parent directory of "last"
  * @path: pointer to container for result
  *
@@ -2233,25 +2236,28 @@
  *         to the link, and nd->path will *not* be put.
  */
 static int
-umount_lookup_last(struct nameidata *nd, struct path *path)
+mountpoint_last(struct nameidata *nd, struct path *path)
 {
 	int error = 0;
 	struct dentry *dentry;
 	struct dentry *dir = nd->path.dentry;
 
-	if (unlikely(nd->flags & LOOKUP_RCU)) {
-		WARN_ON_ONCE(1);
-		error = -ECHILD;
-		goto error_check;
+	/* If we're in rcuwalk, drop out of it to handle last component */
+	if (nd->flags & LOOKUP_RCU) {
+		if (unlazy_walk(nd, NULL)) {
+			error = -ECHILD;
+			goto out;
+		}
 	}
 
 	nd->flags &= ~LOOKUP_PARENT;
 
 	if (unlikely(nd->last_type != LAST_NORM)) {
 		error = handle_dots(nd, nd->last_type);
-		if (!error)
-			dentry = dget(nd->path.dentry);
-		goto error_check;
+		if (error)
+			goto out;
+		dentry = dget(nd->path.dentry);
+		goto done;
 	}
 
 	mutex_lock(&dir->d_inode->i_mutex);
@@ -2265,44 +2271,43 @@
 		dentry = d_alloc(dir, &nd->last);
 		if (!dentry) {
 			error = -ENOMEM;
-		} else {
-			dentry = lookup_real(dir->d_inode, dentry, nd->flags);
-			if (IS_ERR(dentry))
-				error = PTR_ERR(dentry);
+			goto out;
 		}
+		dentry = lookup_real(dir->d_inode, dentry, nd->flags);
+		error = PTR_ERR(dentry);
+		if (IS_ERR(dentry))
+			goto out;
 	}
 	mutex_unlock(&dir->d_inode->i_mutex);
 
-error_check:
-	if (!error) {
-		if (!dentry->d_inode) {
-			error = -ENOENT;
-			dput(dentry);
-		} else {
-			path->dentry = dentry;
-			path->mnt = mntget(nd->path.mnt);
-			if (should_follow_link(dentry->d_inode,
-						nd->flags & LOOKUP_FOLLOW))
-				return 1;
-			follow_mount(path);
-		}
+done:
+	if (!dentry->d_inode) {
+		error = -ENOENT;
+		dput(dentry);
+		goto out;
 	}
+	path->dentry = dentry;
+	path->mnt = mntget(nd->path.mnt);
+	if (should_follow_link(dentry->d_inode, nd->flags & LOOKUP_FOLLOW))
+		return 1;
+	follow_mount(path);
+	error = 0;
+out:
 	terminate_walk(nd);
 	return error;
 }
 
 /**
- * path_umountat - look up a path to be umounted
+ * path_mountpoint - look up a path to be umounted
  * @dfd:	directory file descriptor to start walk from
  * @name:	full pathname to walk
  * @flags:	lookup flags
- * @nd:		pathwalk nameidata
  *
  * Look up the given name, but don't attempt to revalidate the last component.
  * Returns 0 and "path" will be valid on success; Retuns error otherwise.
  */
 static int
-path_umountat(int dfd, const char *name, struct path *path, unsigned int flags)
+path_mountpoint(int dfd, const char *name, struct path *path, unsigned int flags)
 {
 	struct file *base = NULL;
 	struct nameidata nd;
@@ -2317,16 +2322,7 @@
 	if (err)
 		goto out;
 
-	/* If we're in rcuwalk, drop out of it to handle last component */
-	if (nd.flags & LOOKUP_RCU) {
-		err = unlazy_walk(&nd, NULL);
-		if (err) {
-			terminate_walk(&nd);
-			goto out;
-		}
-	}
-
-	err = umount_lookup_last(&nd, path);
+	err = mountpoint_last(&nd, path);
 	while (err > 0) {
 		void *cookie;
 		struct path link = *path;
@@ -2337,7 +2333,7 @@
 		err = follow_link(&link, &nd, &cookie);
 		if (err)
 			break;
-		err = umount_lookup_last(&nd, path);
+		err = mountpoint_last(&nd, path);
 		put_link(&nd, &link, cookie);
 	}
 out:
@@ -2350,8 +2346,22 @@
 	return err;
 }
 
+static int
+filename_mountpoint(int dfd, struct filename *s, struct path *path,
+			unsigned int flags)
+{
+	int error = path_mountpoint(dfd, s->name, path, flags | LOOKUP_RCU);
+	if (unlikely(error == -ECHILD))
+		error = path_mountpoint(dfd, s->name, path, flags);
+	if (unlikely(error == -ESTALE))
+		error = path_mountpoint(dfd, s->name, path, flags | LOOKUP_REVAL);
+	if (likely(!error))
+		audit_inode(s, path->dentry, 0);
+	return error;
+}
+
 /**
- * user_path_umountat - lookup a path from userland in order to umount it
+ * user_path_mountpoint_at - lookup a path from userland in order to umount it
  * @dfd:	directory file descriptor
  * @name:	pathname from userland
  * @flags:	lookup flags
@@ -2365,28 +2375,27 @@
  * Returns 0 and populates "path" on success.
  */
 int
-user_path_umountat(int dfd, const char __user *name, unsigned int flags,
+user_path_mountpoint_at(int dfd, const char __user *name, unsigned int flags,
 			struct path *path)
 {
 	struct filename *s = getname(name);
 	int error;
-
 	if (IS_ERR(s))
 		return PTR_ERR(s);
-
-	error = path_umountat(dfd, s->name, path, flags | LOOKUP_RCU);
-	if (unlikely(error == -ECHILD))
-		error = path_umountat(dfd, s->name, path, flags);
-	if (unlikely(error == -ESTALE))
-		error = path_umountat(dfd, s->name, path, flags | LOOKUP_REVAL);
-
-	if (likely(!error))
-		audit_inode(s, path->dentry, 0);
-
+	error = filename_mountpoint(dfd, s, path, flags);
 	putname(s);
 	return error;
 }
 
+int
+kern_path_mountpoint(int dfd, const char *name, struct path *path,
+			unsigned int flags)
+{
+	struct filename s = {.name = name};
+	return filename_mountpoint(dfd, &s, path, flags);
+}
+EXPORT_SYMBOL(kern_path_mountpoint);
+
 /*
  * It's inline, so penalty for filesystems that don't use sticky bit is
  * minimal.
diff --git a/fs/namespace.c b/fs/namespace.c
index fc2b522..25845d1 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1321,7 +1321,7 @@
 	if (!(flags & UMOUNT_NOFOLLOW))
 		lookup_flags |= LOOKUP_FOLLOW;
 
-	retval = user_path_umountat(AT_FDCWD, name, lookup_flags, &path);
+	retval = user_path_mountpoint_at(AT_FDCWD, name, lookup_flags, &path);
 	if (retval)
 		goto out;
 	mnt = real_mount(path.mnt);
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index e0bb048..03192a6 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -4,9 +4,10 @@
 
 obj-$(CONFIG_NFS_FS) += nfs.o
 
+CFLAGS_nfstrace.o += -I$(src)
 nfs-y 			:= client.o dir.o file.o getroot.o inode.o super.o \
 			   direct.o pagelist.o read.o symlink.o unlink.o \
-			   write.o namespace.o mount_clnt.o
+			   write.o namespace.o mount_clnt.o nfstrace.o
 nfs-$(CONFIG_ROOT_NFS)	+= nfsroot.o
 nfs-$(CONFIG_SYSCTL)	+= sysctl.o
 nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o
@@ -19,12 +20,14 @@
 nfsv3-$(CONFIG_NFS_V3_ACL) += nfs3acl.o
 
 obj-$(CONFIG_NFS_V4) += nfsv4.o
+CFLAGS_nfs4trace.o += -I$(src)
 nfsv4-y := nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o nfs4super.o nfs4file.o \
 	  delegation.o idmap.o callback.o callback_xdr.o callback_proc.o \
-	  nfs4namespace.o nfs4getroot.o nfs4client.o dns_resolve.o
+	  nfs4namespace.o nfs4getroot.o nfs4client.o nfs4session.o \
+	  dns_resolve.o nfs4trace.o
 nfsv4-$(CONFIG_NFS_USE_LEGACY_DNS) += cache_lib.o
 nfsv4-$(CONFIG_SYSCTL)	+= nfs4sysctl.o
-nfsv4-$(CONFIG_NFS_V4_1)	+= nfs4session.o pnfs.o pnfs_dev.o
+nfsv4-$(CONFIG_NFS_V4_1)	+= pnfs.o pnfs_dev.o
 
 obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o
 nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index e6ebc4c..ae2e87b 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -15,6 +15,7 @@
 #include "internal.h"
 #include "pnfs.h"
 #include "nfs4session.h"
+#include "nfs4trace.h"
 
 #ifdef NFS_DEBUG
 #define NFSDBG_FACILITY NFSDBG_CALLBACK
@@ -93,6 +94,7 @@
 	default:
 		res = htonl(NFS4ERR_RESOURCE);
 	}
+	trace_nfs4_recall_delegation(inode, -ntohl(res));
 	iput(inode);
 out:
 	dprintk("%s: exit with status = %d\n", __func__, ntohl(res));
@@ -301,14 +303,14 @@
 {
 	struct nfs4_slot *slot;
 
-	dprintk("%s enter. slotid %d seqid %d\n",
+	dprintk("%s enter. slotid %u seqid %u\n",
 		__func__, args->csa_slotid, args->csa_sequenceid);
 
 	if (args->csa_slotid >= NFS41_BC_MAX_CALLBACKS)
 		return htonl(NFS4ERR_BADSLOT);
 
 	slot = tbl->slots + args->csa_slotid;
-	dprintk("%s slot table seqid: %d\n", __func__, slot->seq_nr);
+	dprintk("%s slot table seqid: %u\n", __func__, slot->seq_nr);
 
 	/* Normal */
 	if (likely(args->csa_sequenceid == slot->seq_nr + 1)) {
@@ -318,7 +320,7 @@
 
 	/* Replay */
 	if (args->csa_sequenceid == slot->seq_nr) {
-		dprintk("%s seqid %d is a replay\n",
+		dprintk("%s seqid %u is a replay\n",
 			__func__, args->csa_sequenceid);
 		/* Signal process_op to set this error on next op */
 		if (args->csa_cachethis == 0)
@@ -462,6 +464,7 @@
 	} else
 		res->csr_status = status;
 
+	trace_nfs4_cb_sequence(args, res, status);
 	dprintk("%s: exit with status = %d res->csr_status %d\n", __func__,
 		ntohl(status), ntohl(res->csr_status));
 	return status;
@@ -518,7 +521,7 @@
 	if (!cps->clp) /* set in cb_sequence */
 		goto out;
 
-	dprintk_rcu("NFS: CB_RECALL_SLOT request from %s target highest slotid %d\n",
+	dprintk_rcu("NFS: CB_RECALL_SLOT request from %s target highest slotid %u\n",
 		rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR),
 		args->crsa_target_highest_slotid);
 
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 340b1ef..2dceee4 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -501,8 +501,7 @@
 					&nn->nfs_client_list);
 			spin_unlock(&nn->nfs_client_lock);
 			new->cl_flags = cl_init->init_flags;
-			return rpc_ops->init_client(new, timeparms, ip_addr,
-						    authflavour);
+			return rpc_ops->init_client(new, timeparms, ip_addr);
 		}
 
 		spin_unlock(&nn->nfs_client_lock);
@@ -694,13 +693,12 @@
  * @clp: nfs_client to initialise
  * @timeparms: timeout parameters for underlying RPC transport
  * @ip_addr: IP presentation address (not used)
- * @authflavor: authentication flavor for underlying RPC transport
  *
  * Returns pointer to an NFS client, or an ERR_PTR value.
  */
 struct nfs_client *nfs_init_client(struct nfs_client *clp,
 		    const struct rpc_timeout *timeparms,
-		    const char *ip_addr, rpc_authflavor_t authflavour)
+		    const char *ip_addr)
 {
 	int error;
 
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 7ec4814..ef792f2 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -20,6 +20,7 @@
 #include "nfs4_fs.h"
 #include "delegation.h"
 #include "internal.h"
+#include "nfs4trace.h"
 
 static void nfs_free_delegation(struct nfs_delegation *delegation)
 {
@@ -160,6 +161,7 @@
 			spin_unlock(&delegation->lock);
 			put_rpccred(oldcred);
 			rcu_read_unlock();
+			trace_nfs4_reclaim_delegation(inode, res->delegation_type);
 		} else {
 			/* We appear to have raced with a delegation return. */
 			spin_unlock(&delegation->lock);
@@ -344,6 +346,7 @@
 	spin_lock(&inode->i_lock);
 	nfsi->cache_validity |= NFS_INO_REVAL_FORCED;
 	spin_unlock(&inode->i_lock);
+	trace_nfs4_set_delegation(inode, res->delegation_type);
 
 out:
 	spin_unlock(&clp->cl_lock);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 7468735..e79bc6c 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -43,6 +43,8 @@
 #include "internal.h"
 #include "fscache.h"
 
+#include "nfstrace.h"
+
 /* #define NFS_DEBUG_VERBOSE 1 */
 
 static int nfs_opendir(struct inode *, struct file *);
@@ -1100,7 +1102,9 @@
 	if (IS_ERR(label))
 		goto out_error;
 
+	trace_nfs_lookup_revalidate_enter(dir, dentry, flags);
 	error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label);
+	trace_nfs_lookup_revalidate_exit(dir, dentry, flags, error);
 	if (error)
 		goto out_bad;
 	if (nfs_compare_fh(NFS_FH(inode), fhandle))
@@ -1312,6 +1316,7 @@
 
 	parent = dentry->d_parent;
 	/* Protect against concurrent sillydeletes */
+	trace_nfs_lookup_enter(dir, dentry, flags);
 	nfs_block_sillyrename(parent);
 	error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label);
 	if (error == -ENOENT)
@@ -1338,6 +1343,7 @@
 	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
 out_unblock_sillyrename:
 	nfs_unblock_sillyrename(parent);
+	trace_nfs_lookup_exit(dir, dentry, flags, error);
 	nfs4_label_free(label);
 out:
 	nfs_free_fattr(fattr);
@@ -1392,7 +1398,6 @@
 	nfs_file_set_open_context(file, ctx);
 
 out:
-	put_nfs_open_context(ctx);
 	return err;
 }
 
@@ -1404,6 +1409,7 @@
 	struct dentry *res;
 	struct iattr attr = { .ia_valid = ATTR_OPEN };
 	struct inode *inode;
+	unsigned int lookup_flags = 0;
 	int err;
 
 	/* Expect a negative dentry */
@@ -1412,6 +1418,10 @@
 	dfprintk(VFS, "NFS: atomic_open(%s/%ld), %s\n",
 			dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
 
+	err = nfs_check_flags(open_flags);
+	if (err)
+		return err;
+
 	/* NFS only supports OPEN on regular files */
 	if ((open_flags & O_DIRECTORY)) {
 		if (!d_unhashed(dentry)) {
@@ -1422,6 +1432,7 @@
 			 */
 			return -ENOENT;
 		}
+		lookup_flags = LOOKUP_OPEN|LOOKUP_DIRECTORY;
 		goto no_open;
 	}
 
@@ -1442,12 +1453,14 @@
 	if (IS_ERR(ctx))
 		goto out;
 
+	trace_nfs_atomic_open_enter(dir, ctx, open_flags);
 	nfs_block_sillyrename(dentry->d_parent);
 	inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr);
 	nfs_unblock_sillyrename(dentry->d_parent);
 	if (IS_ERR(inode)) {
-		put_nfs_open_context(ctx);
 		err = PTR_ERR(inode);
+		trace_nfs_atomic_open_exit(dir, ctx, open_flags, err);
+		put_nfs_open_context(ctx);
 		switch (err) {
 		case -ENOENT:
 			d_drop(dentry);
@@ -1468,11 +1481,13 @@
 	}
 
 	err = nfs_finish_open(ctx, ctx->dentry, file, open_flags, opened);
+	trace_nfs_atomic_open_exit(dir, ctx, open_flags, err);
+	put_nfs_open_context(ctx);
 out:
 	return err;
 
 no_open:
-	res = nfs_lookup(dir, dentry, 0);
+	res = nfs_lookup(dir, dentry, lookup_flags);
 	err = PTR_ERR(res);
 	if (IS_ERR(res))
 		goto out;
@@ -1596,7 +1611,9 @@
 	attr.ia_mode = mode;
 	attr.ia_valid = ATTR_MODE;
 
+	trace_nfs_create_enter(dir, dentry, open_flags);
 	error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags);
+	trace_nfs_create_exit(dir, dentry, open_flags, error);
 	if (error != 0)
 		goto out_err;
 	return 0;
@@ -1624,7 +1641,9 @@
 	attr.ia_mode = mode;
 	attr.ia_valid = ATTR_MODE;
 
+	trace_nfs_mknod_enter(dir, dentry);
 	status = NFS_PROTO(dir)->mknod(dir, dentry, &attr, rdev);
+	trace_nfs_mknod_exit(dir, dentry, status);
 	if (status != 0)
 		goto out_err;
 	return 0;
@@ -1648,7 +1667,9 @@
 	attr.ia_valid = ATTR_MODE;
 	attr.ia_mode = mode | S_IFDIR;
 
+	trace_nfs_mkdir_enter(dir, dentry);
 	error = NFS_PROTO(dir)->mkdir(dir, dentry, &attr);
+	trace_nfs_mkdir_exit(dir, dentry, error);
 	if (error != 0)
 		goto out_err;
 	return 0;
@@ -1671,12 +1692,21 @@
 	dfprintk(VFS, "NFS: rmdir(%s/%ld), %s\n",
 			dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
 
-	error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name);
-	/* Ensure the VFS deletes this inode */
-	if (error == 0 && dentry->d_inode != NULL)
-		clear_nlink(dentry->d_inode);
-	else if (error == -ENOENT)
-		nfs_dentry_handle_enoent(dentry);
+	trace_nfs_rmdir_enter(dir, dentry);
+	if (dentry->d_inode) {
+		nfs_wait_on_sillyrename(dentry);
+		error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name);
+		/* Ensure the VFS deletes this inode */
+		switch (error) {
+		case 0:
+			clear_nlink(dentry->d_inode);
+			break;
+		case -ENOENT:
+			nfs_dentry_handle_enoent(dentry);
+		}
+	} else
+		error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name);
+	trace_nfs_rmdir_exit(dir, dentry, error);
 
 	return error;
 }
@@ -1704,6 +1734,7 @@
 		goto out;
 	}
 
+	trace_nfs_remove_enter(dir, dentry);
 	if (inode != NULL) {
 		NFS_PROTO(inode)->return_delegation(inode);
 		error = NFS_PROTO(dir)->remove(dir, &dentry->d_name);
@@ -1713,6 +1744,7 @@
 		error = NFS_PROTO(dir)->remove(dir, &dentry->d_name);
 	if (error == -ENOENT)
 		nfs_dentry_handle_enoent(dentry);
+	trace_nfs_remove_exit(dir, dentry, error);
 out:
 	return error;
 }
@@ -1730,13 +1762,14 @@
 	dfprintk(VFS, "NFS: unlink(%s/%ld, %s)\n", dir->i_sb->s_id,
 		dir->i_ino, dentry->d_name.name);
 
+	trace_nfs_unlink_enter(dir, dentry);
 	spin_lock(&dentry->d_lock);
 	if (d_count(dentry) > 1) {
 		spin_unlock(&dentry->d_lock);
 		/* Start asynchronous writeout of the inode */
 		write_inode_now(dentry->d_inode, 0);
 		error = nfs_sillyrename(dir, dentry);
-		return error;
+		goto out;
 	}
 	if (!d_unhashed(dentry)) {
 		__d_drop(dentry);
@@ -1748,6 +1781,8 @@
 		nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
 	} else if (need_rehash)
 		d_rehash(dentry);
+out:
+	trace_nfs_unlink_exit(dir, dentry, error);
 	return error;
 }
 EXPORT_SYMBOL_GPL(nfs_unlink);
@@ -1794,7 +1829,9 @@
 		memset(kaddr + pathlen, 0, PAGE_SIZE - pathlen);
 	kunmap_atomic(kaddr);
 
+	trace_nfs_symlink_enter(dir, dentry);
 	error = NFS_PROTO(dir)->symlink(dir, dentry, page, pathlen, &attr);
+	trace_nfs_symlink_exit(dir, dentry, error);
 	if (error != 0) {
 		dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s) error %d\n",
 			dir->i_sb->s_id, dir->i_ino,
@@ -1829,6 +1866,7 @@
 		old_dentry->d_parent->d_name.name, old_dentry->d_name.name,
 		dentry->d_parent->d_name.name, dentry->d_name.name);
 
+	trace_nfs_link_enter(inode, dir, dentry);
 	NFS_PROTO(inode)->return_delegation(inode);
 
 	d_drop(dentry);
@@ -1837,6 +1875,7 @@
 		ihold(inode);
 		d_add(dentry, inode);
 	}
+	trace_nfs_link_exit(inode, dir, dentry, error);
 	return error;
 }
 EXPORT_SYMBOL_GPL(nfs_link);
@@ -1878,6 +1917,7 @@
 		 new_dentry->d_parent->d_name.name, new_dentry->d_name.name,
 		 d_count(new_dentry));
 
+	trace_nfs_rename_enter(old_dir, old_dentry, new_dir, new_dentry);
 	/*
 	 * For non-directories, check whether the target is busy and if so,
 	 * make a copy of the dentry and then do a silly-rename. If the
@@ -1924,6 +1964,8 @@
 out:
 	if (rehash)
 		d_rehash(rehash);
+	trace_nfs_rename_exit(old_dir, old_dentry,
+			new_dir, new_dentry, error);
 	if (!error) {
 		if (new_inode != NULL)
 			nfs_drop_nlink(new_inode);
@@ -2173,9 +2215,11 @@
 	struct nfs_access_entry cache;
 	int status;
 
+	trace_nfs_access_enter(inode);
+
 	status = nfs_access_get_cached(inode, cred, &cache);
 	if (status == 0)
-		goto out;
+		goto out_cached;
 
 	/* Be clever: ask server to check for all possible rights */
 	cache.mask = MAY_EXEC | MAY_WRITE | MAY_READ;
@@ -2188,13 +2232,15 @@
 			if (!S_ISDIR(inode->i_mode))
 				set_bit(NFS_INO_STALE, &NFS_I(inode)->flags);
 		}
-		return status;
+		goto out;
 	}
 	nfs_access_add_cache(inode, &cache);
+out_cached:
+	if ((mask & ~cache.mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) != 0)
+		status = -EACCES;
 out:
-	if ((mask & ~cache.mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0)
-		return 0;
-	return -EACCES;
+	trace_nfs_access_exit(inode, status);
+	return status;
 }
 
 static int nfs_open_permission_mask(int openflags)
@@ -2240,11 +2286,6 @@
 		case S_IFLNK:
 			goto out;
 		case S_IFREG:
-			/* NFSv4 has atomic_open... */
-			if (nfs_server_capable(inode, NFS_CAP_ATOMIC_OPEN)
-					&& (mask & MAY_OPEN)
-					&& !(mask & MAY_EXEC))
-				goto out;
 			break;
 		case S_IFDIR:
 			/*
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 94e94bd..1e6bfdb 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -37,6 +37,8 @@
 #include "iostat.h"
 #include "fscache.h"
 
+#include "nfstrace.h"
+
 #define NFSDBG_FACILITY		NFSDBG_FILE
 
 static const struct vm_operations_struct nfs_file_vm_ops;
@@ -294,6 +296,8 @@
 	int ret;
 	struct inode *inode = file_inode(file);
 
+	trace_nfs_fsync_enter(inode);
+
 	do {
 		ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
 		if (ret != 0)
@@ -310,6 +314,7 @@
 		end = LLONG_MAX;
 	} while (ret == -EAGAIN);
 
+	trace_nfs_fsync_exit(inode, ret);
 	return ret;
 }
 
@@ -406,6 +411,7 @@
 			struct page *page, void *fsdata)
 {
 	unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
+	struct nfs_open_context *ctx = nfs_file_open_context(file);
 	int status;
 
 	dfprintk(PAGECACHE, "NFS: write_end(%s/%s(%ld), %u@%lld)\n",
@@ -441,6 +447,13 @@
 	if (status < 0)
 		return status;
 	NFS_I(mapping->host)->write_io += copied;
+
+	if (nfs_ctx_key_to_expire(ctx)) {
+		status = nfs_wb_all(mapping->host);
+		if (status < 0)
+			return status;
+	}
+
 	return copied;
 }
 
@@ -637,7 +650,8 @@
 	if (IS_SYNC(inode) || (filp->f_flags & O_DSYNC))
 		return 1;
 	ctx = nfs_file_open_context(filp);
-	if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags))
+	if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags) ||
+	    nfs_ctx_key_to_expire(ctx))
 		return 1;
 	return 0;
 }
@@ -651,6 +665,10 @@
 	ssize_t result;
 	size_t count = iov_length(iov, nr_segs);
 
+	result = nfs_key_timeout_notify(iocb->ki_filp, inode);
+	if (result)
+		return result;
+
 	if (iocb->ki_filp->f_flags & O_DIRECT)
 		return nfs_file_direct_write(iocb, iov, nr_segs, pos, true);
 
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index c2c4163..567983d2 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -49,6 +49,7 @@
 
 #include "internal.h"
 #include "netns.h"
+#include "nfs4trace.h"
 
 #define NFS_UINT_MAXLEN 11
 
@@ -63,6 +64,7 @@
 };
 
 struct idmap {
+	struct rpc_pipe_dir_object idmap_pdo;
 	struct rpc_pipe		*idmap_pipe;
 	struct idmap_legacy_upcalldata *idmap_upcall_data;
 	struct mutex		idmap_mutex;
@@ -310,7 +312,7 @@
 	if (ret < 0)
 		goto out_up;
 
-	payload = rcu_dereference(rkey->payload.data);
+	payload = rcu_dereference(rkey->payload.rcudata);
 	if (IS_ERR_OR_NULL(payload)) {
 		ret = PTR_ERR(payload);
 		goto out_up;
@@ -401,16 +403,23 @@
 	.request_key	= nfs_idmap_legacy_upcall,
 };
 
-static void __nfs_idmap_unregister(struct rpc_pipe *pipe)
+static void nfs_idmap_pipe_destroy(struct dentry *dir,
+		struct rpc_pipe_dir_object *pdo)
 {
-	if (pipe->dentry)
+	struct idmap *idmap = pdo->pdo_data;
+	struct rpc_pipe *pipe = idmap->idmap_pipe;
+
+	if (pipe->dentry) {
 		rpc_unlink(pipe->dentry);
+		pipe->dentry = NULL;
+	}
 }
 
-static int __nfs_idmap_register(struct dentry *dir,
-				     struct idmap *idmap,
-				     struct rpc_pipe *pipe)
+static int nfs_idmap_pipe_create(struct dentry *dir,
+		struct rpc_pipe_dir_object *pdo)
 {
+	struct idmap *idmap = pdo->pdo_data;
+	struct rpc_pipe *pipe = idmap->idmap_pipe;
 	struct dentry *dentry;
 
 	dentry = rpc_mkpipe_dentry(dir, "idmap", idmap, pipe);
@@ -420,36 +429,10 @@
 	return 0;
 }
 
-static void nfs_idmap_unregister(struct nfs_client *clp,
-				      struct rpc_pipe *pipe)
-{
-	struct net *net = clp->cl_net;
-	struct super_block *pipefs_sb;
-
-	pipefs_sb = rpc_get_sb_net(net);
-	if (pipefs_sb) {
-		__nfs_idmap_unregister(pipe);
-		rpc_put_sb_net(net);
-	}
-}
-
-static int nfs_idmap_register(struct nfs_client *clp,
-				   struct idmap *idmap,
-				   struct rpc_pipe *pipe)
-{
-	struct net *net = clp->cl_net;
-	struct super_block *pipefs_sb;
-	int err = 0;
-
-	pipefs_sb = rpc_get_sb_net(net);
-	if (pipefs_sb) {
-		if (clp->cl_rpcclient->cl_dentry)
-			err = __nfs_idmap_register(clp->cl_rpcclient->cl_dentry,
-						   idmap, pipe);
-		rpc_put_sb_net(net);
-	}
-	return err;
-}
+static const struct rpc_pipe_dir_object_ops nfs_idmap_pipe_dir_object_ops = {
+	.create = nfs_idmap_pipe_create,
+	.destroy = nfs_idmap_pipe_destroy,
+};
 
 int
 nfs_idmap_new(struct nfs_client *clp)
@@ -462,23 +445,31 @@
 	if (idmap == NULL)
 		return -ENOMEM;
 
+	rpc_init_pipe_dir_object(&idmap->idmap_pdo,
+			&nfs_idmap_pipe_dir_object_ops,
+			idmap);
+
 	pipe = rpc_mkpipe_data(&idmap_upcall_ops, 0);
 	if (IS_ERR(pipe)) {
 		error = PTR_ERR(pipe);
-		kfree(idmap);
-		return error;
-	}
-	error = nfs_idmap_register(clp, idmap, pipe);
-	if (error) {
-		rpc_destroy_pipe_data(pipe);
-		kfree(idmap);
-		return error;
+		goto err;
 	}
 	idmap->idmap_pipe = pipe;
 	mutex_init(&idmap->idmap_mutex);
 
+	error = rpc_add_pipe_dir_object(clp->cl_net,
+			&clp->cl_rpcclient->cl_pipedir_objects,
+			&idmap->idmap_pdo);
+	if (error)
+		goto err_destroy_pipe;
+
 	clp->cl_idmap = idmap;
 	return 0;
+err_destroy_pipe:
+	rpc_destroy_pipe_data(idmap->idmap_pipe);
+err:
+	kfree(idmap);
+	return error;
 }
 
 void
@@ -488,130 +479,26 @@
 
 	if (!idmap)
 		return;
-	nfs_idmap_unregister(clp, idmap->idmap_pipe);
-	rpc_destroy_pipe_data(idmap->idmap_pipe);
 	clp->cl_idmap = NULL;
+	rpc_remove_pipe_dir_object(clp->cl_net,
+			&clp->cl_rpcclient->cl_pipedir_objects,
+			&idmap->idmap_pdo);
+	rpc_destroy_pipe_data(idmap->idmap_pipe);
 	kfree(idmap);
 }
 
-static int __rpc_pipefs_event(struct nfs_client *clp, unsigned long event,
-			      struct super_block *sb)
-{
-	int err = 0;
-
-	switch (event) {
-	case RPC_PIPEFS_MOUNT:
-		err = __nfs_idmap_register(clp->cl_rpcclient->cl_dentry,
-						clp->cl_idmap,
-						clp->cl_idmap->idmap_pipe);
-		break;
-	case RPC_PIPEFS_UMOUNT:
-		if (clp->cl_idmap->idmap_pipe) {
-			struct dentry *parent;
-
-			parent = clp->cl_idmap->idmap_pipe->dentry->d_parent;
-			__nfs_idmap_unregister(clp->cl_idmap->idmap_pipe);
-			/*
-			 * Note: This is a dirty hack. SUNRPC hook has been
-			 * called already but simple_rmdir() call for the
-			 * directory returned with error because of idmap pipe
-			 * inside. Thus now we have to remove this directory
-			 * here.
-			 */
-			if (rpc_rmdir(parent))
-				printk(KERN_ERR "NFS: %s: failed to remove "
-					"clnt dir!\n", __func__);
-		}
-		break;
-	default:
-		printk(KERN_ERR "NFS: %s: unknown event: %ld\n", __func__,
-			event);
-		return -ENOTSUPP;
-	}
-	return err;
-}
-
-static struct nfs_client *nfs_get_client_for_event(struct net *net, int event)
-{
-	struct nfs_net *nn = net_generic(net, nfs_net_id);
-	struct dentry *cl_dentry;
-	struct nfs_client *clp;
-	int err;
-
-restart:
-	spin_lock(&nn->nfs_client_lock);
-	list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) {
-		/* Wait for initialisation to finish */
-		if (clp->cl_cons_state == NFS_CS_INITING) {
-			atomic_inc(&clp->cl_count);
-			spin_unlock(&nn->nfs_client_lock);
-			err = nfs_wait_client_init_complete(clp);
-			nfs_put_client(clp);
-			if (err)
-				return NULL;
-			goto restart;
-		}
-		/* Skip nfs_clients that failed to initialise */
-		if (clp->cl_cons_state < 0)
-			continue;
-		smp_rmb();
-		if (clp->rpc_ops != &nfs_v4_clientops)
-			continue;
-		cl_dentry = clp->cl_idmap->idmap_pipe->dentry;
-		if (((event == RPC_PIPEFS_MOUNT) && cl_dentry) ||
-		    ((event == RPC_PIPEFS_UMOUNT) && !cl_dentry))
-			continue;
-		atomic_inc(&clp->cl_count);
-		spin_unlock(&nn->nfs_client_lock);
-		return clp;
-	}
-	spin_unlock(&nn->nfs_client_lock);
-	return NULL;
-}
-
-static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event,
-			    void *ptr)
-{
-	struct super_block *sb = ptr;
-	struct nfs_client *clp;
-	int error = 0;
-
-	if (!try_module_get(THIS_MODULE))
-		return 0;
-
-	while ((clp = nfs_get_client_for_event(sb->s_fs_info, event))) {
-		error = __rpc_pipefs_event(clp, event, sb);
-		nfs_put_client(clp);
-		if (error)
-			break;
-	}
-	module_put(THIS_MODULE);
-	return error;
-}
-
-#define PIPEFS_NFS_PRIO		1
-
-static struct notifier_block nfs_idmap_block = {
-	.notifier_call	= rpc_pipefs_event,
-	.priority	= SUNRPC_PIPEFS_NFS_PRIO,
-};
-
 int nfs_idmap_init(void)
 {
 	int ret;
 	ret = nfs_idmap_init_keyring();
 	if (ret != 0)
 		goto out;
-	ret = rpc_pipefs_notifier_register(&nfs_idmap_block);
-	if (ret != 0)
-		nfs_idmap_quit_keyring();
 out:
 	return ret;
 }
 
 void nfs_idmap_quit(void)
 {
-	rpc_pipefs_notifier_unregister(&nfs_idmap_block);
 	nfs_idmap_quit_keyring();
 }
 
@@ -849,6 +736,7 @@
 		if (!uid_valid(*uid))
 			ret = -ERANGE;
 	}
+	trace_nfs4_map_name_to_uid(name, namelen, id, ret);
 	return ret;
 }
 
@@ -865,6 +753,7 @@
 		if (!gid_valid(*gid))
 			ret = -ERANGE;
 	}
+	trace_nfs4_map_group_to_gid(name, namelen, id, ret);
 	return ret;
 }
 
@@ -879,6 +768,7 @@
 		ret = nfs_idmap_lookup_name(id, "user", buf, buflen, idmap);
 	if (ret < 0)
 		ret = nfs_map_numeric_to_string(id, buf, buflen);
+	trace_nfs4_map_uid_to_name(buf, ret, id, ret);
 	return ret;
 }
 int nfs_map_gid_to_group(const struct nfs_server *server, kgid_t gid, char *buf, size_t buflen)
@@ -892,5 +782,6 @@
 		ret = nfs_idmap_lookup_name(id, "group", buf, buflen, idmap);
 	if (ret < 0)
 		ret = nfs_map_numeric_to_string(id, buf, buflen);
+	trace_nfs4_map_gid_to_group(buf, ret, id, ret);
 	return ret;
 }
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 941246f..87e7976 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -38,7 +38,6 @@
 #include <linux/slab.h>
 #include <linux/compat.h>
 #include <linux/freezer.h>
-#include <linux/crc32.h>
 
 #include <asm/uaccess.h>
 
@@ -52,6 +51,8 @@
 #include "nfs.h"
 #include "netns.h"
 
+#include "nfstrace.h"
+
 #define NFSDBG_FACILITY		NFSDBG_VFS
 
 #define NFS_64_BIT_INODE_NUMBERS_ENABLED	1
@@ -503,6 +504,8 @@
 	if ((attr->ia_valid & ~(ATTR_FILE|ATTR_OPEN)) == 0)
 		return 0;
 
+	trace_nfs_setattr_enter(inode);
+
 	/* Write all dirty data */
 	if (S_ISREG(inode->i_mode)) {
 		nfs_inode_dio_wait(inode);
@@ -522,6 +525,7 @@
 		error = nfs_refresh_inode(inode, fattr);
 	nfs_free_fattr(fattr);
 out:
+	trace_nfs_setattr_exit(inode, error);
 	return error;
 }
 EXPORT_SYMBOL_GPL(nfs_setattr);
@@ -591,6 +595,7 @@
 	int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME;
 	int err;
 
+	trace_nfs_getattr_enter(inode);
 	/* Flush out writes to the server in order to update c/mtime.  */
 	if (S_ISREG(inode->i_mode)) {
 		nfs_inode_dio_wait(inode);
@@ -621,6 +626,7 @@
 		stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode));
 	}
 out:
+	trace_nfs_getattr_exit(inode, err);
 	return err;
 }
 EXPORT_SYMBOL_GPL(nfs_getattr);
@@ -875,6 +881,8 @@
 	dfprintk(PAGECACHE, "NFS: revalidating (%s/%Ld)\n",
 		inode->i_sb->s_id, (long long)NFS_FILEID(inode));
 
+	trace_nfs_revalidate_inode_enter(inode);
+
 	if (is_bad_inode(inode))
 		goto out;
 	if (NFS_STALE(inode))
@@ -925,6 +933,7 @@
 	nfs4_label_free(label);
 out:
 	nfs_free_fattr(fattr);
+	trace_nfs_revalidate_inode_exit(inode, status);
 	return status;
 }
 
@@ -981,6 +990,7 @@
 	spin_unlock(&inode->i_lock);
 	nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE);
 	nfs_fscache_wait_on_invalidate(inode);
+
 	dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n",
 			inode->i_sb->s_id, (long long)NFS_FILEID(inode));
 	return 0;
@@ -1014,8 +1024,12 @@
 		if (ret < 0)
 			goto out;
 	}
-	if (nfsi->cache_validity & NFS_INO_INVALID_DATA)
+	if (nfsi->cache_validity & NFS_INO_INVALID_DATA) {
+		trace_nfs_invalidate_mapping_enter(inode);
 		ret = nfs_invalidate_mapping(inode, mapping);
+		trace_nfs_invalidate_mapping_exit(inode, ret);
+	}
+
 out:
 	return ret;
 }
@@ -1195,7 +1209,7 @@
 {
 	/* wireshark uses 32-bit AUTODIN crc and does a bitwise
 	 * not on the result */
-	return ~crc32(0xFFFFFFFF, &fh->data[0], fh->size);
+	return nfs_fhandle_hash(fh);
 }
 
 /*
@@ -1274,9 +1288,17 @@
 
 static int nfs_refresh_inode_locked(struct inode *inode, struct nfs_fattr *fattr)
 {
+	int ret;
+
+	trace_nfs_refresh_inode_enter(inode);
+
 	if (nfs_inode_attrs_need_update(inode, fattr))
-		return nfs_update_inode(inode, fattr);
-	return nfs_check_inode_attributes(inode, fattr);
+		ret = nfs_update_inode(inode, fattr);
+	else
+		ret = nfs_check_inode_attributes(inode, fattr);
+
+	trace_nfs_refresh_inode_exit(inode, ret);
+	return ret;
 }
 
 /**
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 3c8373f..d388302c 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -5,6 +5,7 @@
 #include "nfs4_fs.h"
 #include <linux/mount.h>
 #include <linux/security.h>
+#include <linux/crc32.h>
 
 #define NFS_MS_MASK (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_SYNCHRONOUS)
 
@@ -185,6 +186,8 @@
 					     int ds_addrlen, int ds_proto,
 					     unsigned int ds_timeo,
 					     unsigned int ds_retrans);
+extern struct rpc_clnt *nfs4_find_or_create_ds_client(struct nfs_client *,
+						struct inode *);
 #ifdef CONFIG_PROC_FS
 extern int __init nfs_fs_proc_init(void);
 extern void nfs_fs_proc_exit(void);
@@ -267,7 +270,7 @@
 void nfs_close_context(struct nfs_open_context *ctx, int is_sync);
 extern struct nfs_client *nfs_init_client(struct nfs_client *clp,
 			   const struct rpc_timeout *timeparms,
-			   const char *ip_addr, rpc_authflavor_t authflavour);
+			   const char *ip_addr);
 
 /* dir.c */
 extern int nfs_access_cache_shrinker(struct shrinker *shrink,
@@ -355,7 +358,7 @@
 extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *,
 				    const char *);
 
-extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh);
+extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh, bool);
 #endif
 
 struct nfs_pgio_completion_ops;
@@ -430,6 +433,8 @@
 void nfs_init_cinfo(struct nfs_commit_info *cinfo,
 		    struct inode *inode,
 		    struct nfs_direct_req *dreq);
+int nfs_key_timeout_notify(struct file *filp, struct inode *inode);
+bool nfs_ctx_key_to_expire(struct nfs_open_context *ctx);
 
 #ifdef CONFIG_MIGRATION
 extern int nfs_migrate_page(struct address_space *,
@@ -451,8 +456,7 @@
 extern void __nfs4_read_done_cb(struct nfs_read_data *);
 extern struct nfs_client *nfs4_init_client(struct nfs_client *clp,
 			    const struct rpc_timeout *timeparms,
-			    const char *ip_addr,
-			    rpc_authflavor_t authflavour);
+			    const char *ip_addr);
 extern int nfs40_walk_client_list(struct nfs_client *clp,
 				struct nfs_client **result,
 				struct rpc_cred *cred);
@@ -575,3 +579,22 @@
 {
 	return ((u64)ts->tv_sec << 30) + ts->tv_nsec;
 }
+
+#ifdef CONFIG_CRC32
+/**
+ * nfs_fhandle_hash - calculate the crc32 hash for the filehandle
+ * @fh - pointer to filehandle
+ *
+ * returns a crc32 hash for the filehandle that is compatible with
+ * the one displayed by "wireshark".
+ */
+static inline u32 nfs_fhandle_hash(const struct nfs_fh *fh)
+{
+	return ~crc32_le(0xFFFFFFFF, &fh->data[0], fh->size);
+}
+#else
+static inline u32 nfs_fhandle_hash(const struct nfs_fh *fh)
+{
+	return 0;
+}
+#endif
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index f5c84c3..90cb10d 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -336,8 +336,8 @@
 	data->arg.create.createmode = NFS3_CREATE_UNCHECKED;
 	if (flags & O_EXCL) {
 		data->arg.create.createmode  = NFS3_CREATE_EXCLUSIVE;
-		data->arg.create.verifier[0] = jiffies;
-		data->arg.create.verifier[1] = current->pid;
+		data->arg.create.verifier[0] = cpu_to_be32(jiffies);
+		data->arg.create.verifier[1] = cpu_to_be32(current->pid);
 	}
 
 	sattr->ia_mode &= ~current_umask();
@@ -826,9 +826,10 @@
 	msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ];
 }
 
-static void nfs3_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data)
+static int nfs3_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data)
 {
 	rpc_call_start(task);
+	return 0;
 }
 
 static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data)
@@ -847,9 +848,10 @@
 	msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE];
 }
 
-static void nfs3_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data)
+static int nfs3_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data)
 {
 	rpc_call_start(task);
+	return 0;
 }
 
 static void nfs3_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index ee81e35..f520a11 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -38,17 +38,15 @@
 	u32	minor_version;
 	unsigned init_caps;
 
-	int	(*call_sync)(struct rpc_clnt *clnt,
-			struct nfs_server *server,
-			struct rpc_message *msg,
-			struct nfs4_sequence_args *args,
-			struct nfs4_sequence_res *res);
+	int	(*init_client)(struct nfs_client *);
+	void	(*shutdown_client)(struct nfs_client *);
 	bool	(*match_stateid)(const nfs4_stateid *,
 			const nfs4_stateid *);
 	int	(*find_root_sec)(struct nfs_server *, struct nfs_fh *,
 			struct nfs_fsinfo *);
 	int	(*free_lock_state)(struct nfs_server *,
 			struct nfs4_lock_state *);
+	const struct rpc_call_ops *call_sync_ops;
 	const struct nfs4_state_recovery_ops *reboot_recovery_ops;
 	const struct nfs4_state_recovery_ops *nograce_recovery_ops;
 	const struct nfs4_state_maintenance_ops *state_renewal_ops;
@@ -135,6 +133,7 @@
 	struct list_head	ls_locks;	/* Other lock stateids */
 	struct nfs4_state *	ls_state;	/* Pointer to open state */
 #define NFS_LOCK_INITIALIZED 0
+#define NFS_LOCK_LOST        1
 	unsigned long		ls_flags;
 	struct nfs_seqid_counter	ls_seqid;
 	nfs4_stateid		ls_stateid;
@@ -193,7 +192,6 @@
 	int (*recover_open)(struct nfs4_state_owner *, struct nfs4_state *);
 	int (*recover_lock)(struct nfs4_state *, struct file_lock *);
 	int (*establish_clid)(struct nfs_client *, struct rpc_cred *);
-	struct rpc_cred * (*get_clid_cred)(struct nfs_client *);
 	int (*reclaim_complete)(struct nfs_client *, struct rpc_cred *);
 	int (*detect_trunking)(struct nfs_client *, struct nfs_client **,
 		struct rpc_cred *);
@@ -223,7 +221,7 @@
 /* nfs4proc.c */
 extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *);
 extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *);
-extern int nfs4_proc_get_rootfh(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
+extern int nfs4_proc_get_rootfh(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *, bool);
 extern int nfs4_proc_bind_conn_to_session(struct nfs_client *, struct rpc_cred *cred);
 extern int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred);
 extern int nfs4_destroy_clientid(struct nfs_client *clp);
@@ -248,9 +246,6 @@
 	return server->nfs_client->cl_session;
 }
 
-extern int nfs4_setup_sequence(const struct nfs_server *server,
-		struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
-		struct rpc_task *task);
 extern int nfs41_setup_sequence(struct nfs4_session *session,
 		struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
 		struct rpc_task *task);
@@ -273,20 +268,65 @@
 {
 	return clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_DS;
 }
+
+static inline bool
+_nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_mode,
+		    struct rpc_clnt **clntp, struct rpc_message *msg)
+{
+	struct rpc_cred *newcred = NULL;
+	rpc_authflavor_t flavor;
+
+	if (test_bit(sp4_mode, &clp->cl_sp4_flags)) {
+		spin_lock(&clp->cl_lock);
+		if (clp->cl_machine_cred != NULL)
+			newcred = get_rpccred(clp->cl_machine_cred);
+		spin_unlock(&clp->cl_lock);
+		if (msg->rpc_cred)
+			put_rpccred(msg->rpc_cred);
+		msg->rpc_cred = newcred;
+
+		flavor = clp->cl_rpcclient->cl_auth->au_flavor;
+		WARN_ON(flavor != RPC_AUTH_GSS_KRB5I &&
+			flavor != RPC_AUTH_GSS_KRB5P);
+		*clntp = clp->cl_rpcclient;
+
+		return true;
+	}
+	return false;
+}
+
+/*
+ * Function responsible for determining if an rpc_message should use the
+ * machine cred under SP4_MACH_CRED and if so switching the credential and
+ * authflavor (using the nfs_client's rpc_clnt which will be krb5i/p).
+ * Should be called before rpc_call_sync/rpc_call_async.
+ */
+static inline void
+nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_mode,
+		   struct rpc_clnt **clntp, struct rpc_message *msg)
+{
+	_nfs4_state_protect(clp, sp4_mode, clntp, msg);
+}
+
+/*
+ * Special wrapper to nfs4_state_protect for write.
+ * If WRITE can use machine cred but COMMIT cannot, make sure all writes
+ * that use machine cred use NFS_FILE_SYNC.
+ */
+static inline void
+nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp,
+			 struct rpc_message *msg, struct nfs_write_data *wdata)
+{
+	if (_nfs4_state_protect(clp, NFS_SP4_MACH_CRED_WRITE, clntp, msg) &&
+	    !test_bit(NFS_SP4_MACH_CRED_COMMIT, &clp->cl_sp4_flags))
+		wdata->args.stable = NFS_FILE_SYNC;
+}
 #else /* CONFIG_NFS_v4_1 */
 static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server)
 {
 	return NULL;
 }
 
-static inline int nfs4_setup_sequence(const struct nfs_server *server,
-		struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
-		struct rpc_task *task)
-{
-	rpc_call_start(task);
-	return 0;
-}
-
 static inline bool
 is_ds_only_client(struct nfs_client *clp)
 {
@@ -298,6 +338,18 @@
 {
 	return false;
 }
+
+static inline void
+nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_flags,
+		   struct rpc_clnt **clntp, struct rpc_message *msg)
+{
+}
+
+static inline void
+nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp,
+			 struct rpc_message *msg, struct nfs_write_data *wdata)
+{
+}
 #endif /* CONFIG_NFS_V4_1 */
 
 extern const struct nfs4_minor_version_ops *nfs_v4_minor_ops[];
@@ -308,6 +360,10 @@
 extern const u32 nfs4_fsinfo_bitmap[3];
 extern const u32 nfs4_fs_locations_bitmap[3];
 
+void nfs40_shutdown_client(struct nfs_client *);
+void nfs41_shutdown_client(struct nfs_client *);
+int nfs40_init_client(struct nfs_client *);
+int nfs41_init_client(struct nfs_client *);
 void nfs4_free_client(struct nfs_client *);
 
 struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *);
@@ -319,7 +375,7 @@
 extern void nfs4_renew_state(struct work_struct *);
 
 /* nfs4state.c */
-struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp);
+struct rpc_cred *nfs4_get_clid_cred(struct nfs_client *clp);
 struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp);
 struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp);
 int nfs4_discover_server_trunking(struct nfs_client *clp,
@@ -327,7 +383,6 @@
 int nfs40_discover_server_trunking(struct nfs_client *clp,
 			struct nfs_client **, struct rpc_cred *);
 #if defined(CONFIG_NFS_V4_1)
-struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp);
 int nfs41_discover_server_trunking(struct nfs_client *clp,
 			struct nfs_client **, struct rpc_cred *);
 extern void nfs4_schedule_session_recovery(struct nfs4_session *, int);
@@ -382,6 +437,7 @@
 extern bool nfs4_disable_idmapping;
 extern unsigned short max_session_slots;
 extern unsigned short send_implementation_id;
+extern bool recover_lost_locks;
 
 #define NFS4_CLIENT_ID_UNIQ_LEN		(64)
 extern char nfs4_client_id_uniquifier[NFS4_CLIENT_ID_UNIQ_LEN];
@@ -429,6 +485,8 @@
 
 #define nfs4_close_state(a, b) do { } while (0)
 #define nfs4_close_sync(a, b) do { } while (0)
+#define nfs4_state_protect(a, b, c, d) do { } while (0)
+#define nfs4_state_protect_write(a, b, c, d) do { } while (0)
 
 #endif /* CONFIG_NFS_V4 */
 #endif /* __LINUX_FS_NFS_NFS4_FS.H */
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 90dce91..a860ab5 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -41,19 +41,138 @@
 }
 
 #ifdef CONFIG_NFS_V4_1
-static void nfs4_shutdown_session(struct nfs_client *clp)
+/**
+ * Per auth flavor data server rpc clients
+ */
+struct nfs4_ds_server {
+	struct list_head	list;   /* ds_clp->cl_ds_clients */
+	struct rpc_clnt		*rpc_clnt;
+};
+
+/**
+ * Common lookup case for DS I/O
+ */
+static struct nfs4_ds_server *
+nfs4_find_ds_client(struct nfs_client *ds_clp, rpc_authflavor_t flavor)
+{
+	struct nfs4_ds_server *dss;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(dss, &ds_clp->cl_ds_clients, list) {
+		if (dss->rpc_clnt->cl_auth->au_flavor != flavor)
+			continue;
+		goto out;
+	}
+	dss = NULL;
+out:
+	rcu_read_unlock();
+	return dss;
+}
+
+static struct nfs4_ds_server *
+nfs4_add_ds_client(struct nfs_client *ds_clp, rpc_authflavor_t flavor,
+			   struct nfs4_ds_server *new)
+{
+	struct nfs4_ds_server *dss;
+
+	spin_lock(&ds_clp->cl_lock);
+	list_for_each_entry(dss, &ds_clp->cl_ds_clients, list) {
+		if (dss->rpc_clnt->cl_auth->au_flavor != flavor)
+			continue;
+		goto out;
+	}
+	if (new)
+		list_add_rcu(&new->list, &ds_clp->cl_ds_clients);
+	dss = new;
+out:
+	spin_unlock(&ds_clp->cl_lock); /* need some lock to protect list */
+	return dss;
+}
+
+static struct nfs4_ds_server *
+nfs4_alloc_ds_server(struct nfs_client *ds_clp, rpc_authflavor_t flavor)
+{
+	struct nfs4_ds_server *dss;
+
+	dss = kmalloc(sizeof(*dss), GFP_NOFS);
+	if (dss == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	dss->rpc_clnt = rpc_clone_client_set_auth(ds_clp->cl_rpcclient, flavor);
+	if (IS_ERR(dss->rpc_clnt)) {
+		int err = PTR_ERR(dss->rpc_clnt);
+		kfree (dss);
+		return ERR_PTR(err);
+	}
+	INIT_LIST_HEAD(&dss->list);
+
+	return dss;
+}
+
+static void
+nfs4_free_ds_server(struct nfs4_ds_server *dss)
+{
+	rpc_release_client(dss->rpc_clnt);
+	kfree(dss);
+}
+
+/**
+* Find or create a DS rpc client with th MDS server rpc client auth flavor
+* in the nfs_client cl_ds_clients list.
+*/
+struct rpc_clnt *
+nfs4_find_or_create_ds_client(struct nfs_client *ds_clp, struct inode *inode)
+{
+	struct nfs4_ds_server *dss, *new;
+	rpc_authflavor_t flavor = NFS_SERVER(inode)->client->cl_auth->au_flavor;
+
+	dss = nfs4_find_ds_client(ds_clp, flavor);
+	if (dss != NULL)
+		goto out;
+	new = nfs4_alloc_ds_server(ds_clp, flavor);
+	if (IS_ERR(new))
+		return ERR_CAST(new);
+	dss = nfs4_add_ds_client(ds_clp, flavor, new);
+	if (dss != new)
+		nfs4_free_ds_server(new);
+out:
+	return dss->rpc_clnt;
+}
+EXPORT_SYMBOL_GPL(nfs4_find_or_create_ds_client);
+
+static void
+nfs4_shutdown_ds_clients(struct nfs_client *clp)
+{
+	struct nfs4_ds_server *dss;
+	LIST_HEAD(shutdown_list);
+
+	while (!list_empty(&clp->cl_ds_clients)) {
+		dss = list_entry(clp->cl_ds_clients.next,
+					struct nfs4_ds_server, list);
+		list_del(&dss->list);
+		rpc_shutdown_client(dss->rpc_clnt);
+		kfree (dss);
+	}
+}
+
+void nfs41_shutdown_client(struct nfs_client *clp)
 {
 	if (nfs4_has_session(clp)) {
+		nfs4_shutdown_ds_clients(clp);
 		nfs4_destroy_session(clp->cl_session);
 		nfs4_destroy_clientid(clp);
 	}
 
 }
-#else /* CONFIG_NFS_V4_1 */
-static void nfs4_shutdown_session(struct nfs_client *clp)
+#endif	/* CONFIG_NFS_V4_1 */
+
+void nfs40_shutdown_client(struct nfs_client *clp)
 {
+	if (clp->cl_slot_tbl) {
+		nfs4_release_slot_table(clp->cl_slot_tbl);
+		kfree(clp->cl_slot_tbl);
+	}
 }
-#endif /* CONFIG_NFS_V4_1 */
 
 struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init)
 {
@@ -73,6 +192,7 @@
 
 	spin_lock_init(&clp->cl_lock);
 	INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state);
+	INIT_LIST_HEAD(&clp->cl_ds_clients);
 	rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client");
 	clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED;
 	clp->cl_minorversion = cl_init->minorversion;
@@ -97,7 +217,7 @@
 {
 	if (__test_and_clear_bit(NFS_CS_RENEWD, &clp->cl_res_state))
 		nfs4_kill_renewd(clp);
-	nfs4_shutdown_session(clp);
+	clp->cl_mvops->shutdown_client(clp);
 	nfs4_destroy_callback(clp);
 	if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state))
 		nfs_idmap_delete(clp);
@@ -144,34 +264,77 @@
 	return 0;
 }
 
+/**
+ * nfs40_init_client - nfs_client initialization tasks for NFSv4.0
+ * @clp - nfs_client to initialize
+ *
+ * Returns zero on success, or a negative errno if some error occurred.
+ */
+int nfs40_init_client(struct nfs_client *clp)
+{
+	struct nfs4_slot_table *tbl;
+	int ret;
+
+	tbl = kzalloc(sizeof(*tbl), GFP_NOFS);
+	if (tbl == NULL)
+		return -ENOMEM;
+
+	ret = nfs4_setup_slot_table(tbl, NFS4_MAX_SLOT_TABLE,
+					"NFSv4.0 transport Slot table");
+	if (ret) {
+		kfree(tbl);
+		return ret;
+	}
+
+	clp->cl_slot_tbl = tbl;
+	return 0;
+}
+
+#if defined(CONFIG_NFS_V4_1)
+
+/**
+ * nfs41_init_client - nfs_client initialization tasks for NFSv4.1+
+ * @clp - nfs_client to initialize
+ *
+ * Returns zero on success, or a negative errno if some error occurred.
+ */
+int nfs41_init_client(struct nfs_client *clp)
+{
+	struct nfs4_session *session = NULL;
+
+	/*
+	 * Create the session and mark it expired.
+	 * When a SEQUENCE operation encounters the expired session
+	 * it will do session recovery to initialize it.
+	 */
+	session = nfs4_alloc_session(clp);
+	if (!session)
+		return -ENOMEM;
+
+	clp->cl_session = session;
+
+	/*
+	 * The create session reply races with the server back
+	 * channel probe. Mark the client NFS_CS_SESSION_INITING
+	 * so that the client back channel can find the
+	 * nfs_client struct
+	 */
+	nfs_mark_client_ready(clp, NFS_CS_SESSION_INITING);
+	return 0;
+}
+
+#endif	/* CONFIG_NFS_V4_1 */
+
 /*
  * Initialize the minor version specific parts of an NFS4 client record
  */
 static int nfs4_init_client_minor_version(struct nfs_client *clp)
 {
-#if defined(CONFIG_NFS_V4_1)
-	if (clp->cl_mvops->minor_version) {
-		struct nfs4_session *session = NULL;
-		/*
-		 * Create the session and mark it expired.
-		 * When a SEQUENCE operation encounters the expired session
-		 * it will do session recovery to initialize it.
-		 */
-		session = nfs4_alloc_session(clp);
-		if (!session)
-			return -ENOMEM;
+	int ret;
 
-		clp->cl_session = session;
-		/*
-		 * The create session reply races with the server back
-		 * channel probe. Mark the client NFS_CS_SESSION_INITING
-		 * so that the client back channel can find the
-		 * nfs_client struct
-		 */
-		nfs_mark_client_ready(clp, NFS_CS_SESSION_INITING);
-	}
-#endif /* CONFIG_NFS_V4_1 */
-
+	ret = clp->cl_mvops->init_client(clp);
+	if (ret)
+		return ret;
 	return nfs4_init_callback(clp);
 }
 
@@ -187,8 +350,7 @@
  */
 struct nfs_client *nfs4_init_client(struct nfs_client *clp,
 				    const struct rpc_timeout *timeparms,
-				    const char *ip_addr,
-				    rpc_authflavor_t authflavour)
+				    const char *ip_addr)
 {
 	char buf[INET6_ADDRSTRLEN + 1];
 	struct nfs_client *old;
@@ -723,7 +885,7 @@
 }
 
 static int nfs4_server_common_setup(struct nfs_server *server,
-		struct nfs_fh *mntfh)
+		struct nfs_fh *mntfh, bool auth_probe)
 {
 	struct nfs_fattr *fattr;
 	int error;
@@ -755,7 +917,7 @@
 
 
 	/* Probe the root fh to retrieve its FSID and filehandle */
-	error = nfs4_get_rootfh(server, mntfh);
+	error = nfs4_get_rootfh(server, mntfh, auth_probe);
 	if (error < 0)
 		goto out;
 
@@ -787,6 +949,7 @@
 static int nfs4_init_server(struct nfs_server *server,
 		const struct nfs_parsed_mount_data *data)
 {
+	rpc_authflavor_t pseudoflavor = RPC_AUTH_UNIX;
 	struct rpc_timeout timeparms;
 	int error;
 
@@ -799,13 +962,16 @@
 	server->flags = data->flags;
 	server->options = data->options;
 
+	if (data->auth_flavor_len >= 1)
+		pseudoflavor = data->auth_flavors[0];
+
 	/* Get a client record */
 	error = nfs4_set_client(server,
 			data->nfs_server.hostname,
 			(const struct sockaddr *)&data->nfs_server.address,
 			data->nfs_server.addrlen,
 			data->client_address,
-			data->auth_flavors[0],
+			pseudoflavor,
 			data->nfs_server.protocol,
 			&timeparms,
 			data->minorversion,
@@ -825,7 +991,7 @@
 
 	server->port = data->nfs_server.port;
 
-	error = nfs_init_server_rpcclient(server, &timeparms, data->auth_flavors[0]);
+	error = nfs_init_server_rpcclient(server, &timeparms, pseudoflavor);
 
 error:
 	/* Done */
@@ -843,6 +1009,7 @@
 				      struct nfs_subversion *nfs_mod)
 {
 	struct nfs_server *server;
+	bool auth_probe;
 	int error;
 
 	dprintk("--> nfs4_create_server()\n");
@@ -851,12 +1018,14 @@
 	if (!server)
 		return ERR_PTR(-ENOMEM);
 
+	auth_probe = mount_info->parsed->auth_flavor_len < 1;
+
 	/* set up the general RPC client */
 	error = nfs4_init_server(server, mount_info->parsed);
 	if (error < 0)
 		goto error;
 
-	error = nfs4_server_common_setup(server, mount_info->mntfh);
+	error = nfs4_server_common_setup(server, mount_info->mntfh, auth_probe);
 	if (error < 0)
 		goto error;
 
@@ -909,7 +1078,8 @@
 	if (error < 0)
 		goto error;
 
-	error = nfs4_server_common_setup(server, mntfh);
+	error = nfs4_server_common_setup(server, mntfh,
+			!(parent_server->flags & NFS_MOUNT_SECFLAVOUR));
 	if (error < 0)
 		goto error;
 
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 17ed87e..b86464b 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -39,6 +39,7 @@
 #include "internal.h"
 #include "delegation.h"
 #include "nfs4filelayout.h"
+#include "nfs4trace.h"
 
 #define NFSDBG_FACILITY         NFSDBG_PNFS_LD
 
@@ -247,6 +248,7 @@
 	struct nfs_pgio_header *hdr = data->header;
 	int err;
 
+	trace_nfs4_pnfs_read(data, task->tk_status);
 	err = filelayout_async_handle_error(task, data->args.context->state,
 					    data->ds_clp, hdr->lseg);
 
@@ -363,6 +365,7 @@
 	struct nfs_pgio_header *hdr = data->header;
 	int err;
 
+	trace_nfs4_pnfs_write(data, task->tk_status);
 	err = filelayout_async_handle_error(task, data->args.context->state,
 					    data->ds_clp, hdr->lseg);
 
@@ -395,6 +398,7 @@
 {
 	int err;
 
+	trace_nfs4_pnfs_commit_ds(data, task->tk_status);
 	err = filelayout_async_handle_error(task, NULL, data->ds_clp,
 					    data->lseg);
 
@@ -524,6 +528,7 @@
 	struct nfs_pgio_header *hdr = data->header;
 	struct pnfs_layout_segment *lseg = hdr->lseg;
 	struct nfs4_pnfs_ds *ds;
+	struct rpc_clnt *ds_clnt;
 	loff_t offset = data->args.offset;
 	u32 j, idx;
 	struct nfs_fh *fh;
@@ -538,6 +543,11 @@
 	ds = nfs4_fl_prepare_ds(lseg, idx);
 	if (!ds)
 		return PNFS_NOT_ATTEMPTED;
+
+	ds_clnt = nfs4_find_or_create_ds_client(ds->ds_clp, hdr->inode);
+	if (IS_ERR(ds_clnt))
+		return PNFS_NOT_ATTEMPTED;
+
 	dprintk("%s USE DS: %s cl_count %d\n", __func__,
 		ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count));
 
@@ -552,7 +562,7 @@
 	data->mds_offset = offset;
 
 	/* Perform an asynchronous read to ds */
-	nfs_initiate_read(ds->ds_clp->cl_rpcclient, data,
+	nfs_initiate_read(ds_clnt, data,
 				  &filelayout_read_call_ops, RPC_TASK_SOFTCONN);
 	return PNFS_ATTEMPTED;
 }
@@ -564,6 +574,7 @@
 	struct nfs_pgio_header *hdr = data->header;
 	struct pnfs_layout_segment *lseg = hdr->lseg;
 	struct nfs4_pnfs_ds *ds;
+	struct rpc_clnt *ds_clnt;
 	loff_t offset = data->args.offset;
 	u32 j, idx;
 	struct nfs_fh *fh;
@@ -574,6 +585,11 @@
 	ds = nfs4_fl_prepare_ds(lseg, idx);
 	if (!ds)
 		return PNFS_NOT_ATTEMPTED;
+
+	ds_clnt = nfs4_find_or_create_ds_client(ds->ds_clp, hdr->inode);
+	if (IS_ERR(ds_clnt))
+		return PNFS_NOT_ATTEMPTED;
+
 	dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s cl_count %d\n",
 		__func__, hdr->inode->i_ino, sync, (size_t) data->args.count,
 		offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count));
@@ -591,7 +607,7 @@
 	data->args.offset = filelayout_get_dserver_offset(lseg, offset);
 
 	/* Perform an asynchronous write */
-	nfs_initiate_write(ds->ds_clp->cl_rpcclient, data,
+	nfs_initiate_write(ds_clnt, data,
 				    &filelayout_write_call_ops, sync,
 				    RPC_TASK_SOFTCONN);
 	return PNFS_ATTEMPTED;
@@ -1101,16 +1117,19 @@
 {
 	struct pnfs_layout_segment *lseg = data->lseg;
 	struct nfs4_pnfs_ds *ds;
+	struct rpc_clnt *ds_clnt;
 	u32 idx;
 	struct nfs_fh *fh;
 
 	idx = calc_ds_index_from_commit(lseg, data->ds_commit_index);
 	ds = nfs4_fl_prepare_ds(lseg, idx);
-	if (!ds) {
-		prepare_to_resend_writes(data);
-		filelayout_commit_release(data);
-		return -EAGAIN;
-	}
+	if (!ds)
+		goto out_err;
+
+	ds_clnt = nfs4_find_or_create_ds_client(ds->ds_clp, data->inode);
+	if (IS_ERR(ds_clnt))
+		goto out_err;
+
 	dprintk("%s ino %lu, how %d cl_count %d\n", __func__,
 		data->inode->i_ino, how, atomic_read(&ds->ds_clp->cl_count));
 	data->commit_done_cb = filelayout_commit_done_cb;
@@ -1119,9 +1138,13 @@
 	fh = select_ds_fh_from_commit(lseg, data->ds_commit_index);
 	if (fh)
 		data->args.fh = fh;
-	return nfs_initiate_commit(ds->ds_clp->cl_rpcclient, data,
+	return nfs_initiate_commit(ds_clnt, data,
 				   &filelayout_commit_call_ops, how,
 				   RPC_TASK_SOFTCONN);
+out_err:
+	prepare_to_resend_writes(data);
+	filelayout_commit_release(data);
+	return -EAGAIN;
 }
 
 static int
diff --git a/fs/nfs/nfs4getroot.c b/fs/nfs/nfs4getroot.c
index 549462e..c0b3a16 100644
--- a/fs/nfs/nfs4getroot.c
+++ b/fs/nfs/nfs4getroot.c
@@ -9,7 +9,7 @@
 
 #define NFSDBG_FACILITY		NFSDBG_CLIENT
 
-int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh)
+int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh, bool auth_probe)
 {
 	struct nfs_fsinfo fsinfo;
 	int ret = -ENOMEM;
@@ -21,7 +21,7 @@
 		goto out;
 
 	/* Start by getting the root filehandle from the server */
-	ret = nfs4_proc_get_rootfh(server, mntfh, &fsinfo);
+	ret = nfs4_proc_get_rootfh(server, mntfh, &fsinfo, auth_probe);
 	if (ret < 0) {
 		dprintk("nfs4_get_rootfh: getroot error = %d\n", -ret);
 		goto out;
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index cdb0b41..2288cd3 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -11,6 +11,7 @@
 #include <linux/mount.h>
 #include <linux/namei.h>
 #include <linux/nfs_fs.h>
+#include <linux/nfs_mount.h>
 #include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/sunrpc/clnt.h>
@@ -369,21 +370,33 @@
 struct vfsmount *nfs4_submount(struct nfs_server *server, struct dentry *dentry,
 			       struct nfs_fh *fh, struct nfs_fattr *fattr)
 {
+	rpc_authflavor_t flavor = server->client->cl_auth->au_flavor;
 	struct dentry *parent = dget_parent(dentry);
+	struct inode *dir = parent->d_inode;
+	struct qstr *name = &dentry->d_name;
 	struct rpc_clnt *client;
 	struct vfsmount *mnt;
 
 	/* Look it up again to get its attributes and sec flavor */
-	client = nfs4_proc_lookup_mountpoint(parent->d_inode, &dentry->d_name, fh, fattr);
+	client = nfs4_proc_lookup_mountpoint(dir, name, fh, fattr);
 	dput(parent);
 	if (IS_ERR(client))
 		return ERR_CAST(client);
 
-	if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL)
+	if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) {
 		mnt = nfs_do_refmount(client, dentry);
-	else
-		mnt = nfs_do_submount(dentry, fh, fattr, client->cl_auth->au_flavor);
+		goto out;
+	}
 
+	if (client->cl_auth->au_flavor != flavor)
+		flavor = client->cl_auth->au_flavor;
+	else if (!(server->flags & NFS_MOUNT_SECFLAVOUR)) {
+		rpc_authflavor_t new = nfs4_negotiate_security(dir, name);
+		if ((int)new >= 0)
+			flavor = new;
+	}
+	mnt = nfs_do_submount(dentry, fh, fattr, flavor);
+out:
 	rpc_shutdown_client(client);
 	return mnt;
 }
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 108a774..39b6cf2 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -66,6 +66,8 @@
 #include "nfs4session.h"
 #include "fscache.h"
 
+#include "nfs4trace.h"
+
 #define NFSDBG_FACILITY		NFSDBG_PROC
 
 #define NFS4_POLL_RETRY_MIN	(HZ/10)
@@ -150,6 +152,7 @@
 	case -NFS4ERR_RECALLCONFLICT:
 		return -EREMOTEIO;
 	case -NFS4ERR_WRONGSEC:
+	case -NFS4ERR_WRONG_CRED:
 		return -EPERM;
 	case -NFS4ERR_BADOWNER:
 	case -NFS4ERR_BADNAME:
@@ -433,6 +436,20 @@
 	return ret;
 }
 
+/*
+ * Return 'true' if 'clp' is using an rpc_client that is integrity protected
+ * or 'false' otherwise.
+ */
+static bool _nfs4_is_integrity_protected(struct nfs_client *clp)
+{
+	rpc_authflavor_t flavor = clp->cl_rpcclient->cl_auth->au_flavor;
+
+	if (flavor == RPC_AUTH_GSS_KRB5I ||
+	    flavor == RPC_AUTH_GSS_KRB5P)
+		return true;
+
+	return false;
+}
 
 static void do_renew_lease(struct nfs_client *clp, unsigned long timestamp)
 {
@@ -447,6 +464,88 @@
 	do_renew_lease(server->nfs_client, timestamp);
 }
 
+struct nfs4_call_sync_data {
+	const struct nfs_server *seq_server;
+	struct nfs4_sequence_args *seq_args;
+	struct nfs4_sequence_res *seq_res;
+};
+
+static void nfs4_init_sequence(struct nfs4_sequence_args *args,
+			       struct nfs4_sequence_res *res, int cache_reply)
+{
+	args->sa_slot = NULL;
+	args->sa_cache_this = cache_reply;
+	args->sa_privileged = 0;
+
+	res->sr_slot = NULL;
+}
+
+static void nfs4_set_sequence_privileged(struct nfs4_sequence_args *args)
+{
+	args->sa_privileged = 1;
+}
+
+static int nfs40_setup_sequence(const struct nfs_server *server,
+				struct nfs4_sequence_args *args,
+				struct nfs4_sequence_res *res,
+				struct rpc_task *task)
+{
+	struct nfs4_slot_table *tbl = server->nfs_client->cl_slot_tbl;
+	struct nfs4_slot *slot;
+
+	/* slot already allocated? */
+	if (res->sr_slot != NULL)
+		goto out_start;
+
+	spin_lock(&tbl->slot_tbl_lock);
+	if (nfs4_slot_tbl_draining(tbl) && !args->sa_privileged)
+		goto out_sleep;
+
+	slot = nfs4_alloc_slot(tbl);
+	if (IS_ERR(slot)) {
+		if (slot == ERR_PTR(-ENOMEM))
+			task->tk_timeout = HZ >> 2;
+		goto out_sleep;
+	}
+	spin_unlock(&tbl->slot_tbl_lock);
+
+	args->sa_slot = slot;
+	res->sr_slot = slot;
+
+out_start:
+	rpc_call_start(task);
+	return 0;
+
+out_sleep:
+	if (args->sa_privileged)
+		rpc_sleep_on_priority(&tbl->slot_tbl_waitq, task,
+				NULL, RPC_PRIORITY_PRIVILEGED);
+	else
+		rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL);
+	spin_unlock(&tbl->slot_tbl_lock);
+	return -EAGAIN;
+}
+
+static int nfs40_sequence_done(struct rpc_task *task,
+			       struct nfs4_sequence_res *res)
+{
+	struct nfs4_slot *slot = res->sr_slot;
+	struct nfs4_slot_table *tbl;
+
+	if (!RPC_WAS_SENT(task))
+		goto out;
+
+	tbl = slot->table;
+	spin_lock(&tbl->slot_tbl_lock);
+	if (!nfs41_wake_and_assign_slot(tbl, slot))
+		nfs4_free_slot(tbl, slot);
+	spin_unlock(&tbl->slot_tbl_lock);
+
+	res->sr_slot = NULL;
+out:
+	return 1;
+}
+
 #if defined(CONFIG_NFS_V4_1)
 
 static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
@@ -506,6 +605,7 @@
 		interrupted = true;
 	}
 
+	trace_nfs4_sequence_done(session, res);
 	/* Check the SEQUENCE operation status */
 	switch (res->sr_status) {
 	case 0:
@@ -591,25 +691,11 @@
 {
 	if (res->sr_slot == NULL)
 		return 1;
+	if (!res->sr_slot->table->session)
+		return nfs40_sequence_done(task, res);
 	return nfs41_sequence_done(task, res);
 }
 
-static void nfs41_init_sequence(struct nfs4_sequence_args *args,
-		struct nfs4_sequence_res *res, int cache_reply)
-{
-	args->sa_slot = NULL;
-	args->sa_cache_this = 0;
-	args->sa_privileged = 0;
-	if (cache_reply)
-		args->sa_cache_this = 1;
-	res->sr_slot = NULL;
-}
-
-static void nfs4_set_sequence_privileged(struct nfs4_sequence_args *args)
-{
-	args->sa_privileged = 1;
-}
-
 int nfs41_setup_sequence(struct nfs4_session *session,
 				struct nfs4_sequence_args *args,
 				struct nfs4_sequence_res *res,
@@ -647,7 +733,7 @@
 
 	args->sa_slot = slot;
 
-	dprintk("<-- %s slotid=%d seqid=%d\n", __func__,
+	dprintk("<-- %s slotid=%u seqid=%u\n", __func__,
 			slot->slot_nr, slot->seq_nr);
 
 	res->sr_slot = slot;
@@ -658,6 +744,7 @@
 	 * set to 1 if an rpc level failure occurs.
 	 */
 	res->sr_status = 1;
+	trace_nfs4_setup_sequence(session, args);
 out_success:
 	rpc_call_start(task);
 	return 0;
@@ -673,38 +760,30 @@
 }
 EXPORT_SYMBOL_GPL(nfs41_setup_sequence);
 
-int nfs4_setup_sequence(const struct nfs_server *server,
-			struct nfs4_sequence_args *args,
-			struct nfs4_sequence_res *res,
-			struct rpc_task *task)
+static int nfs4_setup_sequence(const struct nfs_server *server,
+			       struct nfs4_sequence_args *args,
+			       struct nfs4_sequence_res *res,
+			       struct rpc_task *task)
 {
 	struct nfs4_session *session = nfs4_get_session(server);
 	int ret = 0;
 
-	if (session == NULL) {
-		rpc_call_start(task);
-		goto out;
-	}
+	if (!session)
+		return nfs40_setup_sequence(server, args, res, task);
 
-	dprintk("--> %s clp %p session %p sr_slot %d\n",
+	dprintk("--> %s clp %p session %p sr_slot %u\n",
 		__func__, session->clp, session, res->sr_slot ?
-			res->sr_slot->slot_nr : -1);
+			res->sr_slot->slot_nr : NFS4_NO_SLOT);
 
 	ret = nfs41_setup_sequence(session, args, res, task);
-out:
+
 	dprintk("<-- %s status=%d\n", __func__, ret);
 	return ret;
 }
 
-struct nfs41_call_sync_data {
-	const struct nfs_server *seq_server;
-	struct nfs4_sequence_args *seq_args;
-	struct nfs4_sequence_res *seq_res;
-};
-
 static void nfs41_call_sync_prepare(struct rpc_task *task, void *calldata)
 {
-	struct nfs41_call_sync_data *data = calldata;
+	struct nfs4_call_sync_data *data = calldata;
 	struct nfs4_session *session = nfs4_get_session(data->seq_server);
 
 	dprintk("--> %s data->seq_server %p\n", __func__, data->seq_server);
@@ -714,7 +793,7 @@
 
 static void nfs41_call_sync_done(struct rpc_task *task, void *calldata)
 {
-	struct nfs41_call_sync_data *data = calldata;
+	struct nfs4_call_sync_data *data = calldata;
 
 	nfs41_sequence_done(task, data->seq_res);
 }
@@ -724,6 +803,42 @@
 	.rpc_call_done = nfs41_call_sync_done,
 };
 
+#else	/* !CONFIG_NFS_V4_1 */
+
+static int nfs4_setup_sequence(const struct nfs_server *server,
+			       struct nfs4_sequence_args *args,
+			       struct nfs4_sequence_res *res,
+			       struct rpc_task *task)
+{
+	return nfs40_setup_sequence(server, args, res, task);
+}
+
+static int nfs4_sequence_done(struct rpc_task *task,
+			       struct nfs4_sequence_res *res)
+{
+	return nfs40_sequence_done(task, res);
+}
+
+#endif	/* !CONFIG_NFS_V4_1 */
+
+static void nfs40_call_sync_prepare(struct rpc_task *task, void *calldata)
+{
+	struct nfs4_call_sync_data *data = calldata;
+	nfs4_setup_sequence(data->seq_server,
+				data->seq_args, data->seq_res, task);
+}
+
+static void nfs40_call_sync_done(struct rpc_task *task, void *calldata)
+{
+	struct nfs4_call_sync_data *data = calldata;
+	nfs4_sequence_done(task, data->seq_res);
+}
+
+static const struct rpc_call_ops nfs40_call_sync_ops = {
+	.rpc_call_prepare = nfs40_call_sync_prepare,
+	.rpc_call_done = nfs40_call_sync_done,
+};
+
 static int nfs4_call_sync_sequence(struct rpc_clnt *clnt,
 				   struct nfs_server *server,
 				   struct rpc_message *msg,
@@ -732,7 +847,8 @@
 {
 	int ret;
 	struct rpc_task *task;
-	struct nfs41_call_sync_data data = {
+	struct nfs_client *clp = server->nfs_client;
+	struct nfs4_call_sync_data data = {
 		.seq_server = server,
 		.seq_args = args,
 		.seq_res = res,
@@ -740,7 +856,7 @@
 	struct rpc_task_setup task_setup = {
 		.rpc_client = clnt,
 		.rpc_message = msg,
-		.callback_ops = &nfs41_call_sync_ops,
+		.callback_ops = clp->cl_mvops->call_sync_ops,
 		.callback_data = &data
 	};
 
@@ -754,35 +870,6 @@
 	return ret;
 }
 
-#else
-static
-void nfs41_init_sequence(struct nfs4_sequence_args *args,
-		struct nfs4_sequence_res *res, int cache_reply)
-{
-}
-
-static void nfs4_set_sequence_privileged(struct nfs4_sequence_args *args)
-{
-}
-
-
-static int nfs4_sequence_done(struct rpc_task *task,
-			       struct nfs4_sequence_res *res)
-{
-	return 1;
-}
-#endif /* CONFIG_NFS_V4_1 */
-
-static
-int _nfs4_call_sync(struct rpc_clnt *clnt,
-		    struct nfs_server *server,
-		    struct rpc_message *msg,
-		    struct nfs4_sequence_args *args,
-		    struct nfs4_sequence_res *res)
-{
-	return rpc_call_sync(clnt, msg, 0);
-}
-
 static
 int nfs4_call_sync(struct rpc_clnt *clnt,
 		   struct nfs_server *server,
@@ -791,9 +878,8 @@
 		   struct nfs4_sequence_res *res,
 		   int cache_reply)
 {
-	nfs41_init_sequence(args, res, cache_reply);
-	return server->nfs_client->cl_mvops->call_sync(clnt, server, msg,
-						args, res);
+	nfs4_init_sequence(args, res, cache_reply);
+	return nfs4_call_sync_sequence(clnt, server, msg, args, res);
 }
 
 static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo)
@@ -933,7 +1019,7 @@
 		p->o_arg.fh = NFS_FH(dentry->d_inode);
 	}
 	if (attrs != NULL && attrs->ia_valid != 0) {
-		__be32 verf[2];
+		__u32 verf[2];
 
 		p->o_arg.u.attrs = &p->attrs;
 		memcpy(&p->attrs, attrs, sizeof(p->attrs));
@@ -1103,7 +1189,7 @@
 		goto no_delegation;
 
 	spin_lock(&deleg_cur->lock);
-	if (nfsi->delegation != deleg_cur ||
+	if (rcu_dereference(nfsi->delegation) != deleg_cur ||
 	   test_bit(NFS_DELEGATION_RETURNING, &deleg_cur->flags) ||
 	    (deleg_cur->type & fmode) != fmode)
 		goto no_delegation_unlock;
@@ -1440,6 +1526,7 @@
 	int err;
 	do {
 		err = _nfs4_do_open_reclaim(ctx, state);
+		trace_nfs4_open_reclaim(ctx, 0, err);
 		if (nfs4_clear_cap_atomic_open_v1(server, err, &exception))
 			continue;
 		if (err != -NFS4ERR_DELAY)
@@ -1524,10 +1611,20 @@
 	return nfs4_handle_delegation_recall_error(server, state, stateid, err);
 }
 
+static void nfs4_open_confirm_prepare(struct rpc_task *task, void *calldata)
+{
+	struct nfs4_opendata *data = calldata;
+
+	nfs40_setup_sequence(data->o_arg.server, &data->o_arg.seq_args,
+				&data->o_res.seq_res, task);
+}
+
 static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata)
 {
 	struct nfs4_opendata *data = calldata;
 
+	nfs40_sequence_done(task, &data->o_res.seq_res);
+
 	data->rpc_status = task->tk_status;
 	if (data->rpc_status == 0) {
 		nfs4_stateid_copy(&data->o_res.stateid, &data->c_res.stateid);
@@ -1556,6 +1653,7 @@
 }
 
 static const struct rpc_call_ops nfs4_open_confirm_ops = {
+	.rpc_call_prepare = nfs4_open_confirm_prepare,
 	.rpc_call_done = nfs4_open_confirm_done,
 	.rpc_release = nfs4_open_confirm_release,
 };
@@ -1583,6 +1681,7 @@
 	};
 	int status;
 
+	nfs4_init_sequence(&data->o_arg.seq_args, &data->o_res.seq_res, 1);
 	kref_get(&data->kref);
 	data->rpc_done = 0;
 	data->rpc_status = 0;
@@ -1742,7 +1841,7 @@
 	};
 	int status;
 
-	nfs41_init_sequence(&o_arg->seq_args, &o_res->seq_res, 1);
+	nfs4_init_sequence(&o_arg->seq_args, &o_res->seq_res, 1);
 	kref_get(&data->kref);
 	data->rpc_done = 0;
 	data->rpc_status = 0;
@@ -1895,6 +1994,7 @@
 
 	do {
 		err = _nfs4_open_expired(ctx, state);
+		trace_nfs4_open_expired(ctx, 0, err);
 		if (nfs4_clear_cap_atomic_open_v1(server, err, &exception))
 			continue;
 		switch (err) {
@@ -1944,6 +2044,7 @@
 		cred = get_rpccred(delegation->cred);
 		rcu_read_unlock();
 		status = nfs41_test_stateid(server, stateid, cred);
+		trace_nfs4_test_delegation_stateid(state, NULL, status);
 	} else
 		rcu_read_unlock();
 
@@ -1986,6 +2087,7 @@
 		return -NFS4ERR_BAD_STATEID;
 
 	status = nfs41_test_stateid(server, stateid, cred);
+	trace_nfs4_test_open_stateid(state, NULL, status);
 	if (status != NFS_OK) {
 		/* Free the stateid unless the server explicitly
 		 * informs us the stateid is unrecognized. */
@@ -2197,6 +2299,7 @@
 	do {
 		status = _nfs4_do_open(dir, ctx, flags, sattr, label);
 		res = ctx->state;
+		trace_nfs4_open_file(ctx, flags, status);
 		if (status == 0)
 			break;
 		/* NOTE: BAD_SEQID means the server and client disagree about the
@@ -2310,6 +2413,7 @@
 	int err;
 	do {
 		err = _nfs4_do_setattr(inode, cred, fattr, sattr, state, ilabel, olabel);
+		trace_nfs4_setattr(inode, err);
 		switch (err) {
 		case -NFS4ERR_OPENMODE:
 			if (!(sattr->ia_valid & ATTR_SIZE)) {
@@ -2387,6 +2491,7 @@
 	dprintk("%s: begin!\n", __func__);
 	if (!nfs4_sequence_done(task, &calldata->res.seq_res))
 		return;
+	trace_nfs4_close(state, &calldata->arg, &calldata->res, task->tk_status);
         /* hmm. we are done with the inode, and in the process of freeing
 	 * the state_owner. we keep this around to process errors
 	 */
@@ -2511,10 +2616,13 @@
 	};
 	int status = -ENOMEM;
 
+	nfs4_state_protect(server->nfs_client, NFS_SP4_MACH_CRED_CLEANUP,
+		&task_setup_data.rpc_client, &msg);
+
 	calldata = kzalloc(sizeof(*calldata), gfp_mask);
 	if (calldata == NULL)
 		goto out;
-	nfs41_init_sequence(&calldata->arg.seq_args, &calldata->res.seq_res, 1);
+	nfs4_init_sequence(&calldata->arg.seq_args, &calldata->res.seq_res, 1);
 	calldata->inode = state->inode;
 	calldata->state = state;
 	calldata->arg.fh = NFS_FH(state->inode);
@@ -2690,6 +2798,7 @@
 	int err;
 	do {
 		err = _nfs4_lookup_root(server, fhandle, info);
+		trace_nfs4_lookup_root(server, fhandle, info->fattr, err);
 		switch (err) {
 		case 0:
 		case -NFS4ERR_WRONGSEC:
@@ -2705,10 +2814,13 @@
 static int nfs4_lookup_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
 				struct nfs_fsinfo *info, rpc_authflavor_t flavor)
 {
+	struct rpc_auth_create_args auth_args = {
+		.pseudoflavor = flavor,
+	};
 	struct rpc_auth *auth;
 	int ret;
 
-	auth = rpcauth_create(flavor, server->client);
+	auth = rpcauth_create(&auth_args, server->client);
 	if (IS_ERR(auth)) {
 		ret = -EACCES;
 		goto out;
@@ -2772,18 +2884,27 @@
  * @server: initialized nfs_server handle
  * @fhandle: we fill in the pseudo-fs root file handle
  * @info: we fill in an FSINFO struct
+ * @auth_probe: probe the auth flavours
  *
  * Returns zero on success, or a negative errno.
  */
 int nfs4_proc_get_rootfh(struct nfs_server *server, struct nfs_fh *fhandle,
-			 struct nfs_fsinfo *info)
+			 struct nfs_fsinfo *info,
+			 bool auth_probe)
 {
 	int status;
 
-	status = nfs4_lookup_root(server, fhandle, info);
-	if ((status == -NFS4ERR_WRONGSEC) &&
-	    !(server->flags & NFS_MOUNT_SECFLAVOUR))
+	switch (auth_probe) {
+	case false:
+		status = nfs4_lookup_root(server, fhandle, info);
+		if (status != -NFS4ERR_WRONGSEC)
+			break;
+		/* Did user force a 'sec=' mount option? */
+		if (server->flags & NFS_MOUNT_SECFLAVOUR)
+			break;
+	default:
 		status = nfs4_do_find_root_sec(server, fhandle, info);
+	}
 
 	if (status == 0)
 		status = nfs4_server_capabilities(server, fhandle);
@@ -2899,8 +3020,9 @@
 	struct nfs4_exception exception = { };
 	int err;
 	do {
-		err = nfs4_handle_exception(server,
-				_nfs4_proc_getattr(server, fhandle, fattr, label),
+		err = _nfs4_proc_getattr(server, fhandle, fattr, label);
+		trace_nfs4_getattr(server, fhandle, fattr, err);
+		err = nfs4_handle_exception(server, err,
 				&exception);
 	} while (exception.retry);
 	return err;
@@ -2940,10 +3062,10 @@
 	
 	/* Deal with open(O_TRUNC) */
 	if (sattr->ia_valid & ATTR_OPEN)
-		sattr->ia_valid &= ~(ATTR_MTIME|ATTR_CTIME|ATTR_OPEN);
+		sattr->ia_valid &= ~(ATTR_MTIME|ATTR_CTIME);
 
 	/* Optimization: if the end result is no change, don't RPC */
-	if ((sattr->ia_valid & ~(ATTR_FILE)) == 0)
+	if ((sattr->ia_valid & ~(ATTR_FILE|ATTR_OPEN)) == 0)
 		return 0;
 
 	/* Search for an existing open(O_WRITE) file */
@@ -3020,6 +3142,7 @@
 	int err;
 	do {
 		err = _nfs4_proc_lookup(client, dir, name, fhandle, fattr, label);
+		trace_nfs4_lookup(dir, name, err);
 		switch (err) {
 		case -NFS4ERR_BADNAME:
 			err = -ENOENT;
@@ -3031,7 +3154,9 @@
 			err = -EPERM;
 			if (client != *clnt)
 				goto out;
-
+			/* No security negotiation if the user specified 'sec=' */
+			if (NFS_SERVER(dir)->flags & NFS_MOUNT_SECFLAVOUR)
+				goto out;
 			client = nfs4_create_sec_client(client, dir, name);
 			if (IS_ERR(client))
 				return PTR_ERR(client);
@@ -3134,8 +3259,9 @@
 	struct nfs4_exception exception = { };
 	int err;
 	do {
-		err = nfs4_handle_exception(NFS_SERVER(inode),
-				_nfs4_proc_access(inode, entry),
+		err = _nfs4_proc_access(inode, entry);
+		trace_nfs4_access(inode, err);
+		err = nfs4_handle_exception(NFS_SERVER(inode), err,
 				&exception);
 	} while (exception.retry);
 	return err;
@@ -3188,8 +3314,9 @@
 	struct nfs4_exception exception = { };
 	int err;
 	do {
-		err = nfs4_handle_exception(NFS_SERVER(inode),
-				_nfs4_proc_readlink(inode, page, pgbase, pglen),
+		err = _nfs4_proc_readlink(inode, page, pgbase, pglen);
+		trace_nfs4_readlink(inode, err);
+		err = nfs4_handle_exception(NFS_SERVER(inode), err,
 				&exception);
 	} while (exception.retry);
 	return err;
@@ -3253,8 +3380,9 @@
 	struct nfs4_exception exception = { };
 	int err;
 	do {
-		err = nfs4_handle_exception(NFS_SERVER(dir),
-				_nfs4_proc_remove(dir, name),
+		err = _nfs4_proc_remove(dir, name);
+		trace_nfs4_remove(dir, name, err);
+		err = nfs4_handle_exception(NFS_SERVER(dir), err,
 				&exception);
 	} while (exception.retry);
 	return err;
@@ -3268,7 +3396,7 @@
 
 	res->server = server;
 	msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE];
-	nfs41_init_sequence(&args->seq_args, &res->seq_res, 1);
+	nfs4_init_sequence(&args->seq_args, &res->seq_res, 1);
 
 	nfs_fattr_init(res->dir_attr);
 }
@@ -3283,7 +3411,8 @@
 
 static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir)
 {
-	struct nfs_removeres *res = task->tk_msg.rpc_resp;
+	struct nfs_unlinkdata *data = task->tk_calldata;
+	struct nfs_removeres *res = &data->res;
 
 	if (!nfs4_sequence_done(task, &res->seq_res))
 		return 0;
@@ -3301,7 +3430,7 @@
 
 	msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME];
 	res->server = server;
-	nfs41_init_sequence(&arg->seq_args, &res->seq_res, 1);
+	nfs4_init_sequence(&arg->seq_args, &res->seq_res, 1);
 }
 
 static void nfs4_proc_rename_rpc_prepare(struct rpc_task *task, struct nfs_renamedata *data)
@@ -3315,7 +3444,8 @@
 static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
 				 struct inode *new_dir)
 {
-	struct nfs_renameres *res = task->tk_msg.rpc_resp;
+	struct nfs_renamedata *data = task->tk_calldata;
+	struct nfs_renameres *res = &data->res;
 
 	if (!nfs4_sequence_done(task, &res->seq_res))
 		return 0;
@@ -3361,9 +3491,10 @@
 	struct nfs4_exception exception = { };
 	int err;
 	do {
-		err = nfs4_handle_exception(NFS_SERVER(old_dir),
-				_nfs4_proc_rename(old_dir, old_name,
-					new_dir, new_name),
+		err = _nfs4_proc_rename(old_dir, old_name,
+					new_dir, new_name);
+		trace_nfs4_rename(old_dir, old_name, new_dir, new_name, err);
+		err = nfs4_handle_exception(NFS_SERVER(old_dir), err,
 				&exception);
 	} while (exception.retry);
 	return err;
@@ -3525,9 +3656,9 @@
 	label = nfs4_label_init_security(dir, dentry, sattr, &l);
 
 	do {
-		err = nfs4_handle_exception(NFS_SERVER(dir),
-				_nfs4_proc_symlink(dir, dentry, page,
-							len, sattr, label),
+		err = _nfs4_proc_symlink(dir, dentry, page, len, sattr, label);
+		trace_nfs4_symlink(dir, &dentry->d_name, err);
+		err = nfs4_handle_exception(NFS_SERVER(dir), err,
 				&exception);
 	} while (exception.retry);
 
@@ -3564,8 +3695,9 @@
 
 	sattr->ia_mode &= ~current_umask();
 	do {
-		err = nfs4_handle_exception(NFS_SERVER(dir),
-				_nfs4_proc_mkdir(dir, dentry, sattr, label),
+		err = _nfs4_proc_mkdir(dir, dentry, sattr, label);
+		trace_nfs4_mkdir(dir, &dentry->d_name, err);
+		err = nfs4_handle_exception(NFS_SERVER(dir), err,
 				&exception);
 	} while (exception.retry);
 	nfs4_label_release_security(label);
@@ -3618,9 +3750,10 @@
 	struct nfs4_exception exception = { };
 	int err;
 	do {
-		err = nfs4_handle_exception(NFS_SERVER(dentry->d_inode),
-				_nfs4_proc_readdir(dentry, cred, cookie,
-					pages, count, plus),
+		err = _nfs4_proc_readdir(dentry, cred, cookie,
+				pages, count, plus);
+		trace_nfs4_readdir(dentry->d_inode, err);
+		err = nfs4_handle_exception(NFS_SERVER(dentry->d_inode), err,
 				&exception);
 	} while (exception.retry);
 	return err;
@@ -3672,8 +3805,9 @@
 
 	sattr->ia_mode &= ~current_umask();
 	do {
-		err = nfs4_handle_exception(NFS_SERVER(dir),
-				_nfs4_proc_mknod(dir, dentry, sattr, label, rdev),
+		err = _nfs4_proc_mknod(dir, dentry, sattr, label, rdev);
+		trace_nfs4_mknod(dir, &dentry->d_name, err);
+		err = nfs4_handle_exception(NFS_SERVER(dir), err,
 				&exception);
 	} while (exception.retry);
 
@@ -3741,6 +3875,7 @@
 
 	do {
 		err = _nfs4_do_fsinfo(server, fhandle, fsinfo);
+		trace_nfs4_fsinfo(server, fhandle, fsinfo->fattr, err);
 		if (err == 0) {
 			struct nfs_client *clp = server->nfs_client;
 
@@ -3859,6 +3994,7 @@
 {
 	struct nfs_server *server = NFS_SERVER(data->header->inode);
 
+	trace_nfs4_read(data, task->tk_status);
 	if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) {
 		rpc_restart_call_prepare(task);
 		return -EAGAIN;
@@ -3902,24 +4038,29 @@
 	data->timestamp   = jiffies;
 	data->read_done_cb = nfs4_read_done_cb;
 	msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
-	nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 0);
+	nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0);
 }
 
-static void nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data)
+static int nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data)
 {
 	if (nfs4_setup_sequence(NFS_SERVER(data->header->inode),
 			&data->args.seq_args,
 			&data->res.seq_res,
 			task))
-		return;
-	nfs4_set_rw_stateid(&data->args.stateid, data->args.context,
-			data->args.lock_context, FMODE_READ);
+		return 0;
+	if (nfs4_set_rw_stateid(&data->args.stateid, data->args.context,
+				data->args.lock_context, FMODE_READ) == -EIO)
+		return -EIO;
+	if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags)))
+		return -EIO;
+	return 0;
 }
 
 static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data)
 {
 	struct inode *inode = data->header->inode;
 	
+	trace_nfs4_write(data, task->tk_status);
 	if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) {
 		rpc_restart_call_prepare(task);
 		return -EAGAIN;
@@ -3985,18 +4126,22 @@
 	data->timestamp   = jiffies;
 
 	msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE];
-	nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
+	nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
 }
 
-static void nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data)
+static int nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data)
 {
 	if (nfs4_setup_sequence(NFS_SERVER(data->header->inode),
 			&data->args.seq_args,
 			&data->res.seq_res,
 			task))
-		return;
-	nfs4_set_rw_stateid(&data->args.stateid, data->args.context,
-			data->args.lock_context, FMODE_WRITE);
+		return 0;
+	if (nfs4_set_rw_stateid(&data->args.stateid, data->args.context,
+				data->args.lock_context, FMODE_WRITE) == -EIO)
+		return -EIO;
+	if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags)))
+		return -EIO;
+	return 0;
 }
 
 static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
@@ -4011,6 +4156,7 @@
 {
 	struct inode *inode = data->inode;
 
+	trace_nfs4_commit(data, task->tk_status);
 	if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL) == -EAGAIN) {
 		rpc_restart_call_prepare(task);
 		return -EAGAIN;
@@ -4033,7 +4179,7 @@
 		data->commit_done_cb = nfs4_commit_done_cb;
 	data->res.server = server;
 	msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT];
-	nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
+	nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
 }
 
 struct nfs4_renewdata {
@@ -4062,6 +4208,7 @@
 	struct nfs_client *clp = data->client;
 	unsigned long timestamp = data->timestamp;
 
+	trace_nfs4_renew_async(clp, task->tk_status);
 	if (task->tk_status < 0) {
 		/* Unless we're shutting down, schedule state recovery! */
 		if (test_bit(NFS_CS_RENEWD, &clp->cl_res_state) == 0)
@@ -4319,6 +4466,7 @@
 	ssize_t ret;
 	do {
 		ret = __nfs4_get_acl_uncached(inode, buf, buflen);
+		trace_nfs4_get_acl(inode, ret);
 		if (ret >= 0)
 			break;
 		ret = nfs4_handle_exception(NFS_SERVER(inode), ret, &exception);
@@ -4398,8 +4546,9 @@
 	struct nfs4_exception exception = { };
 	int err;
 	do {
-		err = nfs4_handle_exception(NFS_SERVER(inode),
-				__nfs4_proc_set_acl(inode, buf, buflen),
+		err = __nfs4_proc_set_acl(inode, buf, buflen);
+		trace_nfs4_set_acl(inode, err);
+		err = nfs4_handle_exception(NFS_SERVER(inode), err,
 				&exception);
 	} while (exception.retry);
 	return err;
@@ -4452,8 +4601,9 @@
 		return -EOPNOTSUPP;
 
 	do {
-		err = nfs4_handle_exception(NFS_SERVER(inode),
-				_nfs4_get_security_label(inode, buf, buflen),
+		err = _nfs4_get_security_label(inode, buf, buflen);
+		trace_nfs4_get_security_label(inode, err);
+		err = nfs4_handle_exception(NFS_SERVER(inode), err,
 				&exception);
 	} while (exception.retry);
 	return err;
@@ -4505,9 +4655,10 @@
 	int err;
 
 	do {
-		err = nfs4_handle_exception(NFS_SERVER(inode),
-				_nfs4_do_set_security_label(inode, ilabel,
-				fattr, olabel),
+		err = _nfs4_do_set_security_label(inode, ilabel,
+				fattr, olabel);
+		trace_nfs4_set_security_label(inode, err);
+		err = nfs4_handle_exception(NFS_SERVER(inode), err,
 				&exception);
 	} while (exception.retry);
 	return err;
@@ -4630,11 +4781,11 @@
 		/* An impossible timestamp guarantees this value
 		 * will never match a generated boot time. */
 		verf[0] = 0;
-		verf[1] = (__be32)(NSEC_PER_SEC + 1);
+		verf[1] = cpu_to_be32(NSEC_PER_SEC + 1);
 	} else {
 		struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);
-		verf[0] = (__be32)nn->boot_time.tv_sec;
-		verf[1] = (__be32)nn->boot_time.tv_nsec;
+		verf[0] = cpu_to_be32(nn->boot_time.tv_sec);
+		verf[1] = cpu_to_be32(nn->boot_time.tv_nsec);
 	}
 	memcpy(bootverf->data, verf, sizeof(bootverf->data));
 }
@@ -4660,10 +4811,14 @@
 nfs4_init_uniform_client_string(const struct nfs_client *clp,
 				char *buf, size_t len)
 {
-	char *nodename = clp->cl_rpcclient->cl_nodename;
+	const char *nodename = clp->cl_rpcclient->cl_nodename;
 
 	if (nfs4_client_id_uniquifier[0] != '\0')
-		nodename = nfs4_client_id_uniquifier;
+		return scnprintf(buf, len, "Linux NFSv%u.%u %s/%s",
+				clp->rpc_ops->version,
+				clp->cl_minorversion,
+				nfs4_client_id_uniquifier,
+				nodename);
 	return scnprintf(buf, len, "Linux NFSv%u.%u %s",
 				clp->rpc_ops->version, clp->cl_minorversion,
 				nodename);
@@ -4724,6 +4879,7 @@
 		clp->cl_rpcclient->cl_auth->au_ops->au_name,
 		setclientid.sc_name_len, setclientid.sc_name);
 	status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
+	trace_nfs4_setclientid(clp, status);
 	dprintk("NFS reply setclientid: %d\n", status);
 	return status;
 }
@@ -4751,6 +4907,7 @@
 		clp->cl_rpcclient->cl_auth->au_ops->au_name,
 		clp->cl_clientid);
 	status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
+	trace_nfs4_setclientid_confirm(clp, status);
 	dprintk("NFS reply setclientid_confirm: %d\n", status);
 	return status;
 }
@@ -4772,6 +4929,7 @@
 	if (!nfs4_sequence_done(task, &data->res.seq_res))
 		return;
 
+	trace_nfs4_delegreturn_exit(&data->args, &data->res, task->tk_status);
 	switch (task->tk_status) {
 	case -NFS4ERR_STALE_STATEID:
 	case -NFS4ERR_EXPIRED:
@@ -4793,7 +4951,6 @@
 	kfree(calldata);
 }
 
-#if defined(CONFIG_NFS_V4_1)
 static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data)
 {
 	struct nfs4_delegreturndata *d_data;
@@ -4805,12 +4962,9 @@
 			&d_data->res.seq_res,
 			task);
 }
-#endif /* CONFIG_NFS_V4_1 */
 
 static const struct rpc_call_ops nfs4_delegreturn_ops = {
-#if defined(CONFIG_NFS_V4_1)
 	.rpc_call_prepare = nfs4_delegreturn_prepare,
-#endif /* CONFIG_NFS_V4_1 */
 	.rpc_call_done = nfs4_delegreturn_done,
 	.rpc_release = nfs4_delegreturn_release,
 };
@@ -4835,7 +4989,7 @@
 	data = kzalloc(sizeof(*data), GFP_NOFS);
 	if (data == NULL)
 		return -ENOMEM;
-	nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
+	nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
 	data->args.fhandle = &data->fh;
 	data->args.stateid = &data->stateid;
 	data->args.bitmask = server->cache_consistency_bitmask;
@@ -4875,6 +5029,7 @@
 	int err;
 	do {
 		err = _nfs4_proc_delegreturn(inode, cred, stateid, issync);
+		trace_nfs4_delegreturn(inode, err);
 		switch (err) {
 			case -NFS4ERR_STALE_STATEID:
 			case -NFS4ERR_EXPIRED:
@@ -4949,8 +5104,9 @@
 	int err;
 
 	do {
-		err = nfs4_handle_exception(NFS_SERVER(state->inode),
-				_nfs4_proc_getlk(state, cmd, request),
+		err = _nfs4_proc_getlk(state, cmd, request);
+		trace_nfs4_get_lock(request, state, cmd, err);
+		err = nfs4_handle_exception(NFS_SERVER(state->inode), err,
 				&exception);
 	} while (exception.retry);
 	return err;
@@ -5087,6 +5243,9 @@
 		.flags = RPC_TASK_ASYNC,
 	};
 
+	nfs4_state_protect(NFS_SERVER(lsp->ls_state->inode)->nfs_client,
+		NFS_SP4_MACH_CRED_CLEANUP, &task_setup_data.rpc_client, &msg);
+
 	/* Ensure this is an unlock - when canceling a lock, the
 	 * canceled lock is passed in, and it won't be an unlock.
 	 */
@@ -5098,7 +5257,7 @@
 		return ERR_PTR(-ENOMEM);
 	}
 
-	nfs41_init_sequence(&data->arg.seq_args, &data->res.seq_res, 1);
+	nfs4_init_sequence(&data->arg.seq_args, &data->res.seq_res, 1);
 	msg.rpc_argp = &data->arg;
 	msg.rpc_resp = &data->res;
 	task_setup_data.callback_data = data;
@@ -5148,6 +5307,7 @@
 	rpc_put_task(task);
 out:
 	request->fl_flags = fl_flags;
+	trace_nfs4_unlock(request, state, F_SETLK, status);
 	return status;
 }
 
@@ -5333,7 +5493,7 @@
 		return -ENOMEM;
 	if (IS_SETLKW(cmd))
 		data->arg.block = 1;
-	nfs41_init_sequence(&data->arg.seq_args, &data->res.seq_res, 1);
+	nfs4_init_sequence(&data->arg.seq_args, &data->res.seq_res, 1);
 	msg.rpc_argp = &data->arg;
 	msg.rpc_resp = &data->res;
 	task_setup_data.callback_data = data;
@@ -5371,6 +5531,7 @@
 		if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0)
 			return 0;
 		err = _nfs4_do_setlk(state, F_SETLK, request, NFS_LOCK_RECLAIM);
+		trace_nfs4_lock_reclaim(request, state, F_SETLK, err);
 		if (err != -NFS4ERR_DELAY)
 			break;
 		nfs4_handle_exception(server, err, &exception);
@@ -5389,10 +5550,15 @@
 	err = nfs4_set_lock_state(state, request);
 	if (err != 0)
 		return err;
+	if (!recover_lost_locks) {
+		set_bit(NFS_LOCK_LOST, &request->fl_u.nfs4_fl.owner->ls_flags);
+		return 0;
+	}
 	do {
 		if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0)
 			return 0;
 		err = _nfs4_do_setlk(state, F_SETLK, request, NFS_LOCK_EXPIRED);
+		trace_nfs4_lock_expired(request, state, F_SETLK, err);
 		switch (err) {
 		default:
 			goto out;
@@ -5428,6 +5594,7 @@
 			status = nfs41_test_stateid(server,
 					&lsp->ls_stateid,
 					cred);
+			trace_nfs4_test_lock_stateid(state, lsp, status);
 			if (status != NFS_OK) {
 				/* Free the stateid unless the server
 				 * informs us the stateid is unrecognized. */
@@ -5515,6 +5682,7 @@
 
 	do {
 		err = _nfs4_proc_setlk(state, cmd, request);
+		trace_nfs4_set_lock(request, state, cmd, err);
 		if (err == -NFS4ERR_DENIED)
 			err = -EAGAIN;
 		err = nfs4_handle_exception(NFS_SERVER(state->inode),
@@ -5597,8 +5765,23 @@
 	struct nfs4_lock_state *lsp;
 	struct nfs_server *server;
 	struct nfs_release_lockowner_args args;
+	struct nfs4_sequence_args seq_args;
+	struct nfs4_sequence_res seq_res;
 };
 
+static void nfs4_release_lockowner_prepare(struct rpc_task *task, void *calldata)
+{
+	struct nfs_release_lockowner_data *data = calldata;
+	nfs40_setup_sequence(data->server,
+				&data->seq_args, &data->seq_res, task);
+}
+
+static void nfs4_release_lockowner_done(struct rpc_task *task, void *calldata)
+{
+	struct nfs_release_lockowner_data *data = calldata;
+	nfs40_sequence_done(task, &data->seq_res);
+}
+
 static void nfs4_release_lockowner_release(void *calldata)
 {
 	struct nfs_release_lockowner_data *data = calldata;
@@ -5607,6 +5790,8 @@
 }
 
 static const struct rpc_call_ops nfs4_release_lockowner_ops = {
+	.rpc_call_prepare = nfs4_release_lockowner_prepare,
+	.rpc_call_done = nfs4_release_lockowner_done,
 	.rpc_release = nfs4_release_lockowner_release,
 };
 
@@ -5619,14 +5804,17 @@
 
 	if (server->nfs_client->cl_mvops->minor_version != 0)
 		return -EINVAL;
+
 	data = kmalloc(sizeof(*data), GFP_NOFS);
 	if (!data)
 		return -ENOMEM;
+	nfs4_init_sequence(&data->seq_args, &data->seq_res, 0);
 	data->lsp = lsp;
 	data->server = server;
 	data->args.lock_owner.clientid = server->nfs_client->cl_clientid;
 	data->args.lock_owner.id = lsp->ls_seqid.owner_id;
 	data->args.lock_owner.s_dev = server->s_dev;
+
 	msg.rpc_argp = &data->args;
 	rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, data);
 	return 0;
@@ -5781,14 +5969,23 @@
 	struct nfs4_exception exception = { };
 	int err;
 	do {
-		err = nfs4_handle_exception(NFS_SERVER(dir),
-				_nfs4_proc_fs_locations(client, dir, name, fs_locations, page),
+		err = _nfs4_proc_fs_locations(client, dir, name,
+				fs_locations, page);
+		trace_nfs4_get_fs_locations(dir, name, err);
+		err = nfs4_handle_exception(NFS_SERVER(dir), err,
 				&exception);
 	} while (exception.retry);
 	return err;
 }
 
-static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct nfs4_secinfo_flavors *flavors)
+/**
+ * If 'use_integrity' is true and the state managment nfs_client
+ * cl_rpcclient is using krb5i/p, use the integrity protected cl_rpcclient
+ * and the machine credential as per RFC3530bis and RFC5661 Security
+ * Considerations sections. Otherwise, just use the user cred with the
+ * filesystem's rpc_client.
+ */
+static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct nfs4_secinfo_flavors *flavors, bool use_integrity)
 {
 	int status;
 	struct nfs4_secinfo_arg args = {
@@ -5803,10 +6000,25 @@
 		.rpc_argp = &args,
 		.rpc_resp = &res,
 	};
+	struct rpc_clnt *clnt = NFS_SERVER(dir)->client;
+
+	if (use_integrity) {
+		clnt = NFS_SERVER(dir)->nfs_client->cl_rpcclient;
+		msg.rpc_cred = nfs4_get_clid_cred(NFS_SERVER(dir)->nfs_client);
+	}
 
 	dprintk("NFS call  secinfo %s\n", name->name);
-	status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &msg, &args.seq_args, &res.seq_res, 0);
+
+	nfs4_state_protect(NFS_SERVER(dir)->nfs_client,
+		NFS_SP4_MACH_CRED_SECINFO, &clnt, &msg);
+
+	status = nfs4_call_sync(clnt, NFS_SERVER(dir), &msg, &args.seq_args,
+				&res.seq_res, 0);
 	dprintk("NFS reply  secinfo: %d\n", status);
+
+	if (msg.rpc_cred)
+		put_rpccred(msg.rpc_cred);
+
 	return status;
 }
 
@@ -5816,8 +6028,23 @@
 	struct nfs4_exception exception = { };
 	int err;
 	do {
-		err = nfs4_handle_exception(NFS_SERVER(dir),
-				_nfs4_proc_secinfo(dir, name, flavors),
+		err = -NFS4ERR_WRONGSEC;
+
+		/* try to use integrity protection with machine cred */
+		if (_nfs4_is_integrity_protected(NFS_SERVER(dir)->nfs_client))
+			err = _nfs4_proc_secinfo(dir, name, flavors, true);
+
+		/*
+		 * if unable to use integrity protection, or SECINFO with
+		 * integrity protection returns NFS4ERR_WRONGSEC (which is
+		 * disallowed by spec, but exists in deployed servers) use
+		 * the current filesystem's rpc_client and the user cred.
+		 */
+		if (err == -NFS4ERR_WRONGSEC)
+			err = _nfs4_proc_secinfo(dir, name, flavors, false);
+
+		trace_nfs4_secinfo(dir, name, err);
+		err = nfs4_handle_exception(NFS_SERVER(dir), err,
 				&exception);
 	} while (exception.retry);
 	return err;
@@ -5881,6 +6108,7 @@
 	}
 
 	status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
+	trace_nfs4_bind_conn_to_session(clp, status);
 	if (status == 0) {
 		if (memcmp(res.session->sess_id.data,
 		    clp->cl_session->sess_id.data, NFS4_MAX_SESSIONID_LEN)) {
@@ -5909,16 +6137,124 @@
 }
 
 /*
- * nfs4_proc_exchange_id()
- *
- * Returns zero, a negative errno, or a negative NFS4ERR status code.
- *
- * Since the clientid has expired, all compounds using sessions
- * associated with the stale clientid will be returning
- * NFS4ERR_BADSESSION in the sequence operation, and will therefore
- * be in some phase of session reset.
+ * Minimum set of SP4_MACH_CRED operations from RFC 5661 in the enforce map
+ * and operations we'd like to see to enable certain features in the allow map
  */
-int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
+static const struct nfs41_state_protection nfs4_sp4_mach_cred_request = {
+	.how = SP4_MACH_CRED,
+	.enforce.u.words = {
+		[1] = 1 << (OP_BIND_CONN_TO_SESSION - 32) |
+		      1 << (OP_EXCHANGE_ID - 32) |
+		      1 << (OP_CREATE_SESSION - 32) |
+		      1 << (OP_DESTROY_SESSION - 32) |
+		      1 << (OP_DESTROY_CLIENTID - 32)
+	},
+	.allow.u.words = {
+		[0] = 1 << (OP_CLOSE) |
+		      1 << (OP_LOCKU),
+		[1] = 1 << (OP_SECINFO - 32) |
+		      1 << (OP_SECINFO_NO_NAME - 32) |
+		      1 << (OP_TEST_STATEID - 32) |
+		      1 << (OP_FREE_STATEID - 32)
+	}
+};
+
+/*
+ * Select the state protection mode for client `clp' given the server results
+ * from exchange_id in `sp'.
+ *
+ * Returns 0 on success, negative errno otherwise.
+ */
+static int nfs4_sp4_select_mode(struct nfs_client *clp,
+				 struct nfs41_state_protection *sp)
+{
+	static const u32 supported_enforce[NFS4_OP_MAP_NUM_WORDS] = {
+		[1] = 1 << (OP_BIND_CONN_TO_SESSION - 32) |
+		      1 << (OP_EXCHANGE_ID - 32) |
+		      1 << (OP_CREATE_SESSION - 32) |
+		      1 << (OP_DESTROY_SESSION - 32) |
+		      1 << (OP_DESTROY_CLIENTID - 32)
+	};
+	unsigned int i;
+
+	if (sp->how == SP4_MACH_CRED) {
+		/* Print state protect result */
+		dfprintk(MOUNT, "Server SP4_MACH_CRED support:\n");
+		for (i = 0; i <= LAST_NFS4_OP; i++) {
+			if (test_bit(i, sp->enforce.u.longs))
+				dfprintk(MOUNT, "  enforce op %d\n", i);
+			if (test_bit(i, sp->allow.u.longs))
+				dfprintk(MOUNT, "  allow op %d\n", i);
+		}
+
+		/* make sure nothing is on enforce list that isn't supported */
+		for (i = 0; i < NFS4_OP_MAP_NUM_WORDS; i++) {
+			if (sp->enforce.u.words[i] & ~supported_enforce[i]) {
+				dfprintk(MOUNT, "sp4_mach_cred: disabled\n");
+				return -EINVAL;
+			}
+		}
+
+		/*
+		 * Minimal mode - state operations are allowed to use machine
+		 * credential.  Note this already happens by default, so the
+		 * client doesn't have to do anything more than the negotiation.
+		 *
+		 * NOTE: we don't care if EXCHANGE_ID is in the list -
+		 *       we're already using the machine cred for exchange_id
+		 *       and will never use a different cred.
+		 */
+		if (test_bit(OP_BIND_CONN_TO_SESSION, sp->enforce.u.longs) &&
+		    test_bit(OP_CREATE_SESSION, sp->enforce.u.longs) &&
+		    test_bit(OP_DESTROY_SESSION, sp->enforce.u.longs) &&
+		    test_bit(OP_DESTROY_CLIENTID, sp->enforce.u.longs)) {
+			dfprintk(MOUNT, "sp4_mach_cred:\n");
+			dfprintk(MOUNT, "  minimal mode enabled\n");
+			set_bit(NFS_SP4_MACH_CRED_MINIMAL, &clp->cl_sp4_flags);
+		} else {
+			dfprintk(MOUNT, "sp4_mach_cred: disabled\n");
+			return -EINVAL;
+		}
+
+		if (test_bit(OP_CLOSE, sp->allow.u.longs) &&
+		    test_bit(OP_LOCKU, sp->allow.u.longs)) {
+			dfprintk(MOUNT, "  cleanup mode enabled\n");
+			set_bit(NFS_SP4_MACH_CRED_CLEANUP, &clp->cl_sp4_flags);
+		}
+
+		if (test_bit(OP_SECINFO, sp->allow.u.longs) &&
+		    test_bit(OP_SECINFO_NO_NAME, sp->allow.u.longs)) {
+			dfprintk(MOUNT, "  secinfo mode enabled\n");
+			set_bit(NFS_SP4_MACH_CRED_SECINFO, &clp->cl_sp4_flags);
+		}
+
+		if (test_bit(OP_TEST_STATEID, sp->allow.u.longs) &&
+		    test_bit(OP_FREE_STATEID, sp->allow.u.longs)) {
+			dfprintk(MOUNT, "  stateid mode enabled\n");
+			set_bit(NFS_SP4_MACH_CRED_STATEID, &clp->cl_sp4_flags);
+		}
+
+		if (test_bit(OP_WRITE, sp->allow.u.longs)) {
+			dfprintk(MOUNT, "  write mode enabled\n");
+			set_bit(NFS_SP4_MACH_CRED_WRITE, &clp->cl_sp4_flags);
+		}
+
+		if (test_bit(OP_COMMIT, sp->allow.u.longs)) {
+			dfprintk(MOUNT, "  commit mode enabled\n");
+			set_bit(NFS_SP4_MACH_CRED_COMMIT, &clp->cl_sp4_flags);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * _nfs4_proc_exchange_id()
+ *
+ * Wrapper for EXCHANGE_ID operation.
+ */
+static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred,
+	u32 sp4_how)
 {
 	nfs4_verifier verifier;
 	struct nfs41_exchange_id_args args = {
@@ -5965,10 +6301,30 @@
 		goto out_server_scope;
 	}
 
+	switch (sp4_how) {
+	case SP4_NONE:
+		args.state_protect.how = SP4_NONE;
+		break;
+
+	case SP4_MACH_CRED:
+		args.state_protect = nfs4_sp4_mach_cred_request;
+		break;
+
+	default:
+		/* unsupported! */
+		WARN_ON_ONCE(1);
+		status = -EINVAL;
+		goto out_server_scope;
+	}
+
 	status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
+	trace_nfs4_exchange_id(clp, status);
 	if (status == 0)
 		status = nfs4_check_cl_exchange_flags(res.flags);
 
+	if (status == 0)
+		status = nfs4_sp4_select_mode(clp, &res.state_protect);
+
 	if (status == 0) {
 		clp->cl_clientid = res.clientid;
 		clp->cl_exchange_flags = (res.flags & ~EXCHGID4_FLAG_CONFIRMED_R);
@@ -6015,6 +6371,35 @@
 	return status;
 }
 
+/*
+ * nfs4_proc_exchange_id()
+ *
+ * Returns zero, a negative errno, or a negative NFS4ERR status code.
+ *
+ * Since the clientid has expired, all compounds using sessions
+ * associated with the stale clientid will be returning
+ * NFS4ERR_BADSESSION in the sequence operation, and will therefore
+ * be in some phase of session reset.
+ *
+ * Will attempt to negotiate SP4_MACH_CRED if krb5i / krb5p auth is used.
+ */
+int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
+{
+	rpc_authflavor_t authflavor = clp->cl_rpcclient->cl_auth->au_flavor;
+	int status;
+
+	/* try SP4_MACH_CRED if krb5i/p	*/
+	if (authflavor == RPC_AUTH_GSS_KRB5I ||
+	    authflavor == RPC_AUTH_GSS_KRB5P) {
+		status = _nfs4_proc_exchange_id(clp, cred, SP4_MACH_CRED);
+		if (!status)
+			return 0;
+	}
+
+	/* try SP4_NONE */
+	return _nfs4_proc_exchange_id(clp, cred, SP4_NONE);
+}
+
 static int _nfs4_proc_destroy_clientid(struct nfs_client *clp,
 		struct rpc_cred *cred)
 {
@@ -6026,6 +6411,7 @@
 	int status;
 
 	status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
+	trace_nfs4_destroy_clientid(clp, status);
 	if (status)
 		dprintk("NFS: Got error %d from the server %s on "
 			"DESTROY_CLIENTID.", status, clp->cl_hostname);
@@ -6063,7 +6449,7 @@
 		goto out;
 	if (clp->cl_preserve_clid)
 		goto out;
-	cred = nfs4_get_exchange_id_cred(clp);
+	cred = nfs4_get_clid_cred(clp);
 	ret = nfs4_proc_destroy_clientid(clp, cred);
 	if (cred)
 		put_rpccred(cred);
@@ -6155,7 +6541,7 @@
 	};
 	int status;
 
-	nfs41_init_sequence(&args.la_seq_args, &res.lr_seq_res, 0);
+	nfs4_init_sequence(&args.la_seq_args, &res.lr_seq_res, 0);
 	nfs4_set_sequence_privileged(&args.la_seq_args);
 	dprintk("--> %s\n", __func__);
 	task = rpc_run_task(&task_setup);
@@ -6289,6 +6675,7 @@
 	args.flags = (SESSION4_PERSIST | SESSION4_BACK_CHAN);
 
 	status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
+	trace_nfs4_create_session(clp, status);
 
 	if (!status) {
 		/* Verify the session's negotiated channel_attrs values */
@@ -6352,6 +6739,7 @@
 		return status;
 
 	status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
+	trace_nfs4_destroy_session(session->clp, status);
 
 	if (status)
 		dprintk("NFS: Got error %d from the server on DESTROY_SESSION. "
@@ -6401,6 +6789,7 @@
 	if (!nfs41_sequence_done(task, task->tk_msg.rpc_resp))
 		return;
 
+	trace_nfs4_sequence(clp, task->tk_status);
 	if (task->tk_status < 0) {
 		dprintk("%s ERROR %d\n", __func__, task->tk_status);
 		if (atomic_read(&clp->cl_count) == 1)
@@ -6458,7 +6847,7 @@
 		nfs_put_client(clp);
 		return ERR_PTR(-ENOMEM);
 	}
-	nfs41_init_sequence(&calldata->args, &calldata->res, 0);
+	nfs4_init_sequence(&calldata->args, &calldata->res, 0);
 	if (is_privileged)
 		nfs4_set_sequence_privileged(&calldata->args);
 	msg.rpc_argp = &calldata->args;
@@ -6553,6 +6942,7 @@
 	if (!nfs41_sequence_done(task, res))
 		return;
 
+	trace_nfs4_reclaim_complete(clp, task->tk_status);
 	if (nfs41_reclaim_complete_handle_errors(task, clp) == -EAGAIN) {
 		rpc_restart_call_prepare(task);
 		return;
@@ -6600,7 +6990,7 @@
 	calldata->clp = clp;
 	calldata->arg.one_fs = 0;
 
-	nfs41_init_sequence(&calldata->arg.seq_args, &calldata->res.seq_res, 0);
+	nfs4_init_sequence(&calldata->arg.seq_args, &calldata->res.seq_res, 0);
 	nfs4_set_sequence_privileged(&calldata->arg.seq_args);
 	msg.rpc_argp = &calldata->arg;
 	msg.rpc_resp = &calldata->res;
@@ -6791,7 +7181,7 @@
 
 	lgp->res.layoutp = &lgp->args.layout;
 	lgp->res.seq_res.sr_slot = NULL;
-	nfs41_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0);
+	nfs4_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0);
 
 	/* nfs4_layoutget_release calls pnfs_put_layout_hdr */
 	pnfs_get_layout_hdr(NFS_I(inode)->layout);
@@ -6802,6 +7192,10 @@
 	status = nfs4_wait_for_completion_rpc_task(task);
 	if (status == 0)
 		status = task->tk_status;
+	trace_nfs4_layoutget(lgp->args.ctx,
+			&lgp->args.range,
+			&lgp->res.range,
+			status);
 	/* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */
 	if (status == 0 && lgp->res.layoutp->len)
 		lseg = pnfs_layout_process(lgp);
@@ -6874,7 +7268,7 @@
 		.rpc_cred = lrp->cred,
 	};
 	struct rpc_task_setup task_setup_data = {
-		.rpc_client = lrp->clp->cl_rpcclient,
+		.rpc_client = NFS_SERVER(lrp->args.inode)->client,
 		.rpc_message = &msg,
 		.callback_ops = &nfs4_layoutreturn_call_ops,
 		.callback_data = lrp,
@@ -6882,11 +7276,12 @@
 	int status;
 
 	dprintk("--> %s\n", __func__);
-	nfs41_init_sequence(&lrp->args.seq_args, &lrp->res.seq_res, 1);
+	nfs4_init_sequence(&lrp->args.seq_args, &lrp->res.seq_res, 1);
 	task = rpc_run_task(&task_setup_data);
 	if (IS_ERR(task))
 		return PTR_ERR(task);
 	status = task->tk_status;
+	trace_nfs4_layoutreturn(lrp->args.inode, status);
 	dprintk("<-- %s status=%d\n", __func__, status);
 	rpc_put_task(task);
 	return status;
@@ -7063,7 +7458,7 @@
 		data->args.lastbytewritten,
 		data->args.inode->i_ino);
 
-	nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
+	nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
 	task = rpc_run_task(&task_setup_data);
 	if (IS_ERR(task))
 		return PTR_ERR(task);
@@ -7073,15 +7468,21 @@
 	if (status != 0)
 		goto out;
 	status = task->tk_status;
+	trace_nfs4_layoutcommit(data->args.inode, status);
 out:
 	dprintk("%s: status %d\n", __func__, status);
 	rpc_put_task(task);
 	return status;
 }
 
+/**
+ * Use the state managment nfs_client cl_rpcclient, which uses krb5i (if
+ * possible) as per RFC3530bis and RFC5661 Security Considerations sections
+ */
 static int
 _nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle,
-		    struct nfs_fsinfo *info, struct nfs4_secinfo_flavors *flavors)
+		    struct nfs_fsinfo *info,
+		    struct nfs4_secinfo_flavors *flavors, bool use_integrity)
 {
 	struct nfs41_secinfo_no_name_args args = {
 		.style = SECINFO_STYLE_CURRENT_FH,
@@ -7094,7 +7495,23 @@
 		.rpc_argp = &args,
 		.rpc_resp = &res,
 	};
-	return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
+	struct rpc_clnt *clnt = server->client;
+	int status;
+
+	if (use_integrity) {
+		clnt = server->nfs_client->cl_rpcclient;
+		msg.rpc_cred = nfs4_get_clid_cred(server->nfs_client);
+	}
+
+	dprintk("--> %s\n", __func__);
+	status = nfs4_call_sync(clnt, server, &msg, &args.seq_args,
+				&res.seq_res, 0);
+	dprintk("<-- %s status=%d\n", __func__, status);
+
+	if (msg.rpc_cred)
+		put_rpccred(msg.rpc_cred);
+
+	return status;
 }
 
 static int
@@ -7104,7 +7521,24 @@
 	struct nfs4_exception exception = { };
 	int err;
 	do {
-		err = _nfs41_proc_secinfo_no_name(server, fhandle, info, flavors);
+		/* first try using integrity protection */
+		err = -NFS4ERR_WRONGSEC;
+
+		/* try to use integrity protection with machine cred */
+		if (_nfs4_is_integrity_protected(server->nfs_client))
+			err = _nfs41_proc_secinfo_no_name(server, fhandle, info,
+							  flavors, true);
+
+		/*
+		 * if unable to use integrity protection, or SECINFO with
+		 * integrity protection returns NFS4ERR_WRONGSEC (which is
+		 * disallowed by spec, but exists in deployed servers) use
+		 * the current filesystem's rpc_client and the user cred.
+		 */
+		if (err == -NFS4ERR_WRONGSEC)
+			err = _nfs41_proc_secinfo_no_name(server, fhandle, info,
+							  flavors, false);
+
 		switch (err) {
 		case 0:
 		case -NFS4ERR_WRONGSEC:
@@ -7174,11 +7608,15 @@
 		.rpc_resp = &res,
 		.rpc_cred = cred,
 	};
+	struct rpc_clnt *rpc_client = server->client;
+
+	nfs4_state_protect(server->nfs_client, NFS_SP4_MACH_CRED_STATEID,
+		&rpc_client, &msg);
 
 	dprintk("NFS call  test_stateid %p\n", stateid);
-	nfs41_init_sequence(&args.seq_args, &res.seq_res, 0);
+	nfs4_init_sequence(&args.seq_args, &res.seq_res, 0);
 	nfs4_set_sequence_privileged(&args.seq_args);
-	status = nfs4_call_sync_sequence(server->client, server, &msg,
+	status = nfs4_call_sync_sequence(rpc_client, server, &msg,
 			&args.seq_args, &res.seq_res);
 	if (status != NFS_OK) {
 		dprintk("NFS reply test_stateid: failed, %d\n", status);
@@ -7247,7 +7685,7 @@
 	kfree(calldata);
 }
 
-const struct rpc_call_ops nfs41_free_stateid_ops = {
+static const struct rpc_call_ops nfs41_free_stateid_ops = {
 	.rpc_call_prepare = nfs41_free_stateid_prepare,
 	.rpc_call_done = nfs41_free_stateid_done,
 	.rpc_release = nfs41_free_stateid_release,
@@ -7270,6 +7708,9 @@
 	};
 	struct nfs_free_stateid_data *data;
 
+	nfs4_state_protect(server->nfs_client, NFS_SP4_MACH_CRED_STATEID,
+		&task_setup.rpc_client, &msg);
+
 	dprintk("NFS call  free_stateid %p\n", stateid);
 	data = kmalloc(sizeof(*data), GFP_NOFS);
 	if (!data)
@@ -7281,7 +7722,7 @@
 
 	msg.rpc_argp = &data->args;
 	msg.rpc_resp = &data->res;
-	nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 0);
+	nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0);
 	if (privileged)
 		nfs4_set_sequence_privileged(&data->args.seq_args);
 
@@ -7357,7 +7798,6 @@
 	.recover_open	= nfs4_open_reclaim,
 	.recover_lock	= nfs4_lock_reclaim,
 	.establish_clid = nfs4_init_clientid,
-	.get_clid_cred	= nfs4_get_setclientid_cred,
 	.detect_trunking = nfs40_discover_server_trunking,
 };
 
@@ -7368,7 +7808,6 @@
 	.recover_open	= nfs4_open_reclaim,
 	.recover_lock	= nfs4_lock_reclaim,
 	.establish_clid = nfs41_init_clientid,
-	.get_clid_cred	= nfs4_get_exchange_id_cred,
 	.reclaim_complete = nfs41_proc_reclaim_complete,
 	.detect_trunking = nfs41_discover_server_trunking,
 };
@@ -7380,7 +7819,6 @@
 	.recover_open	= nfs4_open_expired,
 	.recover_lock	= nfs4_lock_expired,
 	.establish_clid = nfs4_init_clientid,
-	.get_clid_cred	= nfs4_get_setclientid_cred,
 };
 
 #if defined(CONFIG_NFS_V4_1)
@@ -7390,7 +7828,6 @@
 	.recover_open	= nfs41_open_expired,
 	.recover_lock	= nfs41_lock_expired,
 	.establish_clid = nfs41_init_clientid,
-	.get_clid_cred	= nfs4_get_exchange_id_cred,
 };
 #endif /* CONFIG_NFS_V4_1 */
 
@@ -7414,10 +7851,12 @@
 		| NFS_CAP_ATOMIC_OPEN
 		| NFS_CAP_CHANGE_ATTR
 		| NFS_CAP_POSIX_LOCK,
-	.call_sync = _nfs4_call_sync,
+	.init_client = nfs40_init_client,
+	.shutdown_client = nfs40_shutdown_client,
 	.match_stateid = nfs4_match_stateid,
 	.find_root_sec = nfs4_find_root_sec,
 	.free_lock_state = nfs4_release_lockowner,
+	.call_sync_ops = &nfs40_call_sync_ops,
 	.reboot_recovery_ops = &nfs40_reboot_recovery_ops,
 	.nograce_recovery_ops = &nfs40_nograce_recovery_ops,
 	.state_renewal_ops = &nfs40_state_renewal_ops,
@@ -7432,10 +7871,12 @@
 		| NFS_CAP_POSIX_LOCK
 		| NFS_CAP_STATEID_NFSV41
 		| NFS_CAP_ATOMIC_OPEN_V1,
-	.call_sync = nfs4_call_sync_sequence,
+	.init_client = nfs41_init_client,
+	.shutdown_client = nfs41_shutdown_client,
 	.match_stateid = nfs41_match_stateid,
 	.find_root_sec = nfs41_find_root_sec,
 	.free_lock_state = nfs41_free_lock_state,
+	.call_sync_ops = &nfs41_call_sync_ops,
 	.reboot_recovery_ops = &nfs41_reboot_recovery_ops,
 	.nograce_recovery_ops = &nfs41_nograce_recovery_ops,
 	.state_renewal_ops = &nfs41_state_renewal_ops,
@@ -7451,10 +7892,12 @@
 		| NFS_CAP_POSIX_LOCK
 		| NFS_CAP_STATEID_NFSV41
 		| NFS_CAP_ATOMIC_OPEN_V1,
-	.call_sync = nfs4_call_sync_sequence,
+	.init_client = nfs41_init_client,
+	.shutdown_client = nfs41_shutdown_client,
 	.match_stateid = nfs41_match_stateid,
 	.find_root_sec = nfs41_find_root_sec,
 	.free_lock_state = nfs41_free_lock_state,
+	.call_sync_ops = &nfs41_call_sync_ops,
 	.reboot_recovery_ops = &nfs41_reboot_recovery_ops,
 	.nograce_recovery_ops = &nfs41_nograce_recovery_ops,
 	.state_renewal_ops = &nfs41_state_renewal_ops,
@@ -7471,7 +7914,7 @@
 #endif
 };
 
-const struct inode_operations nfs4_dir_inode_operations = {
+static const struct inode_operations nfs4_dir_inode_operations = {
 	.create		= nfs_create,
 	.lookup		= nfs_lookup,
 	.atomic_open	= nfs_atomic_open,
diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c
index 36e21cb..cf883c7 100644
--- a/fs/nfs/nfs4session.c
+++ b/fs/nfs/nfs4session.c
@@ -23,6 +23,14 @@
 
 #define NFSDBG_FACILITY		NFSDBG_STATE
 
+static void nfs4_init_slot_table(struct nfs4_slot_table *tbl, const char *queue)
+{
+	tbl->highest_used_slotid = NFS4_NO_SLOT;
+	spin_lock_init(&tbl->slot_tbl_lock);
+	rpc_init_priority_wait_queue(&tbl->slot_tbl_waitq, queue);
+	init_completion(&tbl->complete);
+}
+
 /*
  * nfs4_shrink_slot_table - free retired slots from the slot table
  */
@@ -44,6 +52,17 @@
 	}
 }
 
+/**
+ * nfs4_slot_tbl_drain_complete - wake waiters when drain is complete
+ * @tbl - controlling slot table
+ *
+ */
+void nfs4_slot_tbl_drain_complete(struct nfs4_slot_table *tbl)
+{
+	if (nfs4_slot_tbl_draining(tbl))
+		complete(&tbl->complete);
+}
+
 /*
  * nfs4_free_slot - free a slot and efficiently update slot table.
  *
@@ -76,7 +95,7 @@
 			nfs4_slot_tbl_drain_complete(tbl);
 		}
 	}
-	dprintk("%s: slotid %u highest_used_slotid %d\n", __func__,
+	dprintk("%s: slotid %u highest_used_slotid %u\n", __func__,
 		slotid, tbl->highest_used_slotid);
 }
 
@@ -146,9 +165,9 @@
 	ret->generation = tbl->generation;
 
 out:
-	dprintk("<-- %s used_slots=%04lx highest_used=%d slotid=%d \n",
+	dprintk("<-- %s used_slots=%04lx highest_used=%u slotid=%u\n",
 		__func__, tbl->used_slots[0], tbl->highest_used_slotid,
-		!IS_ERR(ret) ? ret->slot_nr : -1);
+		!IS_ERR(ret) ? ret->slot_nr : NFS4_NO_SLOT);
 	return ret;
 }
 
@@ -191,7 +210,7 @@
 {
 	int ret;
 
-	dprintk("--> %s: max_reqs=%u, tbl->max_slots %d\n", __func__,
+	dprintk("--> %s: max_reqs=%u, tbl->max_slots %u\n", __func__,
 		max_reqs, tbl->max_slots);
 
 	if (max_reqs > NFS4_MAX_SLOT_TABLE)
@@ -205,18 +224,36 @@
 	nfs4_reset_slot_table(tbl, max_reqs - 1, ivalue);
 	spin_unlock(&tbl->slot_tbl_lock);
 
-	dprintk("%s: tbl=%p slots=%p max_slots=%d\n", __func__,
+	dprintk("%s: tbl=%p slots=%p max_slots=%u\n", __func__,
 		tbl, tbl->slots, tbl->max_slots);
 out:
 	dprintk("<-- %s: return %d\n", __func__, ret);
 	return ret;
 }
 
-/* Destroy the slot table */
-static void nfs4_destroy_slot_tables(struct nfs4_session *session)
+/**
+ * nfs4_release_slot_table - release resources attached to a slot table
+ * @tbl: slot table to shut down
+ *
+ */
+void nfs4_release_slot_table(struct nfs4_slot_table *tbl)
 {
-	nfs4_shrink_slot_table(&session->fc_slot_table, 0);
-	nfs4_shrink_slot_table(&session->bc_slot_table, 0);
+	nfs4_shrink_slot_table(tbl, 0);
+}
+
+/**
+ * nfs4_setup_slot_table - prepare a stand-alone slot table for use
+ * @tbl: slot table to set up
+ * @max_reqs: maximum number of requests allowed
+ * @queue: name to give RPC wait queue
+ *
+ * Returns zero on success, or a negative errno.
+ */
+int nfs4_setup_slot_table(struct nfs4_slot_table *tbl, unsigned int max_reqs,
+		const char *queue)
+{
+	nfs4_init_slot_table(tbl, queue);
+	return nfs4_realloc_slot_table(tbl, max_reqs, 0);
 }
 
 static bool nfs41_assign_slot(struct rpc_task *task, void *pslot)
@@ -273,6 +310,8 @@
 	}
 }
 
+#if defined(CONFIG_NFS_V4_1)
+
 static void nfs41_set_max_slotid_locked(struct nfs4_slot_table *tbl,
 		u32 target_highest_slotid)
 {
@@ -383,6 +422,12 @@
 	spin_unlock(&tbl->slot_tbl_lock);
 }
 
+static void nfs4_destroy_session_slot_tables(struct nfs4_session *session)
+{
+	nfs4_release_slot_table(&session->fc_slot_table);
+	nfs4_release_slot_table(&session->bc_slot_table);
+}
+
 /*
  * Initialize or reset the forechannel and backchannel tables
  */
@@ -405,31 +450,20 @@
 	if (status && tbl->slots == NULL)
 		/* Fore and back channel share a connection so get
 		 * both slot tables or neither */
-		nfs4_destroy_slot_tables(ses);
+		nfs4_destroy_session_slot_tables(ses);
 	return status;
 }
 
 struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp)
 {
 	struct nfs4_session *session;
-	struct nfs4_slot_table *tbl;
 
 	session = kzalloc(sizeof(struct nfs4_session), GFP_NOFS);
 	if (!session)
 		return NULL;
 
-	tbl = &session->fc_slot_table;
-	tbl->highest_used_slotid = NFS4_NO_SLOT;
-	spin_lock_init(&tbl->slot_tbl_lock);
-	rpc_init_priority_wait_queue(&tbl->slot_tbl_waitq, "ForeChannel Slot table");
-	init_completion(&tbl->complete);
-
-	tbl = &session->bc_slot_table;
-	tbl->highest_used_slotid = NFS4_NO_SLOT;
-	spin_lock_init(&tbl->slot_tbl_lock);
-	rpc_init_wait_queue(&tbl->slot_tbl_waitq, "BackChannel Slot table");
-	init_completion(&tbl->complete);
-
+	nfs4_init_slot_table(&session->fc_slot_table, "ForeChannel Slot table");
+	nfs4_init_slot_table(&session->bc_slot_table, "BackChannel Slot table");
 	session->session_state = 1<<NFS4_SESSION_INITING;
 
 	session->clp = clp;
@@ -441,7 +475,7 @@
 	struct rpc_xprt *xprt;
 	struct rpc_cred *cred;
 
-	cred = nfs4_get_exchange_id_cred(session->clp);
+	cred = nfs4_get_clid_cred(session->clp);
 	nfs4_proc_destroy_session(session, cred);
 	if (cred)
 		put_rpccred(cred);
@@ -452,7 +486,7 @@
 	dprintk("%s Destroy backchannel for xprt %p\n",
 		__func__, xprt);
 	xprt_destroy_backchannel(xprt, NFS41_BC_MIN_CALLBACKS);
-	nfs4_destroy_slot_tables(session);
+	nfs4_destroy_session_slot_tables(session);
 	kfree(session);
 }
 
@@ -513,4 +547,4 @@
 }
 EXPORT_SYMBOL_GPL(nfs4_init_ds_session);
 
-
+#endif	/* defined(CONFIG_NFS_V4_1) */
diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h
index 3a153d8..2323061 100644
--- a/fs/nfs/nfs4session.h
+++ b/fs/nfs/nfs4session.h
@@ -8,7 +8,7 @@
 #define __LINUX_FS_NFS_NFS4SESSION_H
 
 /* maximum number of slots to use */
-#define NFS4_DEF_SLOT_TABLE_SIZE (16U)
+#define NFS4_DEF_SLOT_TABLE_SIZE (64U)
 #define NFS4_MAX_SLOT_TABLE (1024U)
 #define NFS4_NO_SLOT ((u32)-1)
 
@@ -72,10 +72,22 @@
 	NFS4_SESSION_INITING,
 };
 
-#if defined(CONFIG_NFS_V4_1)
+extern int nfs4_setup_slot_table(struct nfs4_slot_table *tbl,
+		unsigned int max_reqs, const char *queue);
+extern void nfs4_release_slot_table(struct nfs4_slot_table *tbl);
 extern struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl);
 extern void nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot);
+extern void nfs4_slot_tbl_drain_complete(struct nfs4_slot_table *tbl);
+bool nfs41_wake_and_assign_slot(struct nfs4_slot_table *tbl,
+		struct nfs4_slot *slot);
+void nfs41_wake_slot_table(struct nfs4_slot_table *tbl);
 
+static inline bool nfs4_slot_tbl_draining(struct nfs4_slot_table *tbl)
+{
+	return !!test_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state);
+}
+
+#if defined(CONFIG_NFS_V4_1)
 extern void nfs41_set_target_slotid(struct nfs4_slot_table *tbl,
 		u32 target_highest_slotid);
 extern void nfs41_update_target_slotid(struct nfs4_slot_table *tbl,
@@ -89,17 +101,6 @@
 extern int nfs4_init_session(struct nfs_client *clp);
 extern int nfs4_init_ds_session(struct nfs_client *, unsigned long);
 
-extern void nfs4_slot_tbl_drain_complete(struct nfs4_slot_table *tbl);
-
-static inline bool nfs4_slot_tbl_draining(struct nfs4_slot_table *tbl)
-{
-	return !!test_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state);
-}
-
-bool nfs41_wake_and_assign_slot(struct nfs4_slot_table *tbl,
-		struct nfs4_slot *slot);
-void nfs41_wake_slot_table(struct nfs4_slot_table *tbl);
-
 /*
  * Determine if sessions are in use.
  */
@@ -117,6 +118,16 @@
 	return 0;
 }
 
+#ifdef CONFIG_CRC32
+/*
+ * nfs_session_id_hash - calculate the crc32 hash for the session id
+ * @session - pointer to session
+ */
+#define nfs_session_id_hash(sess_id) \
+	(~crc32_le(0xFFFFFFFF, &(sess_id)->data[0], sizeof((sess_id)->data)))
+#else
+#define nfs_session_id_hash(session) (0)
+#endif
 #else /* defined(CONFIG_NFS_V4_1) */
 
 static inline int nfs4_init_session(struct nfs_client *clp)
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index e22862f..cc14cbb 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -154,6 +154,19 @@
 	return cred;
 }
 
+static void nfs4_root_machine_cred(struct nfs_client *clp)
+{
+	struct rpc_cred *cred, *new;
+
+	new = rpc_lookup_machine_cred(NULL);
+	spin_lock(&clp->cl_lock);
+	cred = clp->cl_machine_cred;
+	clp->cl_machine_cred = new;
+	spin_unlock(&clp->cl_lock);
+	if (cred != NULL)
+		put_rpccred(cred);
+}
+
 static struct rpc_cred *
 nfs4_get_renew_cred_server_locked(struct nfs_server *server)
 {
@@ -202,8 +215,61 @@
 	return cred;
 }
 
+static void nfs4_end_drain_slot_table(struct nfs4_slot_table *tbl)
+{
+	if (test_and_clear_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state)) {
+		spin_lock(&tbl->slot_tbl_lock);
+		nfs41_wake_slot_table(tbl);
+		spin_unlock(&tbl->slot_tbl_lock);
+	}
+}
+
+static void nfs4_end_drain_session(struct nfs_client *clp)
+{
+	struct nfs4_session *ses = clp->cl_session;
+
+	if (clp->cl_slot_tbl) {
+		nfs4_end_drain_slot_table(clp->cl_slot_tbl);
+		return;
+	}
+
+	if (ses != NULL) {
+		nfs4_end_drain_slot_table(&ses->bc_slot_table);
+		nfs4_end_drain_slot_table(&ses->fc_slot_table);
+	}
+}
+
 #if defined(CONFIG_NFS_V4_1)
 
+static int nfs4_drain_slot_tbl(struct nfs4_slot_table *tbl)
+{
+	set_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state);
+	spin_lock(&tbl->slot_tbl_lock);
+	if (tbl->highest_used_slotid != NFS4_NO_SLOT) {
+		INIT_COMPLETION(tbl->complete);
+		spin_unlock(&tbl->slot_tbl_lock);
+		return wait_for_completion_interruptible(&tbl->complete);
+	}
+	spin_unlock(&tbl->slot_tbl_lock);
+	return 0;
+}
+
+static int nfs4_begin_drain_session(struct nfs_client *clp)
+{
+	struct nfs4_session *ses = clp->cl_session;
+	int ret = 0;
+
+	if (clp->cl_slot_tbl)
+		return nfs4_drain_slot_tbl(clp->cl_slot_tbl);
+
+	/* back channel */
+	ret = nfs4_drain_slot_tbl(&ses->bc_slot_table);
+	if (ret)
+		return ret;
+	/* fore channel */
+	return nfs4_drain_slot_tbl(&ses->fc_slot_table);
+}
+
 static int nfs41_setup_state_renewal(struct nfs_client *clp)
 {
 	int status;
@@ -228,60 +294,6 @@
 	return status;
 }
 
-static void nfs4_end_drain_slot_table(struct nfs4_slot_table *tbl)
-{
-	if (test_and_clear_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state)) {
-		spin_lock(&tbl->slot_tbl_lock);
-		nfs41_wake_slot_table(tbl);
-		spin_unlock(&tbl->slot_tbl_lock);
-	}
-}
-
-static void nfs4_end_drain_session(struct nfs_client *clp)
-{
-	struct nfs4_session *ses = clp->cl_session;
-
-	if (ses != NULL) {
-		nfs4_end_drain_slot_table(&ses->bc_slot_table);
-		nfs4_end_drain_slot_table(&ses->fc_slot_table);
-	}
-}
-
-/*
- * Signal state manager thread if session fore channel is drained
- */
-void nfs4_slot_tbl_drain_complete(struct nfs4_slot_table *tbl)
-{
-	if (nfs4_slot_tbl_draining(tbl))
-		complete(&tbl->complete);
-}
-
-static int nfs4_drain_slot_tbl(struct nfs4_slot_table *tbl)
-{
-	set_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state);
-	spin_lock(&tbl->slot_tbl_lock);
-	if (tbl->highest_used_slotid != NFS4_NO_SLOT) {
-		INIT_COMPLETION(tbl->complete);
-		spin_unlock(&tbl->slot_tbl_lock);
-		return wait_for_completion_interruptible(&tbl->complete);
-	}
-	spin_unlock(&tbl->slot_tbl_lock);
-	return 0;
-}
-
-static int nfs4_begin_drain_session(struct nfs_client *clp)
-{
-	struct nfs4_session *ses = clp->cl_session;
-	int ret = 0;
-
-	/* back channel */
-	ret = nfs4_drain_slot_tbl(&ses->bc_slot_table);
-	if (ret)
-		return ret;
-	/* fore channel */
-	return nfs4_drain_slot_tbl(&ses->fc_slot_table);
-}
-
 static void nfs41_finish_session_reset(struct nfs_client *clp)
 {
 	clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
@@ -339,62 +351,21 @@
 	return nfs41_walk_client_list(clp, result, cred);
 }
 
-struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp)
-{
-	struct rpc_cred *cred;
-
-	spin_lock(&clp->cl_lock);
-	cred = nfs4_get_machine_cred_locked(clp);
-	spin_unlock(&clp->cl_lock);
-	return cred;
-}
-
 #endif /* CONFIG_NFS_V4_1 */
 
-static struct rpc_cred *
-nfs4_get_setclientid_cred_server(struct nfs_server *server)
-{
-	struct nfs_client *clp = server->nfs_client;
-	struct rpc_cred *cred = NULL;
-	struct nfs4_state_owner *sp;
-	struct rb_node *pos;
-
-	spin_lock(&clp->cl_lock);
-	pos = rb_first(&server->state_owners);
-	if (pos != NULL) {
-		sp = rb_entry(pos, struct nfs4_state_owner, so_server_node);
-		cred = get_rpccred(sp->so_cred);
-	}
-	spin_unlock(&clp->cl_lock);
-	return cred;
-}
-
 /**
- * nfs4_get_setclientid_cred - Acquire credential for a setclientid operation
+ * nfs4_get_clid_cred - Acquire credential for a setclientid operation
  * @clp: client state handle
  *
  * Returns an rpc_cred with reference count bumped, or NULL.
  */
-struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp)
+struct rpc_cred *nfs4_get_clid_cred(struct nfs_client *clp)
 {
-	struct nfs_server *server;
 	struct rpc_cred *cred;
 
 	spin_lock(&clp->cl_lock);
 	cred = nfs4_get_machine_cred_locked(clp);
 	spin_unlock(&clp->cl_lock);
-	if (cred != NULL)
-		goto out;
-
-	rcu_read_lock();
-	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
-		cred = nfs4_get_setclientid_cred_server(server);
-		if (cred != NULL)
-			break;
-	}
-	rcu_read_unlock();
-
-out:
 	return cred;
 }
 
@@ -998,7 +969,9 @@
 	fl_pid = lockowner->l_pid;
 	spin_lock(&state->state_lock);
 	lsp = __nfs4_find_lock_state(state, fl_owner, fl_pid, NFS4_ANY_LOCK_TYPE);
-	if (lsp != NULL && test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) != 0) {
+	if (lsp && test_bit(NFS_LOCK_LOST, &lsp->ls_flags))
+		ret = -EIO;
+	else if (lsp != NULL && test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) != 0) {
 		nfs4_stateid_copy(dst, &lsp->ls_stateid);
 		ret = 0;
 		smp_rmb();
@@ -1038,11 +1011,17 @@
 int nfs4_select_rw_stateid(nfs4_stateid *dst, struct nfs4_state *state,
 		fmode_t fmode, const struct nfs_lockowner *lockowner)
 {
-	int ret = 0;
+	int ret = nfs4_copy_lock_stateid(dst, state, lockowner);
+	if (ret == -EIO)
+		/* A lost lock - don't even consider delegations */
+		goto out;
 	if (nfs4_copy_delegation_stateid(dst, state->inode, fmode))
 		goto out;
-	ret = nfs4_copy_lock_stateid(dst, state, lockowner);
 	if (ret != -ENOENT)
+		/* nfs4_copy_delegation_stateid() didn't over-write
+		 * dst, so it still has the lock stateid which we now
+		 * choose to use.
+		 */
 		goto out;
 	ret = nfs4_copy_open_stateid(dst, state);
 out:
@@ -1443,14 +1422,16 @@
 		if (status >= 0) {
 			status = nfs4_reclaim_locks(state, ops);
 			if (status >= 0) {
-				spin_lock(&state->state_lock);
-				list_for_each_entry(lock, &state->lock_states, ls_locks) {
-					if (!test_bit(NFS_LOCK_INITIALIZED, &lock->ls_flags))
-						pr_warn_ratelimited("NFS: "
-							"%s: Lock reclaim "
-							"failed!\n", __func__);
+				if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0) {
+					spin_lock(&state->state_lock);
+					list_for_each_entry(lock, &state->lock_states, ls_locks) {
+						if (!test_bit(NFS_LOCK_INITIALIZED, &lock->ls_flags))
+							pr_warn_ratelimited("NFS: "
+									    "%s: Lock reclaim "
+									    "failed!\n", __func__);
+					}
+					spin_unlock(&state->state_lock);
 				}
-				spin_unlock(&state->state_lock);
 				nfs4_put_open_state(state);
 				spin_lock(&sp->so_lock);
 				goto restart;
@@ -1618,7 +1599,7 @@
 	if (!nfs4_state_clear_reclaim_reboot(clp))
 		return;
 	ops = clp->cl_mvops->reboot_recovery_ops;
-	cred = ops->get_clid_cred(clp);
+	cred = nfs4_get_clid_cred(clp);
 	nfs4_reclaim_complete(clp, ops, cred);
 	put_rpccred(cred);
 }
@@ -1732,7 +1713,7 @@
 	cred = ops->get_state_renewal_cred_locked(clp);
 	spin_unlock(&clp->cl_lock);
 	if (cred == NULL) {
-		cred = nfs4_get_setclientid_cred(clp);
+		cred = nfs4_get_clid_cred(clp);
 		status = -ENOKEY;
 		if (cred == NULL)
 			goto out;
@@ -1804,7 +1785,7 @@
 		clp->cl_mvops->reboot_recovery_ops;
 	int status;
 
-	cred = ops->get_clid_cred(clp);
+	cred = nfs4_get_clid_cred(clp);
 	if (cred == NULL)
 		return -ENOENT;
 	status = ops->establish_clid(clp, cred);
@@ -1878,7 +1859,7 @@
 	mutex_lock(&nfs_clid_init_mutex);
 again:
 	status  = -ENOENT;
-	cred = ops->get_clid_cred(clp);
+	cred = nfs4_get_clid_cred(clp);
 	if (cred == NULL)
 		goto out_unlock;
 
@@ -1896,7 +1877,11 @@
 			__func__, status);
 		goto again;
 	case -EACCES:
-		if (i++)
+		if (i++ == 0) {
+			nfs4_root_machine_cred(clp);
+			goto again;
+		}
+		if (i > 2)
 			break;
 	case -NFS4ERR_CLID_INUSE:
 	case -NFS4ERR_WRONGSEC:
@@ -2052,7 +2037,7 @@
 	if (!nfs4_has_session(clp))
 		return 0;
 	nfs4_begin_drain_session(clp);
-	cred = nfs4_get_exchange_id_cred(clp);
+	cred = nfs4_get_clid_cred(clp);
 	status = nfs4_proc_destroy_session(clp->cl_session, cred);
 	switch (status) {
 	case 0:
@@ -2095,7 +2080,7 @@
 	if (!nfs4_has_session(clp))
 		return 0;
 	nfs4_begin_drain_session(clp);
-	cred = nfs4_get_exchange_id_cred(clp);
+	cred = nfs4_get_clid_cred(clp);
 	ret = nfs4_proc_bind_conn_to_session(clp, cred);
 	if (cred)
 		put_rpccred(cred);
@@ -2116,7 +2101,6 @@
 }
 #else /* CONFIG_NFS_V4_1 */
 static int nfs4_reset_session(struct nfs_client *clp) { return 0; }
-static int nfs4_end_drain_session(struct nfs_client *clp) { return 0; }
 
 static int nfs4_bind_conn_to_session(struct nfs_client *clp)
 {
diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c
index 5dbe2d2..e26acdd 100644
--- a/fs/nfs/nfs4super.c
+++ b/fs/nfs/nfs4super.c
@@ -253,8 +253,6 @@
 
 	dfprintk(MOUNT, "--> nfs4_try_mount()\n");
 
-	if (data->auth_flavors[0] == RPC_AUTH_MAXFLAVOR)
-		data->auth_flavors[0] = RPC_AUTH_UNIX;
 	export_path = data->nfs_server.export_path;
 	data->nfs_server.export_path = "/";
 	root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, mount_info,
diff --git a/fs/nfs/nfs4trace.c b/fs/nfs/nfs4trace.c
new file mode 100644
index 0000000..d774335
--- /dev/null
+++ b/fs/nfs/nfs4trace.c
@@ -0,0 +1,17 @@
+/*
+ * Copyright (c) 2013 Trond Myklebust <Trond.Myklebust@netapp.com>
+ */
+#include <linux/nfs_fs.h>
+#include "nfs4_fs.h"
+#include "internal.h"
+#include "nfs4session.h"
+#include "callback.h"
+
+#define CREATE_TRACE_POINTS
+#include "nfs4trace.h"
+
+#ifdef CONFIG_NFS_V4_1
+EXPORT_TRACEPOINT_SYMBOL_GPL(nfs4_pnfs_read);
+EXPORT_TRACEPOINT_SYMBOL_GPL(nfs4_pnfs_write);
+EXPORT_TRACEPOINT_SYMBOL_GPL(nfs4_pnfs_commit_ds);
+#endif
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
new file mode 100644
index 0000000..849cf14
--- /dev/null
+++ b/fs/nfs/nfs4trace.h
@@ -0,0 +1,1148 @@
+/*
+ * Copyright (c) 2013 Trond Myklebust <Trond.Myklebust@netapp.com>
+ */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM nfs4
+
+#if !defined(_TRACE_NFS4_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_NFS4_H
+
+#include <linux/tracepoint.h>
+
+#define show_nfsv4_errors(error) \
+	__print_symbolic(error, \
+		{ NFS4_OK, "OK" }, \
+		/* Mapped by nfs4_stat_to_errno() */ \
+		{ -EPERM, "EPERM" }, \
+		{ -ENOENT, "ENOENT" }, \
+		{ -EIO, "EIO" }, \
+		{ -ENXIO, "ENXIO" }, \
+		{ -EACCES, "EACCES" }, \
+		{ -EEXIST, "EEXIST" }, \
+		{ -EXDEV, "EXDEV" }, \
+		{ -ENOTDIR, "ENOTDIR" }, \
+		{ -EISDIR, "EISDIR" }, \
+		{ -EFBIG, "EFBIG" }, \
+		{ -ENOSPC, "ENOSPC" }, \
+		{ -EROFS, "EROFS" }, \
+		{ -EMLINK, "EMLINK" }, \
+		{ -ENAMETOOLONG, "ENAMETOOLONG" }, \
+		{ -ENOTEMPTY, "ENOTEMPTY" }, \
+		{ -EDQUOT, "EDQUOT" }, \
+		{ -ESTALE, "ESTALE" }, \
+		{ -EBADHANDLE, "EBADHANDLE" }, \
+		{ -EBADCOOKIE, "EBADCOOKIE" }, \
+		{ -ENOTSUPP, "ENOTSUPP" }, \
+		{ -ETOOSMALL, "ETOOSMALL" }, \
+		{ -EREMOTEIO, "EREMOTEIO" }, \
+		{ -EBADTYPE, "EBADTYPE" }, \
+		{ -EAGAIN, "EAGAIN" }, \
+		{ -ELOOP, "ELOOP" }, \
+		{ -EOPNOTSUPP, "EOPNOTSUPP" }, \
+		{ -EDEADLK, "EDEADLK" }, \
+		/* RPC errors */ \
+		{ -ENOMEM, "ENOMEM" }, \
+		{ -EKEYEXPIRED, "EKEYEXPIRED" }, \
+		{ -ETIMEDOUT, "ETIMEDOUT" }, \
+		{ -ERESTARTSYS, "ERESTARTSYS" }, \
+		{ -ECONNREFUSED, "ECONNREFUSED" }, \
+		{ -ECONNRESET, "ECONNRESET" }, \
+		{ -ENETUNREACH, "ENETUNREACH" }, \
+		{ -EHOSTUNREACH, "EHOSTUNREACH" }, \
+		{ -EHOSTDOWN, "EHOSTDOWN" }, \
+		{ -EPIPE, "EPIPE" }, \
+		{ -EPFNOSUPPORT, "EPFNOSUPPORT" }, \
+		{ -EPROTONOSUPPORT, "EPROTONOSUPPORT" }, \
+		/* NFSv4 native errors */ \
+		{ -NFS4ERR_ACCESS, "ACCESS" }, \
+		{ -NFS4ERR_ATTRNOTSUPP, "ATTRNOTSUPP" }, \
+		{ -NFS4ERR_ADMIN_REVOKED, "ADMIN_REVOKED" }, \
+		{ -NFS4ERR_BACK_CHAN_BUSY, "BACK_CHAN_BUSY" }, \
+		{ -NFS4ERR_BADCHAR, "BADCHAR" }, \
+		{ -NFS4ERR_BADHANDLE, "BADHANDLE" }, \
+		{ -NFS4ERR_BADIOMODE, "BADIOMODE" }, \
+		{ -NFS4ERR_BADLAYOUT, "BADLAYOUT" }, \
+		{ -NFS4ERR_BADLABEL, "BADLABEL" }, \
+		{ -NFS4ERR_BADNAME, "BADNAME" }, \
+		{ -NFS4ERR_BADOWNER, "BADOWNER" }, \
+		{ -NFS4ERR_BADSESSION, "BADSESSION" }, \
+		{ -NFS4ERR_BADSLOT, "BADSLOT" }, \
+		{ -NFS4ERR_BADTYPE, "BADTYPE" }, \
+		{ -NFS4ERR_BADXDR, "BADXDR" }, \
+		{ -NFS4ERR_BAD_COOKIE, "BAD_COOKIE" }, \
+		{ -NFS4ERR_BAD_HIGH_SLOT, "BAD_HIGH_SLOT" }, \
+		{ -NFS4ERR_BAD_RANGE, "BAD_RANGE" }, \
+		{ -NFS4ERR_BAD_SEQID, "BAD_SEQID" }, \
+		{ -NFS4ERR_BAD_SESSION_DIGEST, "BAD_SESSION_DIGEST" }, \
+		{ -NFS4ERR_BAD_STATEID, "BAD_STATEID" }, \
+		{ -NFS4ERR_CB_PATH_DOWN, "CB_PATH_DOWN" }, \
+		{ -NFS4ERR_CLID_INUSE, "CLID_INUSE" }, \
+		{ -NFS4ERR_CLIENTID_BUSY, "CLIENTID_BUSY" }, \
+		{ -NFS4ERR_COMPLETE_ALREADY, "COMPLETE_ALREADY" }, \
+		{ -NFS4ERR_CONN_NOT_BOUND_TO_SESSION, \
+			"CONN_NOT_BOUND_TO_SESSION" }, \
+		{ -NFS4ERR_DEADLOCK, "DEADLOCK" }, \
+		{ -NFS4ERR_DEADSESSION, "DEAD_SESSION" }, \
+		{ -NFS4ERR_DELAY, "DELAY" }, \
+		{ -NFS4ERR_DELEG_ALREADY_WANTED, \
+			"DELEG_ALREADY_WANTED" }, \
+		{ -NFS4ERR_DELEG_REVOKED, "DELEG_REVOKED" }, \
+		{ -NFS4ERR_DENIED, "DENIED" }, \
+		{ -NFS4ERR_DIRDELEG_UNAVAIL, "DIRDELEG_UNAVAIL" }, \
+		{ -NFS4ERR_DQUOT, "DQUOT" }, \
+		{ -NFS4ERR_ENCR_ALG_UNSUPP, "ENCR_ALG_UNSUPP" }, \
+		{ -NFS4ERR_EXIST, "EXIST" }, \
+		{ -NFS4ERR_EXPIRED, "EXPIRED" }, \
+		{ -NFS4ERR_FBIG, "FBIG" }, \
+		{ -NFS4ERR_FHEXPIRED, "FHEXPIRED" }, \
+		{ -NFS4ERR_FILE_OPEN, "FILE_OPEN" }, \
+		{ -NFS4ERR_GRACE, "GRACE" }, \
+		{ -NFS4ERR_HASH_ALG_UNSUPP, "HASH_ALG_UNSUPP" }, \
+		{ -NFS4ERR_INVAL, "INVAL" }, \
+		{ -NFS4ERR_IO, "IO" }, \
+		{ -NFS4ERR_ISDIR, "ISDIR" }, \
+		{ -NFS4ERR_LAYOUTTRYLATER, "LAYOUTTRYLATER" }, \
+		{ -NFS4ERR_LAYOUTUNAVAILABLE, "LAYOUTUNAVAILABLE" }, \
+		{ -NFS4ERR_LEASE_MOVED, "LEASE_MOVED" }, \
+		{ -NFS4ERR_LOCKED, "LOCKED" }, \
+		{ -NFS4ERR_LOCKS_HELD, "LOCKS_HELD" }, \
+		{ -NFS4ERR_LOCK_RANGE, "LOCK_RANGE" }, \
+		{ -NFS4ERR_MINOR_VERS_MISMATCH, "MINOR_VERS_MISMATCH" }, \
+		{ -NFS4ERR_MLINK, "MLINK" }, \
+		{ -NFS4ERR_MOVED, "MOVED" }, \
+		{ -NFS4ERR_NAMETOOLONG, "NAMETOOLONG" }, \
+		{ -NFS4ERR_NOENT, "NOENT" }, \
+		{ -NFS4ERR_NOFILEHANDLE, "NOFILEHANDLE" }, \
+		{ -NFS4ERR_NOMATCHING_LAYOUT, "NOMATCHING_LAYOUT" }, \
+		{ -NFS4ERR_NOSPC, "NOSPC" }, \
+		{ -NFS4ERR_NOTDIR, "NOTDIR" }, \
+		{ -NFS4ERR_NOTEMPTY, "NOTEMPTY" }, \
+		{ -NFS4ERR_NOTSUPP, "NOTSUPP" }, \
+		{ -NFS4ERR_NOT_ONLY_OP, "NOT_ONLY_OP" }, \
+		{ -NFS4ERR_NOT_SAME, "NOT_SAME" }, \
+		{ -NFS4ERR_NO_GRACE, "NO_GRACE" }, \
+		{ -NFS4ERR_NXIO, "NXIO" }, \
+		{ -NFS4ERR_OLD_STATEID, "OLD_STATEID" }, \
+		{ -NFS4ERR_OPENMODE, "OPENMODE" }, \
+		{ -NFS4ERR_OP_ILLEGAL, "OP_ILLEGAL" }, \
+		{ -NFS4ERR_OP_NOT_IN_SESSION, "OP_NOT_IN_SESSION" }, \
+		{ -NFS4ERR_PERM, "PERM" }, \
+		{ -NFS4ERR_PNFS_IO_HOLE, "PNFS_IO_HOLE" }, \
+		{ -NFS4ERR_PNFS_NO_LAYOUT, "PNFS_NO_LAYOUT" }, \
+		{ -NFS4ERR_RECALLCONFLICT, "RECALLCONFLICT" }, \
+		{ -NFS4ERR_RECLAIM_BAD, "RECLAIM_BAD" }, \
+		{ -NFS4ERR_RECLAIM_CONFLICT, "RECLAIM_CONFLICT" }, \
+		{ -NFS4ERR_REJECT_DELEG, "REJECT_DELEG" }, \
+		{ -NFS4ERR_REP_TOO_BIG, "REP_TOO_BIG" }, \
+		{ -NFS4ERR_REP_TOO_BIG_TO_CACHE, \
+			"REP_TOO_BIG_TO_CACHE" }, \
+		{ -NFS4ERR_REQ_TOO_BIG, "REQ_TOO_BIG" }, \
+		{ -NFS4ERR_RESOURCE, "RESOURCE" }, \
+		{ -NFS4ERR_RESTOREFH, "RESTOREFH" }, \
+		{ -NFS4ERR_RETRY_UNCACHED_REP, "RETRY_UNCACHED_REP" }, \
+		{ -NFS4ERR_RETURNCONFLICT, "RETURNCONFLICT" }, \
+		{ -NFS4ERR_ROFS, "ROFS" }, \
+		{ -NFS4ERR_SAME, "SAME" }, \
+		{ -NFS4ERR_SHARE_DENIED, "SHARE_DENIED" }, \
+		{ -NFS4ERR_SEQUENCE_POS, "SEQUENCE_POS" }, \
+		{ -NFS4ERR_SEQ_FALSE_RETRY, "SEQ_FALSE_RETRY" }, \
+		{ -NFS4ERR_SEQ_MISORDERED, "SEQ_MISORDERED" }, \
+		{ -NFS4ERR_SERVERFAULT, "SERVERFAULT" }, \
+		{ -NFS4ERR_STALE, "STALE" }, \
+		{ -NFS4ERR_STALE_CLIENTID, "STALE_CLIENTID" }, \
+		{ -NFS4ERR_STALE_STATEID, "STALE_STATEID" }, \
+		{ -NFS4ERR_SYMLINK, "SYMLINK" }, \
+		{ -NFS4ERR_TOOSMALL, "TOOSMALL" }, \
+		{ -NFS4ERR_TOO_MANY_OPS, "TOO_MANY_OPS" }, \
+		{ -NFS4ERR_UNKNOWN_LAYOUTTYPE, "UNKNOWN_LAYOUTTYPE" }, \
+		{ -NFS4ERR_UNSAFE_COMPOUND, "UNSAFE_COMPOUND" }, \
+		{ -NFS4ERR_WRONGSEC, "WRONGSEC" }, \
+		{ -NFS4ERR_WRONG_CRED, "WRONG_CRED" }, \
+		{ -NFS4ERR_WRONG_TYPE, "WRONG_TYPE" }, \
+		{ -NFS4ERR_XDEV, "XDEV" })
+
+#define show_open_flags(flags) \
+	__print_flags(flags, "|", \
+		{ O_CREAT, "O_CREAT" }, \
+		{ O_EXCL, "O_EXCL" }, \
+		{ O_TRUNC, "O_TRUNC" }, \
+		{ O_DIRECT, "O_DIRECT" })
+
+#define show_fmode_flags(mode) \
+	__print_flags(mode, "|", \
+		{ ((__force unsigned long)FMODE_READ), "READ" }, \
+		{ ((__force unsigned long)FMODE_WRITE), "WRITE" }, \
+		{ ((__force unsigned long)FMODE_EXEC), "EXEC" })
+
+#define show_nfs_fattr_flags(valid) \
+	__print_flags((unsigned long)valid, "|", \
+		{ NFS_ATTR_FATTR_TYPE, "TYPE" }, \
+		{ NFS_ATTR_FATTR_MODE, "MODE" }, \
+		{ NFS_ATTR_FATTR_NLINK, "NLINK" }, \
+		{ NFS_ATTR_FATTR_OWNER, "OWNER" }, \
+		{ NFS_ATTR_FATTR_GROUP, "GROUP" }, \
+		{ NFS_ATTR_FATTR_RDEV, "RDEV" }, \
+		{ NFS_ATTR_FATTR_SIZE, "SIZE" }, \
+		{ NFS_ATTR_FATTR_FSID, "FSID" }, \
+		{ NFS_ATTR_FATTR_FILEID, "FILEID" }, \
+		{ NFS_ATTR_FATTR_ATIME, "ATIME" }, \
+		{ NFS_ATTR_FATTR_MTIME, "MTIME" }, \
+		{ NFS_ATTR_FATTR_CTIME, "CTIME" }, \
+		{ NFS_ATTR_FATTR_CHANGE, "CHANGE" }, \
+		{ NFS_ATTR_FATTR_OWNER_NAME, "OWNER_NAME" }, \
+		{ NFS_ATTR_FATTR_GROUP_NAME, "GROUP_NAME" })
+
+DECLARE_EVENT_CLASS(nfs4_clientid_event,
+		TP_PROTO(
+			const struct nfs_client *clp,
+			int error
+		),
+
+		TP_ARGS(clp, error),
+
+		TP_STRUCT__entry(
+			__string(dstaddr,
+				rpc_peeraddr2str(clp->cl_rpcclient,
+					RPC_DISPLAY_ADDR))
+			__field(int, error)
+		),
+
+		TP_fast_assign(
+			__entry->error = error;
+			__assign_str(dstaddr,
+				rpc_peeraddr2str(clp->cl_rpcclient,
+						RPC_DISPLAY_ADDR));
+		),
+
+		TP_printk(
+			"error=%d (%s) dstaddr=%s",
+			__entry->error,
+			show_nfsv4_errors(__entry->error),
+			__get_str(dstaddr)
+		)
+);
+#define DEFINE_NFS4_CLIENTID_EVENT(name) \
+	DEFINE_EVENT(nfs4_clientid_event, name,	 \
+			TP_PROTO( \
+				const struct nfs_client *clp, \
+				int error \
+			), \
+			TP_ARGS(clp, error))
+DEFINE_NFS4_CLIENTID_EVENT(nfs4_setclientid);
+DEFINE_NFS4_CLIENTID_EVENT(nfs4_setclientid_confirm);
+DEFINE_NFS4_CLIENTID_EVENT(nfs4_renew);
+DEFINE_NFS4_CLIENTID_EVENT(nfs4_renew_async);
+#ifdef CONFIG_NFS_V4_1
+DEFINE_NFS4_CLIENTID_EVENT(nfs4_exchange_id);
+DEFINE_NFS4_CLIENTID_EVENT(nfs4_create_session);
+DEFINE_NFS4_CLIENTID_EVENT(nfs4_destroy_session);
+DEFINE_NFS4_CLIENTID_EVENT(nfs4_destroy_clientid);
+DEFINE_NFS4_CLIENTID_EVENT(nfs4_bind_conn_to_session);
+DEFINE_NFS4_CLIENTID_EVENT(nfs4_sequence);
+DEFINE_NFS4_CLIENTID_EVENT(nfs4_reclaim_complete);
+
+TRACE_EVENT(nfs4_setup_sequence,
+		TP_PROTO(
+			const struct nfs4_session *session,
+			const struct nfs4_sequence_args *args
+		),
+		TP_ARGS(session, args),
+
+		TP_STRUCT__entry(
+			__field(unsigned int, session)
+			__field(unsigned int, slot_nr)
+			__field(unsigned int, seq_nr)
+			__field(unsigned int, highest_used_slotid)
+		),
+
+		TP_fast_assign(
+			const struct nfs4_slot *sa_slot = args->sa_slot;
+			__entry->session = nfs_session_id_hash(&session->sess_id);
+			__entry->slot_nr = sa_slot->slot_nr;
+			__entry->seq_nr = sa_slot->seq_nr;
+			__entry->highest_used_slotid =
+					sa_slot->table->highest_used_slotid;
+		),
+		TP_printk(
+			"session=0x%08x slot_nr=%u seq_nr=%u "
+			"highest_used_slotid=%u",
+			__entry->session,
+			__entry->slot_nr,
+			__entry->seq_nr,
+			__entry->highest_used_slotid
+		)
+);
+
+#define show_nfs4_sequence_status_flags(status) \
+	__print_flags((unsigned long)status, "|", \
+		{ SEQ4_STATUS_CB_PATH_DOWN, "CB_PATH_DOWN" }, \
+		{ SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRING, \
+			"CB_GSS_CONTEXTS_EXPIRING" }, \
+		{ SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRED, \
+			"CB_GSS_CONTEXTS_EXPIRED" }, \
+		{ SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED, \
+			"EXPIRED_ALL_STATE_REVOKED" }, \
+		{ SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED, \
+			"EXPIRED_SOME_STATE_REVOKED" }, \
+		{ SEQ4_STATUS_ADMIN_STATE_REVOKED, \
+			"ADMIN_STATE_REVOKED" }, \
+		{ SEQ4_STATUS_RECALLABLE_STATE_REVOKED,	 \
+			"RECALLABLE_STATE_REVOKED" }, \
+		{ SEQ4_STATUS_LEASE_MOVED, "LEASE_MOVED" }, \
+		{ SEQ4_STATUS_RESTART_RECLAIM_NEEDED, \
+			"RESTART_RECLAIM_NEEDED" }, \
+		{ SEQ4_STATUS_CB_PATH_DOWN_SESSION, \
+			"CB_PATH_DOWN_SESSION" }, \
+		{ SEQ4_STATUS_BACKCHANNEL_FAULT, \
+			"BACKCHANNEL_FAULT" })
+
+TRACE_EVENT(nfs4_sequence_done,
+		TP_PROTO(
+			const struct nfs4_session *session,
+			const struct nfs4_sequence_res *res
+		),
+		TP_ARGS(session, res),
+
+		TP_STRUCT__entry(
+			__field(unsigned int, session)
+			__field(unsigned int, slot_nr)
+			__field(unsigned int, seq_nr)
+			__field(unsigned int, highest_slotid)
+			__field(unsigned int, target_highest_slotid)
+			__field(unsigned int, status_flags)
+			__field(int, error)
+		),
+
+		TP_fast_assign(
+			const struct nfs4_slot *sr_slot = res->sr_slot;
+			__entry->session = nfs_session_id_hash(&session->sess_id);
+			__entry->slot_nr = sr_slot->slot_nr;
+			__entry->seq_nr = sr_slot->seq_nr;
+			__entry->highest_slotid = res->sr_highest_slotid;
+			__entry->target_highest_slotid =
+					res->sr_target_highest_slotid;
+			__entry->error = res->sr_status;
+		),
+		TP_printk(
+			"error=%d (%s) session=0x%08x slot_nr=%u seq_nr=%u "
+			"highest_slotid=%u target_highest_slotid=%u "
+			"status_flags=%u (%s)",
+			__entry->error,
+			show_nfsv4_errors(__entry->error),
+			__entry->session,
+			__entry->slot_nr,
+			__entry->seq_nr,
+			__entry->highest_slotid,
+			__entry->target_highest_slotid,
+			__entry->status_flags,
+			show_nfs4_sequence_status_flags(__entry->status_flags)
+		)
+);
+
+struct cb_sequenceargs;
+struct cb_sequenceres;
+
+TRACE_EVENT(nfs4_cb_sequence,
+		TP_PROTO(
+			const struct cb_sequenceargs *args,
+			const struct cb_sequenceres *res,
+			__be32 status
+		),
+		TP_ARGS(args, res, status),
+
+		TP_STRUCT__entry(
+			__field(unsigned int, session)
+			__field(unsigned int, slot_nr)
+			__field(unsigned int, seq_nr)
+			__field(unsigned int, highest_slotid)
+			__field(unsigned int, cachethis)
+			__field(int, error)
+		),
+
+		TP_fast_assign(
+			__entry->session = nfs_session_id_hash(&args->csa_sessionid);
+			__entry->slot_nr = args->csa_slotid;
+			__entry->seq_nr = args->csa_sequenceid;
+			__entry->highest_slotid = args->csa_highestslotid;
+			__entry->cachethis = args->csa_cachethis;
+			__entry->error = -be32_to_cpu(status);
+		),
+
+		TP_printk(
+			"error=%d (%s) session=0x%08x slot_nr=%u seq_nr=%u "
+			"highest_slotid=%u",
+			__entry->error,
+			show_nfsv4_errors(__entry->error),
+			__entry->session,
+			__entry->slot_nr,
+			__entry->seq_nr,
+			__entry->highest_slotid
+		)
+);
+#endif /* CONFIG_NFS_V4_1 */
+
+DECLARE_EVENT_CLASS(nfs4_open_event,
+		TP_PROTO(
+			const struct nfs_open_context *ctx,
+			int flags,
+			int error
+		),
+
+		TP_ARGS(ctx, flags, error),
+
+		TP_STRUCT__entry(
+			__field(int, error)
+			__field(unsigned int, flags)
+			__field(unsigned int, fmode)
+			__field(dev_t, dev)
+			__field(u32, fhandle)
+			__field(u64, fileid)
+			__field(u64, dir)
+			__string(name, ctx->dentry->d_name.name)
+		),
+
+		TP_fast_assign(
+			const struct nfs4_state *state = ctx->state;
+			const struct inode *inode = NULL;
+
+			__entry->error = error;
+			__entry->flags = flags;
+			__entry->fmode = (__force unsigned int)ctx->mode;
+			__entry->dev = ctx->dentry->d_sb->s_dev;
+			if (!IS_ERR(state))
+				inode = state->inode;
+			if (inode != NULL) {
+				__entry->fileid = NFS_FILEID(inode);
+				__entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
+			} else {
+				__entry->fileid = 0;
+				__entry->fhandle = 0;
+			}
+			__entry->dir = NFS_FILEID(ctx->dentry->d_parent->d_inode);
+			__assign_str(name, ctx->dentry->d_name.name);
+		),
+
+		TP_printk(
+			"error=%d (%s) flags=%d (%s) fmode=%s "
+			"fileid=%02x:%02x:%llu fhandle=0x%08x "
+			"name=%02x:%02x:%llu/%s",
+			 __entry->error,
+			 show_nfsv4_errors(__entry->error),
+			 __entry->flags,
+			 show_open_flags(__entry->flags),
+			 show_fmode_flags(__entry->fmode),
+			 MAJOR(__entry->dev), MINOR(__entry->dev),
+			 (unsigned long long)__entry->fileid,
+			 __entry->fhandle,
+			 MAJOR(__entry->dev), MINOR(__entry->dev),
+			 (unsigned long long)__entry->dir,
+			 __get_str(name)
+		)
+);
+
+#define DEFINE_NFS4_OPEN_EVENT(name) \
+	DEFINE_EVENT(nfs4_open_event, name, \
+			TP_PROTO( \
+				const struct nfs_open_context *ctx, \
+				int flags, \
+				int error \
+			), \
+			TP_ARGS(ctx, flags, error))
+DEFINE_NFS4_OPEN_EVENT(nfs4_open_reclaim);
+DEFINE_NFS4_OPEN_EVENT(nfs4_open_expired);
+DEFINE_NFS4_OPEN_EVENT(nfs4_open_file);
+
+TRACE_EVENT(nfs4_close,
+		TP_PROTO(
+			const struct nfs4_state *state,
+			const struct nfs_closeargs *args,
+			const struct nfs_closeres *res,
+			int error
+		),
+
+		TP_ARGS(state, args, res, error),
+
+		TP_STRUCT__entry(
+			__field(dev_t, dev)
+			__field(u32, fhandle)
+			__field(u64, fileid)
+			__field(unsigned int, fmode)
+			__field(int, error)
+		),
+
+		TP_fast_assign(
+			const struct inode *inode = state->inode;
+
+			__entry->dev = inode->i_sb->s_dev;
+			__entry->fileid = NFS_FILEID(inode);
+			__entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
+			__entry->fmode = (__force unsigned int)state->state;
+			__entry->error = error;
+		),
+
+		TP_printk(
+			"error=%d (%s) fmode=%s fileid=%02x:%02x:%llu "
+			"fhandle=0x%08x",
+			__entry->error,
+			show_nfsv4_errors(__entry->error),
+			__entry->fmode ?  show_fmode_flags(__entry->fmode) :
+					  "closed",
+			MAJOR(__entry->dev), MINOR(__entry->dev),
+			(unsigned long long)__entry->fileid,
+			__entry->fhandle
+		)
+);
+
+#define show_lock_cmd(type) \
+	__print_symbolic((int)type, \
+		{ F_GETLK, "GETLK" }, \
+		{ F_SETLK, "SETLK" }, \
+		{ F_SETLKW, "SETLKW" })
+#define show_lock_type(type) \
+	__print_symbolic((int)type, \
+		{ F_RDLCK, "RDLCK" }, \
+		{ F_WRLCK, "WRLCK" }, \
+		{ F_UNLCK, "UNLCK" })
+
+DECLARE_EVENT_CLASS(nfs4_lock_event,
+		TP_PROTO(
+			const struct file_lock *request,
+			const struct nfs4_state *state,
+			int cmd,
+			int error
+		),
+
+		TP_ARGS(request, state, cmd, error),
+
+		TP_STRUCT__entry(
+			__field(int, error)
+			__field(int, cmd)
+			__field(char, type)
+			__field(loff_t, start)
+			__field(loff_t, end)
+			__field(dev_t, dev)
+			__field(u32, fhandle)
+			__field(u64, fileid)
+		),
+
+		TP_fast_assign(
+			const struct inode *inode = state->inode;
+
+			__entry->error = error;
+			__entry->cmd = cmd;
+			__entry->type = request->fl_type;
+			__entry->start = request->fl_start;
+			__entry->end = request->fl_end;
+			__entry->dev = inode->i_sb->s_dev;
+			__entry->fileid = NFS_FILEID(inode);
+			__entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
+		),
+
+		TP_printk(
+			"error=%d (%s) cmd=%s:%s range=%lld:%lld "
+			"fileid=%02x:%02x:%llu fhandle=0x%08x",
+			__entry->error,
+			show_nfsv4_errors(__entry->error),
+			show_lock_cmd(__entry->cmd),
+			show_lock_type(__entry->type),
+			(long long)__entry->start,
+			(long long)__entry->end,
+			MAJOR(__entry->dev), MINOR(__entry->dev),
+			(unsigned long long)__entry->fileid,
+			__entry->fhandle
+		)
+);
+
+#define DEFINE_NFS4_LOCK_EVENT(name) \
+	DEFINE_EVENT(nfs4_lock_event, name, \
+			TP_PROTO( \
+				const struct file_lock *request, \
+				const struct nfs4_state *state, \
+				int cmd, \
+				int error \
+			), \
+			TP_ARGS(request, state, cmd, error))
+DEFINE_NFS4_LOCK_EVENT(nfs4_get_lock);
+DEFINE_NFS4_LOCK_EVENT(nfs4_set_lock);
+DEFINE_NFS4_LOCK_EVENT(nfs4_lock_reclaim);
+DEFINE_NFS4_LOCK_EVENT(nfs4_lock_expired);
+DEFINE_NFS4_LOCK_EVENT(nfs4_unlock);
+
+DECLARE_EVENT_CLASS(nfs4_set_delegation_event,
+		TP_PROTO(
+			const struct inode *inode,
+			fmode_t fmode
+		),
+
+		TP_ARGS(inode, fmode),
+
+		TP_STRUCT__entry(
+			__field(dev_t, dev)
+			__field(u32, fhandle)
+			__field(u64, fileid)
+			__field(unsigned int, fmode)
+		),
+
+		TP_fast_assign(
+			__entry->dev = inode->i_sb->s_dev;
+			__entry->fileid = NFS_FILEID(inode);
+			__entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
+			__entry->fmode = (__force unsigned int)fmode;
+		),
+
+		TP_printk(
+			"fmode=%s fileid=%02x:%02x:%llu fhandle=0x%08x",
+			show_fmode_flags(__entry->fmode),
+			MAJOR(__entry->dev), MINOR(__entry->dev),
+			(unsigned long long)__entry->fileid,
+			__entry->fhandle
+		)
+);
+#define DEFINE_NFS4_SET_DELEGATION_EVENT(name) \
+	DEFINE_EVENT(nfs4_set_delegation_event, name, \
+			TP_PROTO( \
+				const struct inode *inode, \
+				fmode_t fmode \
+			), \
+			TP_ARGS(inode, fmode))
+DEFINE_NFS4_SET_DELEGATION_EVENT(nfs4_set_delegation);
+DEFINE_NFS4_SET_DELEGATION_EVENT(nfs4_reclaim_delegation);
+
+TRACE_EVENT(nfs4_delegreturn_exit,
+		TP_PROTO(
+			const struct nfs4_delegreturnargs *args,
+			const struct nfs4_delegreturnres *res,
+			int error
+		),
+
+		TP_ARGS(args, res, error),
+
+		TP_STRUCT__entry(
+			__field(dev_t, dev)
+			__field(u32, fhandle)
+			__field(int, error)
+		),
+
+		TP_fast_assign(
+			__entry->dev = res->server->s_dev;
+			__entry->fhandle = nfs_fhandle_hash(args->fhandle);
+			__entry->error = error;
+		),
+
+		TP_printk(
+			"error=%d (%s) dev=%02x:%02x fhandle=0x%08x",
+			__entry->error,
+			show_nfsv4_errors(__entry->error),
+			MAJOR(__entry->dev), MINOR(__entry->dev),
+			__entry->fhandle
+		)
+);
+
+#ifdef CONFIG_NFS_V4_1
+DECLARE_EVENT_CLASS(nfs4_test_stateid_event,
+		TP_PROTO(
+			const struct nfs4_state *state,
+			const struct nfs4_lock_state *lsp,
+			int error
+		),
+
+		TP_ARGS(state, lsp, error),
+
+		TP_STRUCT__entry(
+			__field(int, error)
+			__field(dev_t, dev)
+			__field(u32, fhandle)
+			__field(u64, fileid)
+		),
+
+		TP_fast_assign(
+			const struct inode *inode = state->inode;
+
+			__entry->error = error;
+			__entry->dev = inode->i_sb->s_dev;
+			__entry->fileid = NFS_FILEID(inode);
+			__entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
+		),
+
+		TP_printk(
+			"error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x",
+			__entry->error,
+			show_nfsv4_errors(__entry->error),
+			MAJOR(__entry->dev), MINOR(__entry->dev),
+			(unsigned long long)__entry->fileid,
+			__entry->fhandle
+		)
+);
+
+#define DEFINE_NFS4_TEST_STATEID_EVENT(name) \
+	DEFINE_EVENT(nfs4_test_stateid_event, name, \
+			TP_PROTO( \
+				const struct nfs4_state *state, \
+				const struct nfs4_lock_state *lsp, \
+				int error \
+			), \
+			TP_ARGS(state, lsp, error))
+DEFINE_NFS4_TEST_STATEID_EVENT(nfs4_test_delegation_stateid);
+DEFINE_NFS4_TEST_STATEID_EVENT(nfs4_test_open_stateid);
+DEFINE_NFS4_TEST_STATEID_EVENT(nfs4_test_lock_stateid);
+#endif /* CONFIG_NFS_V4_1 */
+
+DECLARE_EVENT_CLASS(nfs4_lookup_event,
+		TP_PROTO(
+			const struct inode *dir,
+			const struct qstr *name,
+			int error
+		),
+
+		TP_ARGS(dir, name, error),
+
+		TP_STRUCT__entry(
+			__field(dev_t, dev)
+			__field(int, error)
+			__field(u64, dir)
+			__string(name, name->name)
+		),
+
+		TP_fast_assign(
+			__entry->dev = dir->i_sb->s_dev;
+			__entry->dir = NFS_FILEID(dir);
+			__entry->error = error;
+			__assign_str(name, name->name);
+		),
+
+		TP_printk(
+			"error=%d (%s) name=%02x:%02x:%llu/%s",
+			__entry->error,
+			show_nfsv4_errors(__entry->error),
+			MAJOR(__entry->dev), MINOR(__entry->dev),
+			(unsigned long long)__entry->dir,
+			__get_str(name)
+		)
+);
+
+#define DEFINE_NFS4_LOOKUP_EVENT(name) \
+	DEFINE_EVENT(nfs4_lookup_event, name, \
+			TP_PROTO( \
+				const struct inode *dir, \
+				const struct qstr *name, \
+				int error \
+			), \
+			TP_ARGS(dir, name, error))
+
+DEFINE_NFS4_LOOKUP_EVENT(nfs4_lookup);
+DEFINE_NFS4_LOOKUP_EVENT(nfs4_symlink);
+DEFINE_NFS4_LOOKUP_EVENT(nfs4_mkdir);
+DEFINE_NFS4_LOOKUP_EVENT(nfs4_mknod);
+DEFINE_NFS4_LOOKUP_EVENT(nfs4_remove);
+DEFINE_NFS4_LOOKUP_EVENT(nfs4_get_fs_locations);
+DEFINE_NFS4_LOOKUP_EVENT(nfs4_secinfo);
+
+TRACE_EVENT(nfs4_rename,
+		TP_PROTO(
+			const struct inode *olddir,
+			const struct qstr *oldname,
+			const struct inode *newdir,
+			const struct qstr *newname,
+			int error
+		),
+
+		TP_ARGS(olddir, oldname, newdir, newname, error),
+
+		TP_STRUCT__entry(
+			__field(dev_t, dev)
+			__field(int, error)
+			__field(u64, olddir)
+			__string(oldname, oldname->name)
+			__field(u64, newdir)
+			__string(newname, newname->name)
+		),
+
+		TP_fast_assign(
+			__entry->dev = olddir->i_sb->s_dev;
+			__entry->olddir = NFS_FILEID(olddir);
+			__entry->newdir = NFS_FILEID(newdir);
+			__entry->error = error;
+			__assign_str(oldname, oldname->name);
+			__assign_str(newname, newname->name);
+		),
+
+		TP_printk(
+			"error=%d (%s) oldname=%02x:%02x:%llu/%s "
+			"newname=%02x:%02x:%llu/%s",
+			__entry->error,
+			show_nfsv4_errors(__entry->error),
+			MAJOR(__entry->dev), MINOR(__entry->dev),
+			(unsigned long long)__entry->olddir,
+			__get_str(oldname),
+			MAJOR(__entry->dev), MINOR(__entry->dev),
+			(unsigned long long)__entry->newdir,
+			__get_str(newname)
+		)
+);
+
+DECLARE_EVENT_CLASS(nfs4_inode_event,
+		TP_PROTO(
+			const struct inode *inode,
+			int error
+		),
+
+		TP_ARGS(inode, error),
+
+		TP_STRUCT__entry(
+			__field(dev_t, dev)
+			__field(u32, fhandle)
+			__field(u64, fileid)
+			__field(int, error)
+		),
+
+		TP_fast_assign(
+			__entry->dev = inode->i_sb->s_dev;
+			__entry->fileid = NFS_FILEID(inode);
+			__entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
+			__entry->error = error;
+		),
+
+		TP_printk(
+			"error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x",
+			__entry->error,
+			show_nfsv4_errors(__entry->error),
+			MAJOR(__entry->dev), MINOR(__entry->dev),
+			(unsigned long long)__entry->fileid,
+			__entry->fhandle
+		)
+);
+
+#define DEFINE_NFS4_INODE_EVENT(name) \
+	DEFINE_EVENT(nfs4_inode_event, name, \
+			TP_PROTO( \
+				const struct inode *inode, \
+				int error \
+			), \
+			TP_ARGS(inode, error))
+
+DEFINE_NFS4_INODE_EVENT(nfs4_setattr);
+DEFINE_NFS4_INODE_EVENT(nfs4_access);
+DEFINE_NFS4_INODE_EVENT(nfs4_readlink);
+DEFINE_NFS4_INODE_EVENT(nfs4_readdir);
+DEFINE_NFS4_INODE_EVENT(nfs4_get_acl);
+DEFINE_NFS4_INODE_EVENT(nfs4_set_acl);
+#ifdef CONFIG_NFS_V4_SECURITY_LABEL
+DEFINE_NFS4_INODE_EVENT(nfs4_get_security_label);
+DEFINE_NFS4_INODE_EVENT(nfs4_set_security_label);
+#endif /* CONFIG_NFS_V4_SECURITY_LABEL */
+DEFINE_NFS4_INODE_EVENT(nfs4_recall_delegation);
+DEFINE_NFS4_INODE_EVENT(nfs4_delegreturn);
+
+DECLARE_EVENT_CLASS(nfs4_getattr_event,
+		TP_PROTO(
+			const struct nfs_server *server,
+			const struct nfs_fh *fhandle,
+			const struct nfs_fattr *fattr,
+			int error
+		),
+
+		TP_ARGS(server, fhandle, fattr, error),
+
+		TP_STRUCT__entry(
+			__field(dev_t, dev)
+			__field(u32, fhandle)
+			__field(u64, fileid)
+			__field(unsigned int, valid)
+			__field(int, error)
+		),
+
+		TP_fast_assign(
+			__entry->dev = server->s_dev;
+			__entry->valid = fattr->valid;
+			__entry->fhandle = nfs_fhandle_hash(fhandle);
+			__entry->fileid = (fattr->valid & NFS_ATTR_FATTR_FILEID) ? fattr->fileid : 0;
+			__entry->error = error;
+		),
+
+		TP_printk(
+			"error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
+			"valid=%s",
+			__entry->error,
+			show_nfsv4_errors(__entry->error),
+			MAJOR(__entry->dev), MINOR(__entry->dev),
+			(unsigned long long)__entry->fileid,
+			__entry->fhandle,
+			show_nfs_fattr_flags(__entry->valid)
+		)
+);
+
+#define DEFINE_NFS4_GETATTR_EVENT(name) \
+	DEFINE_EVENT(nfs4_getattr_event, name, \
+			TP_PROTO( \
+				const struct nfs_server *server, \
+				const struct nfs_fh *fhandle, \
+				const struct nfs_fattr *fattr, \
+				int error \
+			), \
+			TP_ARGS(server, fhandle, fattr, error))
+DEFINE_NFS4_GETATTR_EVENT(nfs4_getattr);
+DEFINE_NFS4_GETATTR_EVENT(nfs4_lookup_root);
+DEFINE_NFS4_GETATTR_EVENT(nfs4_fsinfo);
+
+DECLARE_EVENT_CLASS(nfs4_idmap_event,
+		TP_PROTO(
+			const char *name,
+			int len,
+			u32 id,
+			int error
+		),
+
+		TP_ARGS(name, len, id, error),
+
+		TP_STRUCT__entry(
+			__field(int, error)
+			__field(u32, id)
+			__dynamic_array(char, name, len > 0 ? len + 1 : 1)
+		),
+
+		TP_fast_assign(
+			if (len < 0)
+				len = 0;
+			__entry->error = error < 0 ? error : 0;
+			__entry->id = id;
+			memcpy(__get_dynamic_array(name), name, len);
+			((char *)__get_dynamic_array(name))[len] = 0;
+		),
+
+		TP_printk(
+			"error=%d id=%u name=%s",
+			__entry->error,
+			__entry->id,
+			__get_str(name)
+		)
+);
+#define DEFINE_NFS4_IDMAP_EVENT(name) \
+	DEFINE_EVENT(nfs4_idmap_event, name, \
+			TP_PROTO( \
+				const char *name, \
+				int len, \
+				u32 id, \
+				int error \
+			), \
+			TP_ARGS(name, len, id, error))
+DEFINE_NFS4_IDMAP_EVENT(nfs4_map_name_to_uid);
+DEFINE_NFS4_IDMAP_EVENT(nfs4_map_group_to_gid);
+DEFINE_NFS4_IDMAP_EVENT(nfs4_map_uid_to_name);
+DEFINE_NFS4_IDMAP_EVENT(nfs4_map_gid_to_group);
+
+DECLARE_EVENT_CLASS(nfs4_read_event,
+		TP_PROTO(
+			const struct nfs_read_data *data,
+			int error
+		),
+
+		TP_ARGS(data, error),
+
+		TP_STRUCT__entry(
+			__field(dev_t, dev)
+			__field(u32, fhandle)
+			__field(u64, fileid)
+			__field(loff_t, offset)
+			__field(size_t, count)
+			__field(int, error)
+		),
+
+		TP_fast_assign(
+			const struct inode *inode = data->header->inode;
+			__entry->dev = inode->i_sb->s_dev;
+			__entry->fileid = NFS_FILEID(inode);
+			__entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
+			__entry->offset = data->args.offset;
+			__entry->count = data->args.count;
+			__entry->error = error;
+		),
+
+		TP_printk(
+			"error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
+			"offset=%lld count=%zu",
+			__entry->error,
+			show_nfsv4_errors(__entry->error),
+			MAJOR(__entry->dev), MINOR(__entry->dev),
+			(unsigned long long)__entry->fileid,
+			__entry->fhandle,
+			(long long)__entry->offset,
+			__entry->count
+		)
+);
+#define DEFINE_NFS4_READ_EVENT(name) \
+	DEFINE_EVENT(nfs4_read_event, name, \
+			TP_PROTO( \
+				const struct nfs_read_data *data, \
+				int error \
+			), \
+			TP_ARGS(data, error))
+DEFINE_NFS4_READ_EVENT(nfs4_read);
+#ifdef CONFIG_NFS_V4_1
+DEFINE_NFS4_READ_EVENT(nfs4_pnfs_read);
+#endif /* CONFIG_NFS_V4_1 */
+
+DECLARE_EVENT_CLASS(nfs4_write_event,
+		TP_PROTO(
+			const struct nfs_write_data *data,
+			int error
+		),
+
+		TP_ARGS(data, error),
+
+		TP_STRUCT__entry(
+			__field(dev_t, dev)
+			__field(u32, fhandle)
+			__field(u64, fileid)
+			__field(loff_t, offset)
+			__field(size_t, count)
+			__field(int, error)
+		),
+
+		TP_fast_assign(
+			const struct inode *inode = data->header->inode;
+			__entry->dev = inode->i_sb->s_dev;
+			__entry->fileid = NFS_FILEID(inode);
+			__entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
+			__entry->offset = data->args.offset;
+			__entry->count = data->args.count;
+			__entry->error = error;
+		),
+
+		TP_printk(
+			"error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
+			"offset=%lld count=%zu",
+			__entry->error,
+			show_nfsv4_errors(__entry->error),
+			MAJOR(__entry->dev), MINOR(__entry->dev),
+			(unsigned long long)__entry->fileid,
+			__entry->fhandle,
+			(long long)__entry->offset,
+			__entry->count
+		)
+);
+
+#define DEFINE_NFS4_WRITE_EVENT(name) \
+	DEFINE_EVENT(nfs4_write_event, name, \
+			TP_PROTO( \
+				const struct nfs_write_data *data, \
+				int error \
+			), \
+			TP_ARGS(data, error))
+DEFINE_NFS4_WRITE_EVENT(nfs4_write);
+#ifdef CONFIG_NFS_V4_1
+DEFINE_NFS4_WRITE_EVENT(nfs4_pnfs_write);
+#endif /* CONFIG_NFS_V4_1 */
+
+DECLARE_EVENT_CLASS(nfs4_commit_event,
+		TP_PROTO(
+			const struct nfs_commit_data *data,
+			int error
+		),
+
+		TP_ARGS(data, error),
+
+		TP_STRUCT__entry(
+			__field(dev_t, dev)
+			__field(u32, fhandle)
+			__field(u64, fileid)
+			__field(loff_t, offset)
+			__field(size_t, count)
+			__field(int, error)
+		),
+
+		TP_fast_assign(
+			const struct inode *inode = data->inode;
+			__entry->dev = inode->i_sb->s_dev;
+			__entry->fileid = NFS_FILEID(inode);
+			__entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
+			__entry->offset = data->args.offset;
+			__entry->count = data->args.count;
+			__entry->error = error;
+		),
+
+		TP_printk(
+			"error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
+			"offset=%lld count=%zu",
+			__entry->error,
+			show_nfsv4_errors(__entry->error),
+			MAJOR(__entry->dev), MINOR(__entry->dev),
+			(unsigned long long)__entry->fileid,
+			__entry->fhandle,
+			(long long)__entry->offset,
+			__entry->count
+		)
+);
+#define DEFINE_NFS4_COMMIT_EVENT(name) \
+	DEFINE_EVENT(nfs4_commit_event, name, \
+			TP_PROTO( \
+				const struct nfs_commit_data *data, \
+				int error \
+			), \
+			TP_ARGS(data, error))
+DEFINE_NFS4_COMMIT_EVENT(nfs4_commit);
+#ifdef CONFIG_NFS_V4_1
+DEFINE_NFS4_COMMIT_EVENT(nfs4_pnfs_commit_ds);
+
+#define show_pnfs_iomode(iomode) \
+	__print_symbolic(iomode, \
+		{ IOMODE_READ, "READ" }, \
+		{ IOMODE_RW, "RW" }, \
+		{ IOMODE_ANY, "ANY" })
+
+TRACE_EVENT(nfs4_layoutget,
+		TP_PROTO(
+			const struct nfs_open_context *ctx,
+			const struct pnfs_layout_range *args,
+			const struct pnfs_layout_range *res,
+			int error
+		),
+
+		TP_ARGS(ctx, args, res, error),
+
+		TP_STRUCT__entry(
+			__field(dev_t, dev)
+			__field(u32, fhandle)
+			__field(u64, fileid)
+			__field(u32, iomode)
+			__field(u64, offset)
+			__field(u64, count)
+			__field(int, error)
+		),
+
+		TP_fast_assign(
+			const struct inode *inode = ctx->dentry->d_inode;
+			__entry->dev = inode->i_sb->s_dev;
+			__entry->fileid = NFS_FILEID(inode);
+			__entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
+			__entry->iomode = args->iomode;
+			__entry->offset = args->offset;
+			__entry->count = args->length;
+			__entry->error = error;
+		),
+
+		TP_printk(
+			"error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
+			"iomode=%s offset=%llu count=%llu",
+			__entry->error,
+			show_nfsv4_errors(__entry->error),
+			MAJOR(__entry->dev), MINOR(__entry->dev),
+			(unsigned long long)__entry->fileid,
+			__entry->fhandle,
+			show_pnfs_iomode(__entry->iomode),
+			(unsigned long long)__entry->offset,
+			(unsigned long long)__entry->count
+		)
+);
+
+DEFINE_NFS4_INODE_EVENT(nfs4_layoutcommit);
+DEFINE_NFS4_INODE_EVENT(nfs4_layoutreturn);
+
+#endif /* CONFIG_NFS_V4_1 */
+
+#endif /* _TRACE_NFS4_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE nfs4trace
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 3850b01..fbdad9e 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -294,7 +294,9 @@
 				XDR_QUADLEN(NFS4_EXCHANGE_ID_LEN) + \
 				1 /* flags */ + \
 				1 /* spa_how */ + \
-				0 /* SP4_NONE (for now) */ + \
+				/* max is SP4_MACH_CRED (for now) */ + \
+				1 + NFS4_OP_MAP_NUM_WORDS + \
+				1 + NFS4_OP_MAP_NUM_WORDS + \
 				1 /* implementation id array of size 1 */ + \
 				1 /* nii_domain */ + \
 				XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \
@@ -306,7 +308,9 @@
 				1 /* eir_sequenceid */ + \
 				1 /* eir_flags */ + \
 				1 /* spr_how */ + \
-				0 /* SP4_NONE (for now) */ + \
+				  /* max is SP4_MACH_CRED (for now) */ + \
+				1 + NFS4_OP_MAP_NUM_WORDS + \
+				1 + NFS4_OP_MAP_NUM_WORDS + \
 				2 /* eir_server_owner.so_minor_id */ + \
 				/* eir_server_owner.so_major_id<> */ \
 				XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 + \
@@ -997,12 +1001,10 @@
 	int owner_namelen = 0;
 	int owner_grouplen = 0;
 	__be32 *p;
-	__be32 *q;
-	int len;
-	uint32_t bmval_len = 2;
-	uint32_t bmval0 = 0;
-	uint32_t bmval1 = 0;
-	uint32_t bmval2 = 0;
+	unsigned i;
+	uint32_t len = 0;
+	uint32_t bmval_len;
+	uint32_t bmval[3] = { 0 };
 
 	/*
 	 * We reserve enough space to write the entire attribute buffer at once.
@@ -1011,13 +1013,14 @@
 	 * = 40 bytes, plus any contribution from variable-length fields
 	 *            such as owner/group.
 	 */
-	len = 8;
-
-	/* Sigh */
-	if (iap->ia_valid & ATTR_SIZE)
+	if (iap->ia_valid & ATTR_SIZE) {
+		bmval[0] |= FATTR4_WORD0_SIZE;
 		len += 8;
-	if (iap->ia_valid & ATTR_MODE)
+	}
+	if (iap->ia_valid & ATTR_MODE) {
+		bmval[1] |= FATTR4_WORD1_MODE;
 		len += 4;
+	}
 	if (iap->ia_valid & ATTR_UID) {
 		owner_namelen = nfs_map_uid_to_name(server, iap->ia_uid, owner_name, IDMAP_NAMESZ);
 		if (owner_namelen < 0) {
@@ -1028,6 +1031,7 @@
 			owner_namelen = sizeof("nobody") - 1;
 			/* goto out; */
 		}
+		bmval[1] |= FATTR4_WORD1_OWNER;
 		len += 4 + (XDR_QUADLEN(owner_namelen) << 2);
 	}
 	if (iap->ia_valid & ATTR_GID) {
@@ -1039,92 +1043,73 @@
 			owner_grouplen = sizeof("nobody") - 1;
 			/* goto out; */
 		}
+		bmval[1] |= FATTR4_WORD1_OWNER_GROUP;
 		len += 4 + (XDR_QUADLEN(owner_grouplen) << 2);
 	}
-	if (iap->ia_valid & ATTR_ATIME_SET)
-		len += 16;
-	else if (iap->ia_valid & ATTR_ATIME)
-		len += 4;
-	if (iap->ia_valid & ATTR_MTIME_SET)
-		len += 16;
-	else if (iap->ia_valid & ATTR_MTIME)
-		len += 4;
-	if (label) {
-		len += 4 + 4 + 4 + (XDR_QUADLEN(label->len) << 2);
-		bmval_len = 3;
-	}
-
-	len += bmval_len << 2;
-	p = reserve_space(xdr, len);
-
-	/*
-	 * We write the bitmap length now, but leave the bitmap and the attribute
-	 * buffer length to be backfilled at the end of this routine.
-	 */
-	*p++ = cpu_to_be32(bmval_len);
-	q = p;
-	/* Skip bitmap entries + attrlen */
-	p += bmval_len + 1;
-
-	if (iap->ia_valid & ATTR_SIZE) {
-		bmval0 |= FATTR4_WORD0_SIZE;
-		p = xdr_encode_hyper(p, iap->ia_size);
-	}
-	if (iap->ia_valid & ATTR_MODE) {
-		bmval1 |= FATTR4_WORD1_MODE;
-		*p++ = cpu_to_be32(iap->ia_mode & S_IALLUGO);
-	}
-	if (iap->ia_valid & ATTR_UID) {
-		bmval1 |= FATTR4_WORD1_OWNER;
-		p = xdr_encode_opaque(p, owner_name, owner_namelen);
-	}
-	if (iap->ia_valid & ATTR_GID) {
-		bmval1 |= FATTR4_WORD1_OWNER_GROUP;
-		p = xdr_encode_opaque(p, owner_group, owner_grouplen);
-	}
 	if (iap->ia_valid & ATTR_ATIME_SET) {
-		bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET;
-		*p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME);
-		p = xdr_encode_hyper(p, (s64)iap->ia_atime.tv_sec);
-		*p++ = cpu_to_be32(iap->ia_atime.tv_nsec);
-	}
-	else if (iap->ia_valid & ATTR_ATIME) {
-		bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET;
-		*p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME);
+		bmval[1] |= FATTR4_WORD1_TIME_ACCESS_SET;
+		len += 16;
+	} else if (iap->ia_valid & ATTR_ATIME) {
+		bmval[1] |= FATTR4_WORD1_TIME_ACCESS_SET;
+		len += 4;
 	}
 	if (iap->ia_valid & ATTR_MTIME_SET) {
-		bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET;
-		*p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME);
-		p = xdr_encode_hyper(p, (s64)iap->ia_mtime.tv_sec);
-		*p++ = cpu_to_be32(iap->ia_mtime.tv_nsec);
-	}
-	else if (iap->ia_valid & ATTR_MTIME) {
-		bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET;
-		*p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME);
+		bmval[1] |= FATTR4_WORD1_TIME_MODIFY_SET;
+		len += 16;
+	} else if (iap->ia_valid & ATTR_MTIME) {
+		bmval[1] |= FATTR4_WORD1_TIME_MODIFY_SET;
+		len += 4;
 	}
 	if (label) {
-		bmval2 |= FATTR4_WORD2_SECURITY_LABEL;
+		len += 4 + 4 + 4 + (XDR_QUADLEN(label->len) << 2);
+		bmval[2] |= FATTR4_WORD2_SECURITY_LABEL;
+	}
+
+	if (bmval[2] != 0)
+		bmval_len = 3;
+	else if (bmval[1] != 0)
+		bmval_len = 2;
+	else
+		bmval_len = 1;
+
+	p = reserve_space(xdr, 4 + (bmval_len << 2) + 4 + len);
+
+	*p++ = cpu_to_be32(bmval_len);
+	for (i = 0; i < bmval_len; i++)
+		*p++ = cpu_to_be32(bmval[i]);
+	*p++ = cpu_to_be32(len);
+
+	if (bmval[0] & FATTR4_WORD0_SIZE)
+		p = xdr_encode_hyper(p, iap->ia_size);
+	if (bmval[1] & FATTR4_WORD1_MODE)
+		*p++ = cpu_to_be32(iap->ia_mode & S_IALLUGO);
+	if (bmval[1] & FATTR4_WORD1_OWNER)
+		p = xdr_encode_opaque(p, owner_name, owner_namelen);
+	if (bmval[1] & FATTR4_WORD1_OWNER_GROUP)
+		p = xdr_encode_opaque(p, owner_group, owner_grouplen);
+	if (bmval[1] & FATTR4_WORD1_TIME_ACCESS_SET) {
+		if (iap->ia_valid & ATTR_ATIME_SET) {
+			*p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME);
+			p = xdr_encode_hyper(p, (s64)iap->ia_atime.tv_sec);
+			*p++ = cpu_to_be32(iap->ia_atime.tv_nsec);
+		} else
+			*p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME);
+	}
+	if (bmval[1] & FATTR4_WORD1_TIME_MODIFY_SET) {
+		if (iap->ia_valid & ATTR_MTIME_SET) {
+			*p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME);
+			p = xdr_encode_hyper(p, (s64)iap->ia_mtime.tv_sec);
+			*p++ = cpu_to_be32(iap->ia_mtime.tv_nsec);
+		} else
+			*p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME);
+	}
+	if (bmval[2] & FATTR4_WORD2_SECURITY_LABEL) {
 		*p++ = cpu_to_be32(label->lfs);
 		*p++ = cpu_to_be32(label->pi);
 		*p++ = cpu_to_be32(label->len);
 		p = xdr_encode_opaque_fixed(p, label->label, label->len);
 	}
 
-	/*
-	 * Now we backfill the bitmap and the attribute buffer length.
-	 */
-	if (len != ((char *)p - (char *)q) + 4) {
-		printk(KERN_ERR "NFS: Attr length error, %u != %Zu\n",
-				len, ((char *)p - (char *)q) + 4);
-		BUG();
-	}
-	*q++ = htonl(bmval0);
-	*q++ = htonl(bmval1);
-	if (bmval_len == 3)
-		*q++ = htonl(bmval2);
-	len = (char *)p - (char *)(q + 1);
-	*q = htonl(len);
-
 /* out: */
 }
 
@@ -1745,6 +1730,14 @@
 	*p = 0;	/* use_conn_in_rdma_mode = False */
 }
 
+static void encode_op_map(struct xdr_stream *xdr, struct nfs4_op_map *op_map)
+{
+	unsigned int i;
+	encode_uint32(xdr, NFS4_OP_MAP_NUM_WORDS);
+	for (i = 0; i < NFS4_OP_MAP_NUM_WORDS; i++)
+		encode_uint32(xdr, op_map->u.words[i]);
+}
+
 static void encode_exchange_id(struct xdr_stream *xdr,
 			       struct nfs41_exchange_id_args *args,
 			       struct compound_hdr *hdr)
@@ -1758,9 +1751,20 @@
 
 	encode_string(xdr, args->id_len, args->id);
 
-	p = reserve_space(xdr, 12);
-	*p++ = cpu_to_be32(args->flags);
-	*p++ = cpu_to_be32(0);	/* zero length state_protect4_a */
+	encode_uint32(xdr, args->flags);
+	encode_uint32(xdr, args->state_protect.how);
+
+	switch (args->state_protect.how) {
+	case SP4_NONE:
+		break;
+	case SP4_MACH_CRED:
+		encode_op_map(xdr, &args->state_protect.enforce);
+		encode_op_map(xdr, &args->state_protect.allow);
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		break;
+	}
 
 	if (send_implementation_id &&
 	    sizeof(CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN) > 1 &&
@@ -1771,7 +1775,7 @@
 			       utsname()->version, utsname()->machine);
 
 	if (len > 0) {
-		*p = cpu_to_be32(1);	/* implementation id array length=1 */
+		encode_uint32(xdr, 1);	/* implementation id array length=1 */
 
 		encode_string(xdr,
 			sizeof(CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN) - 1,
@@ -1782,7 +1786,7 @@
 		p = xdr_encode_hyper(p, 0);
 		*p = cpu_to_be32(0);
 	} else
-		*p = cpu_to_be32(0);	/* implementation id array length=0 */
+		encode_uint32(xdr, 0);	/* implementation id array length=0 */
 }
 
 static void encode_create_session(struct xdr_stream *xdr,
@@ -1835,7 +1839,7 @@
 	*p++ = cpu_to_be32(RPC_AUTH_UNIX);			/* auth_sys */
 
 	/* authsys_parms rfc1831 */
-	*p++ = (__be32)nn->boot_time.tv_nsec;		/* stamp */
+	*p++ = cpu_to_be32(nn->boot_time.tv_nsec);	/* stamp */
 	p = xdr_encode_opaque(p, machine_name, len);
 	*p++ = cpu_to_be32(0);				/* UID */
 	*p++ = cpu_to_be32(0);				/* GID */
@@ -1877,11 +1881,10 @@
 	struct nfs4_slot *slot = args->sa_slot;
 	__be32 *p;
 
-	if (slot == NULL)
-		return;
-
 	tp = slot->table;
 	session = tp->session;
+	if (!session)
+		return;
 
 	encode_op_hdr(xdr, OP_SEQUENCE, decode_sequence_maxsz, hdr);
 
@@ -2062,9 +2065,9 @@
 static u32 nfs4_xdr_minorversion(const struct nfs4_sequence_args *args)
 {
 #if defined(CONFIG_NFS_V4_1)
-
-	if (args->sa_slot)
-		return args->sa_slot->table->session->clp->cl_mvops->minor_version;
+	struct nfs4_session *session = args->sa_slot->table->session;
+	if (session)
+		return session->clp->cl_mvops->minor_version;
 #endif /* CONFIG_NFS_V4_1 */
 	return 0;
 }
@@ -4649,7 +4652,7 @@
 static int decode_first_pnfs_layout_type(struct xdr_stream *xdr,
 					 uint32_t *layouttype)
 {
-	uint32_t *p;
+	__be32 *p;
 	int num;
 
 	p = xdr_inline_decode(xdr, 4);
@@ -5394,6 +5397,23 @@
 	return decode_secinfo_common(xdr, res);
 }
 
+static int decode_op_map(struct xdr_stream *xdr, struct nfs4_op_map *op_map)
+{
+	__be32 *p;
+	uint32_t bitmap_words;
+	unsigned int i;
+
+	p = xdr_inline_decode(xdr, 4);
+	bitmap_words = be32_to_cpup(p++);
+	if (bitmap_words > NFS4_OP_MAP_NUM_WORDS)
+		return -EIO;
+	p = xdr_inline_decode(xdr, 4 * bitmap_words);
+	for (i = 0; i < bitmap_words; i++)
+		op_map->u.words[i] = be32_to_cpup(p++);
+
+	return 0;
+}
+
 static int decode_exchange_id(struct xdr_stream *xdr,
 			      struct nfs41_exchange_id_res *res)
 {
@@ -5417,10 +5437,22 @@
 	res->seqid = be32_to_cpup(p++);
 	res->flags = be32_to_cpup(p++);
 
-	/* We ask for SP4_NONE */
-	dummy = be32_to_cpup(p);
-	if (dummy != SP4_NONE)
+	res->state_protect.how = be32_to_cpup(p);
+	switch (res->state_protect.how) {
+	case SP4_NONE:
+		break;
+	case SP4_MACH_CRED:
+		status = decode_op_map(xdr, &res->state_protect.enforce);
+		if (status)
+			return status;
+		status = decode_op_map(xdr, &res->state_protect.allow);
+		if (status)
+			return status;
+		break;
+	default:
+		WARN_ON_ONCE(1);
 		return -EIO;
+	}
 
 	/* server_owner4.so_minor_id */
 	p = xdr_inline_decode(xdr, 8);
@@ -5614,6 +5646,8 @@
 
 	if (res->sr_slot == NULL)
 		return 0;
+	if (!res->sr_slot->table->session)
+		return 0;
 
 	status = decode_op_hdr(xdr, OP_SEQUENCE);
 	if (!status)
diff --git a/fs/nfs/nfstrace.c b/fs/nfs/nfstrace.c
new file mode 100644
index 0000000..4eb0aea
--- /dev/null
+++ b/fs/nfs/nfstrace.c
@@ -0,0 +1,9 @@
+/*
+ * Copyright (c) 2013 Trond Myklebust <Trond.Myklebust@netapp.com>
+ */
+#include <linux/nfs_fs.h>
+#include <linux/namei.h>
+#include "internal.h"
+
+#define CREATE_TRACE_POINTS
+#include "nfstrace.h"
diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h
new file mode 100644
index 0000000..89fe741
--- /dev/null
+++ b/fs/nfs/nfstrace.h
@@ -0,0 +1,729 @@
+/*
+ * Copyright (c) 2013 Trond Myklebust <Trond.Myklebust@netapp.com>
+ */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM nfs
+
+#if !defined(_TRACE_NFS_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_NFS_H
+
+#include <linux/tracepoint.h>
+
+#define nfs_show_file_type(ftype) \
+	__print_symbolic(ftype, \
+			{ DT_UNKNOWN, "UNKNOWN" }, \
+			{ DT_FIFO, "FIFO" }, \
+			{ DT_CHR, "CHR" }, \
+			{ DT_DIR, "DIR" }, \
+			{ DT_BLK, "BLK" }, \
+			{ DT_REG, "REG" }, \
+			{ DT_LNK, "LNK" }, \
+			{ DT_SOCK, "SOCK" }, \
+			{ DT_WHT, "WHT" })
+
+#define nfs_show_cache_validity(v) \
+	__print_flags(v, "|", \
+			{ NFS_INO_INVALID_ATTR, "INVALID_ATTR" }, \
+			{ NFS_INO_INVALID_DATA, "INVALID_DATA" }, \
+			{ NFS_INO_INVALID_ATIME, "INVALID_ATIME" }, \
+			{ NFS_INO_INVALID_ACCESS, "INVALID_ACCESS" }, \
+			{ NFS_INO_INVALID_ACL, "INVALID_ACL" }, \
+			{ NFS_INO_REVAL_PAGECACHE, "REVAL_PAGECACHE" }, \
+			{ NFS_INO_REVAL_FORCED, "REVAL_FORCED" }, \
+			{ NFS_INO_INVALID_LABEL, "INVALID_LABEL" })
+
+#define nfs_show_nfsi_flags(v) \
+	__print_flags(v, "|", \
+			{ 1 << NFS_INO_ADVISE_RDPLUS, "ADVISE_RDPLUS" }, \
+			{ 1 << NFS_INO_STALE, "STALE" }, \
+			{ 1 << NFS_INO_FLUSHING, "FLUSHING" }, \
+			{ 1 << NFS_INO_FSCACHE, "FSCACHE" }, \
+			{ 1 << NFS_INO_COMMIT, "COMMIT" }, \
+			{ 1 << NFS_INO_LAYOUTCOMMIT, "NEED_LAYOUTCOMMIT" }, \
+			{ 1 << NFS_INO_LAYOUTCOMMITTING, "LAYOUTCOMMIT" })
+
+DECLARE_EVENT_CLASS(nfs_inode_event,
+		TP_PROTO(
+			const struct inode *inode
+		),
+
+		TP_ARGS(inode),
+
+		TP_STRUCT__entry(
+			__field(dev_t, dev)
+			__field(u32, fhandle)
+			__field(u64, fileid)
+			__field(u64, version)
+		),
+
+		TP_fast_assign(
+			const struct nfs_inode *nfsi = NFS_I(inode);
+			__entry->dev = inode->i_sb->s_dev;
+			__entry->fileid = nfsi->fileid;
+			__entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
+			__entry->version = inode->i_version;
+		),
+
+		TP_printk(
+			"fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu ",
+			MAJOR(__entry->dev), MINOR(__entry->dev),
+			(unsigned long long)__entry->fileid,
+			__entry->fhandle,
+			(unsigned long long)__entry->version
+		)
+);
+
+DECLARE_EVENT_CLASS(nfs_inode_event_done,
+		TP_PROTO(
+			const struct inode *inode,
+			int error
+		),
+
+		TP_ARGS(inode, error),
+
+		TP_STRUCT__entry(
+			__field(int, error)
+			__field(dev_t, dev)
+			__field(u32, fhandle)
+			__field(unsigned char, type)
+			__field(u64, fileid)
+			__field(u64, version)
+			__field(loff_t, size)
+			__field(unsigned long, nfsi_flags)
+			__field(unsigned long, cache_validity)
+		),
+
+		TP_fast_assign(
+			const struct nfs_inode *nfsi = NFS_I(inode);
+			__entry->error = error;
+			__entry->dev = inode->i_sb->s_dev;
+			__entry->fileid = nfsi->fileid;
+			__entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
+			__entry->type = nfs_umode_to_dtype(inode->i_mode);
+			__entry->version = inode->i_version;
+			__entry->size = i_size_read(inode);
+			__entry->nfsi_flags = nfsi->flags;
+			__entry->cache_validity = nfsi->cache_validity;
+		),
+
+		TP_printk(
+			"error=%d fileid=%02x:%02x:%llu fhandle=0x%08x "
+			"type=%u (%s) version=%llu size=%lld "
+			"cache_validity=%lu (%s) nfs_flags=%ld (%s)",
+			__entry->error,
+			MAJOR(__entry->dev), MINOR(__entry->dev),
+			(unsigned long long)__entry->fileid,
+			__entry->fhandle,
+			__entry->type,
+			nfs_show_file_type(__entry->type),
+			(unsigned long long)__entry->version,
+			(long long)__entry->size,
+			__entry->cache_validity,
+			nfs_show_cache_validity(__entry->cache_validity),
+			__entry->nfsi_flags,
+			nfs_show_nfsi_flags(__entry->nfsi_flags)
+		)
+);
+
+#define DEFINE_NFS_INODE_EVENT(name) \
+	DEFINE_EVENT(nfs_inode_event, name, \
+			TP_PROTO( \
+				const struct inode *inode \
+			), \
+			TP_ARGS(inode))
+#define DEFINE_NFS_INODE_EVENT_DONE(name) \
+	DEFINE_EVENT(nfs_inode_event_done, name, \
+			TP_PROTO( \
+				const struct inode *inode, \
+				int error \
+			), \
+			TP_ARGS(inode, error))
+DEFINE_NFS_INODE_EVENT(nfs_refresh_inode_enter);
+DEFINE_NFS_INODE_EVENT_DONE(nfs_refresh_inode_exit);
+DEFINE_NFS_INODE_EVENT(nfs_revalidate_inode_enter);
+DEFINE_NFS_INODE_EVENT_DONE(nfs_revalidate_inode_exit);
+DEFINE_NFS_INODE_EVENT(nfs_invalidate_mapping_enter);
+DEFINE_NFS_INODE_EVENT_DONE(nfs_invalidate_mapping_exit);
+DEFINE_NFS_INODE_EVENT(nfs_getattr_enter);
+DEFINE_NFS_INODE_EVENT_DONE(nfs_getattr_exit);
+DEFINE_NFS_INODE_EVENT(nfs_setattr_enter);
+DEFINE_NFS_INODE_EVENT_DONE(nfs_setattr_exit);
+DEFINE_NFS_INODE_EVENT(nfs_writeback_page_enter);
+DEFINE_NFS_INODE_EVENT_DONE(nfs_writeback_page_exit);
+DEFINE_NFS_INODE_EVENT(nfs_writeback_inode_enter);
+DEFINE_NFS_INODE_EVENT_DONE(nfs_writeback_inode_exit);
+DEFINE_NFS_INODE_EVENT(nfs_fsync_enter);
+DEFINE_NFS_INODE_EVENT_DONE(nfs_fsync_exit);
+DEFINE_NFS_INODE_EVENT(nfs_access_enter);
+DEFINE_NFS_INODE_EVENT_DONE(nfs_access_exit);
+
+#define show_lookup_flags(flags) \
+	__print_flags((unsigned long)flags, "|", \
+			{ LOOKUP_AUTOMOUNT, "AUTOMOUNT" }, \
+			{ LOOKUP_DIRECTORY, "DIRECTORY" }, \
+			{ LOOKUP_OPEN, "OPEN" }, \
+			{ LOOKUP_CREATE, "CREATE" }, \
+			{ LOOKUP_EXCL, "EXCL" })
+
+DECLARE_EVENT_CLASS(nfs_lookup_event,
+		TP_PROTO(
+			const struct inode *dir,
+			const struct dentry *dentry,
+			unsigned int flags
+		),
+
+		TP_ARGS(dir, dentry, flags),
+
+		TP_STRUCT__entry(
+			__field(unsigned int, flags)
+			__field(dev_t, dev)
+			__field(u64, dir)
+			__string(name, dentry->d_name.name)
+		),
+
+		TP_fast_assign(
+			__entry->dev = dir->i_sb->s_dev;
+			__entry->dir = NFS_FILEID(dir);
+			__entry->flags = flags;
+			__assign_str(name, dentry->d_name.name);
+		),
+
+		TP_printk(
+			"flags=%u (%s) name=%02x:%02x:%llu/%s",
+			__entry->flags,
+			show_lookup_flags(__entry->flags),
+			MAJOR(__entry->dev), MINOR(__entry->dev),
+			(unsigned long long)__entry->dir,
+			__get_str(name)
+		)
+);
+
+#define DEFINE_NFS_LOOKUP_EVENT(name) \
+	DEFINE_EVENT(nfs_lookup_event, name, \
+			TP_PROTO( \
+				const struct inode *dir, \
+				const struct dentry *dentry, \
+				unsigned int flags \
+			), \
+			TP_ARGS(dir, dentry, flags))
+
+DECLARE_EVENT_CLASS(nfs_lookup_event_done,
+		TP_PROTO(
+			const struct inode *dir,
+			const struct dentry *dentry,
+			unsigned int flags,
+			int error
+		),
+
+		TP_ARGS(dir, dentry, flags, error),
+
+		TP_STRUCT__entry(
+			__field(int, error)
+			__field(unsigned int, flags)
+			__field(dev_t, dev)
+			__field(u64, dir)
+			__string(name, dentry->d_name.name)
+		),
+
+		TP_fast_assign(
+			__entry->dev = dir->i_sb->s_dev;
+			__entry->dir = NFS_FILEID(dir);
+			__entry->error = error;
+			__entry->flags = flags;
+			__assign_str(name, dentry->d_name.name);
+		),
+
+		TP_printk(
+			"error=%d flags=%u (%s) name=%02x:%02x:%llu/%s",
+			__entry->error,
+			__entry->flags,
+			show_lookup_flags(__entry->flags),
+			MAJOR(__entry->dev), MINOR(__entry->dev),
+			(unsigned long long)__entry->dir,
+			__get_str(name)
+		)
+);
+
+#define DEFINE_NFS_LOOKUP_EVENT_DONE(name) \
+	DEFINE_EVENT(nfs_lookup_event_done, name, \
+			TP_PROTO( \
+				const struct inode *dir, \
+				const struct dentry *dentry, \
+				unsigned int flags, \
+				int error \
+			), \
+			TP_ARGS(dir, dentry, flags, error))
+
+DEFINE_NFS_LOOKUP_EVENT(nfs_lookup_enter);
+DEFINE_NFS_LOOKUP_EVENT_DONE(nfs_lookup_exit);
+DEFINE_NFS_LOOKUP_EVENT(nfs_lookup_revalidate_enter);
+DEFINE_NFS_LOOKUP_EVENT_DONE(nfs_lookup_revalidate_exit);
+
+#define show_open_flags(flags) \
+	__print_flags((unsigned long)flags, "|", \
+		{ O_CREAT, "O_CREAT" }, \
+		{ O_EXCL, "O_EXCL" }, \
+		{ O_TRUNC, "O_TRUNC" }, \
+		{ O_APPEND, "O_APPEND" }, \
+		{ O_DSYNC, "O_DSYNC" }, \
+		{ O_DIRECT, "O_DIRECT" }, \
+		{ O_DIRECTORY, "O_DIRECTORY" })
+
+#define show_fmode_flags(mode) \
+	__print_flags(mode, "|", \
+		{ ((__force unsigned long)FMODE_READ), "READ" }, \
+		{ ((__force unsigned long)FMODE_WRITE), "WRITE" }, \
+		{ ((__force unsigned long)FMODE_EXEC), "EXEC" })
+
+TRACE_EVENT(nfs_atomic_open_enter,
+		TP_PROTO(
+			const struct inode *dir,
+			const struct nfs_open_context *ctx,
+			unsigned int flags
+		),
+
+		TP_ARGS(dir, ctx, flags),
+
+		TP_STRUCT__entry(
+			__field(unsigned int, flags)
+			__field(unsigned int, fmode)
+			__field(dev_t, dev)
+			__field(u64, dir)
+			__string(name, ctx->dentry->d_name.name)
+		),
+
+		TP_fast_assign(
+			__entry->dev = dir->i_sb->s_dev;
+			__entry->dir = NFS_FILEID(dir);
+			__entry->flags = flags;
+			__entry->fmode = (__force unsigned int)ctx->mode;
+			__assign_str(name, ctx->dentry->d_name.name);
+		),
+
+		TP_printk(
+			"flags=%u (%s) fmode=%s name=%02x:%02x:%llu/%s",
+			__entry->flags,
+			show_open_flags(__entry->flags),
+			show_fmode_flags(__entry->fmode),
+			MAJOR(__entry->dev), MINOR(__entry->dev),
+			(unsigned long long)__entry->dir,
+			__get_str(name)
+		)
+);
+
+TRACE_EVENT(nfs_atomic_open_exit,
+		TP_PROTO(
+			const struct inode *dir,
+			const struct nfs_open_context *ctx,
+			unsigned int flags,
+			int error
+		),
+
+		TP_ARGS(dir, ctx, flags, error),
+
+		TP_STRUCT__entry(
+			__field(int, error)
+			__field(unsigned int, flags)
+			__field(unsigned int, fmode)
+			__field(dev_t, dev)
+			__field(u64, dir)
+			__string(name, ctx->dentry->d_name.name)
+		),
+
+		TP_fast_assign(
+			__entry->error = error;
+			__entry->dev = dir->i_sb->s_dev;
+			__entry->dir = NFS_FILEID(dir);
+			__entry->flags = flags;
+			__entry->fmode = (__force unsigned int)ctx->mode;
+			__assign_str(name, ctx->dentry->d_name.name);
+		),
+
+		TP_printk(
+			"error=%d flags=%u (%s) fmode=%s "
+			"name=%02x:%02x:%llu/%s",
+			__entry->error,
+			__entry->flags,
+			show_open_flags(__entry->flags),
+			show_fmode_flags(__entry->fmode),
+			MAJOR(__entry->dev), MINOR(__entry->dev),
+			(unsigned long long)__entry->dir,
+			__get_str(name)
+		)
+);
+
+TRACE_EVENT(nfs_create_enter,
+		TP_PROTO(
+			const struct inode *dir,
+			const struct dentry *dentry,
+			unsigned int flags
+		),
+
+		TP_ARGS(dir, dentry, flags),
+
+		TP_STRUCT__entry(
+			__field(unsigned int, flags)
+			__field(dev_t, dev)
+			__field(u64, dir)
+			__string(name, dentry->d_name.name)
+		),
+
+		TP_fast_assign(
+			__entry->dev = dir->i_sb->s_dev;
+			__entry->dir = NFS_FILEID(dir);
+			__entry->flags = flags;
+			__assign_str(name, dentry->d_name.name);
+		),
+
+		TP_printk(
+			"flags=%u (%s) name=%02x:%02x:%llu/%s",
+			__entry->flags,
+			show_open_flags(__entry->flags),
+			MAJOR(__entry->dev), MINOR(__entry->dev),
+			(unsigned long long)__entry->dir,
+			__get_str(name)
+		)
+);
+
+TRACE_EVENT(nfs_create_exit,
+		TP_PROTO(
+			const struct inode *dir,
+			const struct dentry *dentry,
+			unsigned int flags,
+			int error
+		),
+
+		TP_ARGS(dir, dentry, flags, error),
+
+		TP_STRUCT__entry(
+			__field(int, error)
+			__field(unsigned int, flags)
+			__field(dev_t, dev)
+			__field(u64, dir)
+			__string(name, dentry->d_name.name)
+		),
+
+		TP_fast_assign(
+			__entry->error = error;
+			__entry->dev = dir->i_sb->s_dev;
+			__entry->dir = NFS_FILEID(dir);
+			__entry->flags = flags;
+			__assign_str(name, dentry->d_name.name);
+		),
+
+		TP_printk(
+			"error=%d flags=%u (%s) name=%02x:%02x:%llu/%s",
+			__entry->error,
+			__entry->flags,
+			show_open_flags(__entry->flags),
+			MAJOR(__entry->dev), MINOR(__entry->dev),
+			(unsigned long long)__entry->dir,
+			__get_str(name)
+		)
+);
+
+DECLARE_EVENT_CLASS(nfs_directory_event,
+		TP_PROTO(
+			const struct inode *dir,
+			const struct dentry *dentry
+		),
+
+		TP_ARGS(dir, dentry),
+
+		TP_STRUCT__entry(
+			__field(dev_t, dev)
+			__field(u64, dir)
+			__string(name, dentry->d_name.name)
+		),
+
+		TP_fast_assign(
+			__entry->dev = dir->i_sb->s_dev;
+			__entry->dir = NFS_FILEID(dir);
+			__assign_str(name, dentry->d_name.name);
+		),
+
+		TP_printk(
+			"name=%02x:%02x:%llu/%s",
+			MAJOR(__entry->dev), MINOR(__entry->dev),
+			(unsigned long long)__entry->dir,
+			__get_str(name)
+		)
+);
+
+#define DEFINE_NFS_DIRECTORY_EVENT(name) \
+	DEFINE_EVENT(nfs_directory_event, name, \
+			TP_PROTO( \
+				const struct inode *dir, \
+				const struct dentry *dentry \
+			), \
+			TP_ARGS(dir, dentry))
+
+DECLARE_EVENT_CLASS(nfs_directory_event_done,
+		TP_PROTO(
+			const struct inode *dir,
+			const struct dentry *dentry,
+			int error
+		),
+
+		TP_ARGS(dir, dentry, error),
+
+		TP_STRUCT__entry(
+			__field(int, error)
+			__field(dev_t, dev)
+			__field(u64, dir)
+			__string(name, dentry->d_name.name)
+		),
+
+		TP_fast_assign(
+			__entry->dev = dir->i_sb->s_dev;
+			__entry->dir = NFS_FILEID(dir);
+			__entry->error = error;
+			__assign_str(name, dentry->d_name.name);
+		),
+
+		TP_printk(
+			"error=%d name=%02x:%02x:%llu/%s",
+			__entry->error,
+			MAJOR(__entry->dev), MINOR(__entry->dev),
+			(unsigned long long)__entry->dir,
+			__get_str(name)
+		)
+);
+
+#define DEFINE_NFS_DIRECTORY_EVENT_DONE(name) \
+	DEFINE_EVENT(nfs_directory_event_done, name, \
+			TP_PROTO( \
+				const struct inode *dir, \
+				const struct dentry *dentry, \
+				int error \
+			), \
+			TP_ARGS(dir, dentry, error))
+
+DEFINE_NFS_DIRECTORY_EVENT(nfs_mknod_enter);
+DEFINE_NFS_DIRECTORY_EVENT_DONE(nfs_mknod_exit);
+DEFINE_NFS_DIRECTORY_EVENT(nfs_mkdir_enter);
+DEFINE_NFS_DIRECTORY_EVENT_DONE(nfs_mkdir_exit);
+DEFINE_NFS_DIRECTORY_EVENT(nfs_rmdir_enter);
+DEFINE_NFS_DIRECTORY_EVENT_DONE(nfs_rmdir_exit);
+DEFINE_NFS_DIRECTORY_EVENT(nfs_remove_enter);
+DEFINE_NFS_DIRECTORY_EVENT_DONE(nfs_remove_exit);
+DEFINE_NFS_DIRECTORY_EVENT(nfs_unlink_enter);
+DEFINE_NFS_DIRECTORY_EVENT_DONE(nfs_unlink_exit);
+DEFINE_NFS_DIRECTORY_EVENT(nfs_symlink_enter);
+DEFINE_NFS_DIRECTORY_EVENT_DONE(nfs_symlink_exit);
+
+TRACE_EVENT(nfs_link_enter,
+		TP_PROTO(
+			const struct inode *inode,
+			const struct inode *dir,
+			const struct dentry *dentry
+		),
+
+		TP_ARGS(inode, dir, dentry),
+
+		TP_STRUCT__entry(
+			__field(dev_t, dev)
+			__field(u64, fileid)
+			__field(u64, dir)
+			__string(name, dentry->d_name.name)
+		),
+
+		TP_fast_assign(
+			__entry->dev = inode->i_sb->s_dev;
+			__entry->fileid = NFS_FILEID(inode);
+			__entry->dir = NFS_FILEID(dir);
+			__assign_str(name, dentry->d_name.name);
+		),
+
+		TP_printk(
+			"fileid=%02x:%02x:%llu name=%02x:%02x:%llu/%s",
+			MAJOR(__entry->dev), MINOR(__entry->dev),
+			__entry->fileid,
+			MAJOR(__entry->dev), MINOR(__entry->dev),
+			(unsigned long long)__entry->dir,
+			__get_str(name)
+		)
+);
+
+TRACE_EVENT(nfs_link_exit,
+		TP_PROTO(
+			const struct inode *inode,
+			const struct inode *dir,
+			const struct dentry *dentry,
+			int error
+		),
+
+		TP_ARGS(inode, dir, dentry, error),
+
+		TP_STRUCT__entry(
+			__field(int, error)
+			__field(dev_t, dev)
+			__field(u64, fileid)
+			__field(u64, dir)
+			__string(name, dentry->d_name.name)
+		),
+
+		TP_fast_assign(
+			__entry->dev = inode->i_sb->s_dev;
+			__entry->fileid = NFS_FILEID(inode);
+			__entry->dir = NFS_FILEID(dir);
+			__entry->error = error;
+			__assign_str(name, dentry->d_name.name);
+		),
+
+		TP_printk(
+			"error=%d fileid=%02x:%02x:%llu name=%02x:%02x:%llu/%s",
+			__entry->error,
+			MAJOR(__entry->dev), MINOR(__entry->dev),
+			__entry->fileid,
+			MAJOR(__entry->dev), MINOR(__entry->dev),
+			(unsigned long long)__entry->dir,
+			__get_str(name)
+		)
+);
+
+DECLARE_EVENT_CLASS(nfs_rename_event,
+		TP_PROTO(
+			const struct inode *old_dir,
+			const struct dentry *old_dentry,
+			const struct inode *new_dir,
+			const struct dentry *new_dentry
+		),
+
+		TP_ARGS(old_dir, old_dentry, new_dir, new_dentry),
+
+		TP_STRUCT__entry(
+			__field(dev_t, dev)
+			__field(u64, old_dir)
+			__field(u64, new_dir)
+			__string(old_name, old_dentry->d_name.name)
+			__string(new_name, new_dentry->d_name.name)
+		),
+
+		TP_fast_assign(
+			__entry->dev = old_dir->i_sb->s_dev;
+			__entry->old_dir = NFS_FILEID(old_dir);
+			__entry->new_dir = NFS_FILEID(new_dir);
+			__assign_str(old_name, old_dentry->d_name.name);
+			__assign_str(new_name, new_dentry->d_name.name);
+		),
+
+		TP_printk(
+			"old_name=%02x:%02x:%llu/%s new_name=%02x:%02x:%llu/%s",
+			MAJOR(__entry->dev), MINOR(__entry->dev),
+			(unsigned long long)__entry->old_dir,
+			__get_str(old_name),
+			MAJOR(__entry->dev), MINOR(__entry->dev),
+			(unsigned long long)__entry->new_dir,
+			__get_str(new_name)
+		)
+);
+#define DEFINE_NFS_RENAME_EVENT(name) \
+	DEFINE_EVENT(nfs_rename_event, name, \
+			TP_PROTO( \
+				const struct inode *old_dir, \
+				const struct dentry *old_dentry, \
+				const struct inode *new_dir, \
+				const struct dentry *new_dentry \
+			), \
+			TP_ARGS(old_dir, old_dentry, new_dir, new_dentry))
+
+DECLARE_EVENT_CLASS(nfs_rename_event_done,
+		TP_PROTO(
+			const struct inode *old_dir,
+			const struct dentry *old_dentry,
+			const struct inode *new_dir,
+			const struct dentry *new_dentry,
+			int error
+		),
+
+		TP_ARGS(old_dir, old_dentry, new_dir, new_dentry, error),
+
+		TP_STRUCT__entry(
+			__field(dev_t, dev)
+			__field(int, error)
+			__field(u64, old_dir)
+			__string(old_name, old_dentry->d_name.name)
+			__field(u64, new_dir)
+			__string(new_name, new_dentry->d_name.name)
+		),
+
+		TP_fast_assign(
+			__entry->dev = old_dir->i_sb->s_dev;
+			__entry->old_dir = NFS_FILEID(old_dir);
+			__entry->new_dir = NFS_FILEID(new_dir);
+			__entry->error = error;
+			__assign_str(old_name, old_dentry->d_name.name);
+			__assign_str(new_name, new_dentry->d_name.name);
+		),
+
+		TP_printk(
+			"error=%d old_name=%02x:%02x:%llu/%s "
+			"new_name=%02x:%02x:%llu/%s",
+			__entry->error,
+			MAJOR(__entry->dev), MINOR(__entry->dev),
+			(unsigned long long)__entry->old_dir,
+			__get_str(old_name),
+			MAJOR(__entry->dev), MINOR(__entry->dev),
+			(unsigned long long)__entry->new_dir,
+			__get_str(new_name)
+		)
+);
+#define DEFINE_NFS_RENAME_EVENT_DONE(name) \
+	DEFINE_EVENT(nfs_rename_event_done, name, \
+			TP_PROTO( \
+				const struct inode *old_dir, \
+				const struct dentry *old_dentry, \
+				const struct inode *new_dir, \
+				const struct dentry *new_dentry, \
+				int error \
+			), \
+			TP_ARGS(old_dir, old_dentry, new_dir, \
+				new_dentry, error))
+
+DEFINE_NFS_RENAME_EVENT(nfs_rename_enter);
+DEFINE_NFS_RENAME_EVENT_DONE(nfs_rename_exit);
+
+DEFINE_NFS_RENAME_EVENT_DONE(nfs_sillyrename_rename);
+
+TRACE_EVENT(nfs_sillyrename_unlink,
+		TP_PROTO(
+			const struct nfs_unlinkdata *data,
+			int error
+		),
+
+		TP_ARGS(data, error),
+
+		TP_STRUCT__entry(
+			__field(dev_t, dev)
+			__field(int, error)
+			__field(u64, dir)
+			__dynamic_array(char, name, data->args.name.len + 1)
+		),
+
+		TP_fast_assign(
+			struct inode *dir = data->dir;
+			size_t len = data->args.name.len;
+			__entry->dev = dir->i_sb->s_dev;
+			__entry->dir = NFS_FILEID(dir);
+			__entry->error = error;
+			memcpy(__get_dynamic_array(name),
+				data->args.name.name, len);
+			((char *)__get_dynamic_array(name))[len] = 0;
+		),
+
+		TP_printk(
+			"error=%d name=%02x:%02x:%llu/%s",
+			__entry->error,
+			MAJOR(__entry->dev), MINOR(__entry->dev),
+			(unsigned long long)__entry->dir,
+			__get_str(name)
+		)
+);
+#endif /* _TRACE_NFS_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE nfstrace
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 29cfb7a..2ffebf2 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -328,6 +328,19 @@
 }
 EXPORT_SYMBOL_GPL(nfs_pageio_init);
 
+static bool nfs_match_open_context(const struct nfs_open_context *ctx1,
+		const struct nfs_open_context *ctx2)
+{
+	return ctx1->cred == ctx2->cred && ctx1->state == ctx2->state;
+}
+
+static bool nfs_match_lock_context(const struct nfs_lock_context *l1,
+		const struct nfs_lock_context *l2)
+{
+	return l1->lockowner.l_owner == l2->lockowner.l_owner
+		&& l1->lockowner.l_pid == l2->lockowner.l_pid;
+}
+
 /**
  * nfs_can_coalesce_requests - test two requests for compatibility
  * @prev: pointer to nfs_page
@@ -343,13 +356,10 @@
 				      struct nfs_page *req,
 				      struct nfs_pageio_descriptor *pgio)
 {
-	if (req->wb_context->cred != prev->wb_context->cred)
+	if (!nfs_match_open_context(req->wb_context, prev->wb_context))
 		return false;
-	if (req->wb_lock_context->lockowner.l_owner != prev->wb_lock_context->lockowner.l_owner)
-		return false;
-	if (req->wb_lock_context->lockowner.l_pid != prev->wb_lock_context->lockowner.l_pid)
-		return false;
-	if (req->wb_context->state != prev->wb_context->state)
+	if (req->wb_context->dentry->d_inode->i_flock != NULL &&
+	    !nfs_match_lock_context(req->wb_lock_context, prev->wb_lock_context))
 		return false;
 	if (req->wb_pgbase != 0)
 		return false;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 3a3a79d..d75d938 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -33,6 +33,7 @@
 #include "internal.h"
 #include "pnfs.h"
 #include "iostat.h"
+#include "nfs4trace.h"
 
 #define NFSDBG_FACILITY		NFSDBG_PNFS
 #define PNFS_LAYOUTGET_RETRY_TIMEOUT (120*HZ)
@@ -1526,6 +1527,7 @@
 {
 	struct nfs_pgio_header *hdr = data->header;
 
+	trace_nfs4_pnfs_write(data, hdr->pnfs_error);
 	if (!hdr->pnfs_error) {
 		pnfs_set_layoutcommit(data);
 		hdr->mds_ops->rpc_call_done(&data->task, data);
@@ -1680,6 +1682,7 @@
 {
 	struct nfs_pgio_header *hdr = data->header;
 
+	trace_nfs4_pnfs_read(data, hdr->pnfs_error);
 	if (likely(!hdr->pnfs_error)) {
 		__nfs4_read_done_cb(data);
 		hdr->mds_ops->rpc_call_done(&data->task, data);
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index c041c41..a8f57c7 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -623,9 +623,10 @@
 	msg->rpc_proc = &nfs_procedures[NFSPROC_READ];
 }
 
-static void nfs_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data)
+static int nfs_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data)
 {
 	rpc_call_start(task);
+	return 0;
 }
 
 static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data)
@@ -644,9 +645,10 @@
 	msg->rpc_proc = &nfs_procedures[NFSPROC_WRITE];
 }
 
-static void nfs_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data)
+static int nfs_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data)
 {
 	rpc_call_start(task);
+	return 0;
 }
 
 static void nfs_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 70a26c6..31db5c3 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -513,9 +513,10 @@
 void nfs_read_prepare(struct rpc_task *task, void *calldata)
 {
 	struct nfs_read_data *data = calldata;
-	NFS_PROTO(data->header->inode)->read_rpc_prepare(task, data);
-	if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags)))
-		rpc_exit(task, -EIO);
+	int err;
+	err = NFS_PROTO(data->header->inode)->read_rpc_prepare(task, data);
+	if (err)
+		rpc_exit(task, err);
 }
 
 static const struct rpc_call_ops nfs_read_common_ops = {
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index f6db66d..5793f24 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -923,7 +923,7 @@
 		data->nfs_server.port	= NFS_UNSPEC_PORT;
 		data->nfs_server.protocol = XPRT_TRANSPORT_TCP;
 		data->auth_flavors[0]	= RPC_AUTH_MAXFLAVOR;
-		data->auth_flavor_len	= 1;
+		data->auth_flavor_len	= 0;
 		data->minorversion	= 0;
 		data->need_mount	= true;
 		data->net		= current->nsproxy->net_ns;
@@ -1018,6 +1018,13 @@
 	}
 }
 
+static void nfs_set_auth_parsed_mount_data(struct nfs_parsed_mount_data *data,
+		rpc_authflavor_t pseudoflavor)
+{
+	data->auth_flavors[0] = pseudoflavor;
+	data->auth_flavor_len = 1;
+}
+
 /*
  * Parse the value of the 'sec=' option.
  */
@@ -1025,49 +1032,50 @@
 				      struct nfs_parsed_mount_data *mnt)
 {
 	substring_t args[MAX_OPT_ARGS];
+	rpc_authflavor_t pseudoflavor;
 
 	dfprintk(MOUNT, "NFS: parsing sec=%s option\n", value);
 
 	switch (match_token(value, nfs_secflavor_tokens, args)) {
 	case Opt_sec_none:
-		mnt->auth_flavors[0] = RPC_AUTH_NULL;
+		pseudoflavor = RPC_AUTH_NULL;
 		break;
 	case Opt_sec_sys:
-		mnt->auth_flavors[0] = RPC_AUTH_UNIX;
+		pseudoflavor = RPC_AUTH_UNIX;
 		break;
 	case Opt_sec_krb5:
-		mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5;
+		pseudoflavor = RPC_AUTH_GSS_KRB5;
 		break;
 	case Opt_sec_krb5i:
-		mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5I;
+		pseudoflavor = RPC_AUTH_GSS_KRB5I;
 		break;
 	case Opt_sec_krb5p:
-		mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5P;
+		pseudoflavor = RPC_AUTH_GSS_KRB5P;
 		break;
 	case Opt_sec_lkey:
-		mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEY;
+		pseudoflavor = RPC_AUTH_GSS_LKEY;
 		break;
 	case Opt_sec_lkeyi:
-		mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYI;
+		pseudoflavor = RPC_AUTH_GSS_LKEYI;
 		break;
 	case Opt_sec_lkeyp:
-		mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYP;
+		pseudoflavor = RPC_AUTH_GSS_LKEYP;
 		break;
 	case Opt_sec_spkm:
-		mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKM;
+		pseudoflavor = RPC_AUTH_GSS_SPKM;
 		break;
 	case Opt_sec_spkmi:
-		mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMI;
+		pseudoflavor = RPC_AUTH_GSS_SPKMI;
 		break;
 	case Opt_sec_spkmp:
-		mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMP;
+		pseudoflavor = RPC_AUTH_GSS_SPKMP;
 		break;
 	default:
 		return 0;
 	}
 
 	mnt->flags |= NFS_MOUNT_SECFLAVOUR;
-	mnt->auth_flavor_len = 1;
+	nfs_set_auth_parsed_mount_data(mnt, pseudoflavor);
 	return 1;
 }
 
@@ -1729,7 +1737,7 @@
 	 * Was a sec= authflavor specified in the options? First, verify
 	 * whether the server supports it, and then just try to use it if so.
 	 */
-	if (args->auth_flavors[0] != RPC_AUTH_MAXFLAVOR) {
+	if (args->auth_flavor_len > 0) {
 		status = nfs_verify_authflavor(args, authlist, authlist_len);
 		dfprintk(MOUNT, "NFS: using auth flavor %u\n", args->auth_flavors[0]);
 		if (status)
@@ -1760,7 +1768,7 @@
 			/* Fallthrough */
 		}
 		dfprintk(MOUNT, "NFS: attempting to use auth flavor %u\n", flavor);
-		args->auth_flavors[0] = flavor;
+		nfs_set_auth_parsed_mount_data(args, flavor);
 		server = nfs_mod->rpc_ops->create_server(mount_info, nfs_mod);
 		if (!IS_ERR(server))
 			return server;
@@ -1776,7 +1784,7 @@
 
 	/* Last chance! Try AUTH_UNIX */
 	dfprintk(MOUNT, "NFS: attempting to use auth flavor %u\n", RPC_AUTH_UNIX);
-	args->auth_flavors[0] = RPC_AUTH_UNIX;
+	nfs_set_auth_parsed_mount_data(args, RPC_AUTH_UNIX);
 	return nfs_mod->rpc_ops->create_server(mount_info, nfs_mod);
 }
 
@@ -1893,6 +1901,7 @@
 {
 	struct nfs_mount_data *data = (struct nfs_mount_data *)options;
 	struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address;
+	int extra_flags = NFS_MOUNT_LEGACY_INTERFACE;
 
 	if (data == NULL)
 		goto out_no_data;
@@ -1908,6 +1917,8 @@
 			goto out_no_v3;
 		data->root.size = NFS2_FHSIZE;
 		memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE);
+		/* Turn off security negotiation */
+		extra_flags |= NFS_MOUNT_SECFLAVOUR;
 	case 4:
 		if (data->flags & NFS_MOUNT_SECFLAVOUR)
 			goto out_no_sec;
@@ -1935,7 +1946,7 @@
 		 * can deal with.
 		 */
 		args->flags		= data->flags & NFS_MOUNT_FLAGMASK;
-		args->flags		|= NFS_MOUNT_LEGACY_INTERFACE;
+		args->flags		|= extra_flags;
 		args->rsize		= data->rsize;
 		args->wsize		= data->wsize;
 		args->timeo		= data->timeo;
@@ -1959,9 +1970,10 @@
 		args->namlen		= data->namlen;
 		args->bsize		= data->bsize;
 
-		args->auth_flavors[0] = RPC_AUTH_UNIX;
 		if (data->flags & NFS_MOUNT_SECFLAVOUR)
-			args->auth_flavors[0] = data->pseudoflavor;
+			nfs_set_auth_parsed_mount_data(args, data->pseudoflavor);
+		else
+			nfs_set_auth_parsed_mount_data(args, RPC_AUTH_UNIX);
 		if (!args->nfs_server.hostname)
 			goto out_nomem;
 
@@ -2084,6 +2096,8 @@
 		max_namelen = NFS4_MAXNAMLEN;
 		max_pathlen = NFS4_MAXPATHLEN;
 		nfs_validate_transport_protocol(args);
+		if (args->nfs_server.protocol == XPRT_TRANSPORT_UDP)
+			goto out_invalid_transport_udp;
 		nfs4_validate_mount_flags(args);
 #else
 		goto out_v4_not_compiled;
@@ -2106,6 +2120,10 @@
 out_v4_not_compiled:
 	dfprintk(MOUNT, "NFS: NFSv4 is not compiled into kernel\n");
 	return -EPROTONOSUPPORT;
+#else
+out_invalid_transport_udp:
+	dfprintk(MOUNT, "NFSv4: Unsupported transport protocol udp\n");
+	return -EINVAL;
 #endif /* !CONFIG_NFS_V4 */
 
 out_no_address:
@@ -2170,7 +2188,7 @@
 	data->rsize = nfss->rsize;
 	data->wsize = nfss->wsize;
 	data->retrans = nfss->client->cl_timeout->to_retries;
-	data->auth_flavors[0] = nfss->client->cl_auth->au_flavor;
+	nfs_set_auth_parsed_mount_data(data, nfss->client->cl_auth->au_flavor);
 	data->acregmin = nfss->acregmin / HZ;
 	data->acregmax = nfss->acregmax / HZ;
 	data->acdirmin = nfss->acdirmin / HZ;
@@ -2277,6 +2295,18 @@
  	nfs_initialise_sb(sb);
 }
 
+#define NFS_MOUNT_CMP_FLAGMASK ~(NFS_MOUNT_INTR \
+		| NFS_MOUNT_SECURE \
+		| NFS_MOUNT_TCP \
+		| NFS_MOUNT_VER3 \
+		| NFS_MOUNT_KERBEROS \
+		| NFS_MOUNT_NONLM \
+		| NFS_MOUNT_BROKEN_SUID \
+		| NFS_MOUNT_STRICTLOCK \
+		| NFS_MOUNT_UNSHARED \
+		| NFS_MOUNT_NORESVPORT \
+		| NFS_MOUNT_LEGACY_INTERFACE)
+
 static int nfs_compare_mount_options(const struct super_block *s, const struct nfs_server *b, int flags)
 {
 	const struct nfs_server *a = s->s_fs_info;
@@ -2287,7 +2317,7 @@
 		goto Ebusy;
 	if (a->nfs_client != b->nfs_client)
 		goto Ebusy;
-	if (a->flags != b->flags)
+	if ((a->flags ^ b->flags) & NFS_MOUNT_CMP_FLAGMASK)
 		goto Ebusy;
 	if (a->wsize != b->wsize)
 		goto Ebusy;
@@ -2301,7 +2331,8 @@
 		goto Ebusy;
 	if (a->acdirmax != b->acdirmax)
 		goto Ebusy;
-	if (clnt_a->cl_auth->au_flavor != clnt_b->cl_auth->au_flavor)
+	if (b->flags & NFS_MOUNT_SECFLAVOUR &&
+	   clnt_a->cl_auth->au_flavor != clnt_b->cl_auth->au_flavor)
 		goto Ebusy;
 	return 1;
 Ebusy:
@@ -2673,15 +2704,17 @@
 			goto out_no_address;
 		args->nfs_server.port = ntohs(((struct sockaddr_in *)sap)->sin_port);
 
-		args->auth_flavors[0] = RPC_AUTH_UNIX;
 		if (data->auth_flavourlen) {
+			rpc_authflavor_t pseudoflavor;
 			if (data->auth_flavourlen > 1)
 				goto out_inval_auth;
-			if (copy_from_user(&args->auth_flavors[0],
+			if (copy_from_user(&pseudoflavor,
 					   data->auth_flavours,
-					   sizeof(args->auth_flavors[0])))
+					   sizeof(pseudoflavor)))
 				return -EFAULT;
-		}
+			nfs_set_auth_parsed_mount_data(args, pseudoflavor);
+		} else
+			nfs_set_auth_parsed_mount_data(args, RPC_AUTH_UNIX);
 
 		c = strndup_user(data->hostname.data, NFS4_MAXNAMLEN);
 		if (IS_ERR(c))
@@ -2715,6 +2748,8 @@
 		args->acdirmax	= data->acdirmax;
 		args->nfs_server.protocol = data->proto;
 		nfs_validate_transport_protocol(args);
+		if (args->nfs_server.protocol == XPRT_TRANSPORT_UDP)
+			goto out_invalid_transport_udp;
 
 		break;
 	default:
@@ -2735,6 +2770,10 @@
 out_no_address:
 	dfprintk(MOUNT, "NFS4: mount program didn't pass remote address\n");
 	return -EINVAL;
+
+out_invalid_transport_udp:
+	dfprintk(MOUNT, "NFSv4: Unsupported transport protocol udp\n");
+	return -EINVAL;
 }
 
 /*
@@ -2750,6 +2789,7 @@
 unsigned short max_session_slots = NFS4_DEF_SLOT_TABLE_SIZE;
 unsigned short send_implementation_id = 1;
 char nfs4_client_id_uniquifier[NFS4_CLIENT_ID_UNIQ_LEN] = "";
+bool recover_lost_locks = false;
 
 EXPORT_SYMBOL_GPL(nfs_callback_set_tcpport);
 EXPORT_SYMBOL_GPL(nfs_callback_tcpport);
@@ -2758,6 +2798,7 @@
 EXPORT_SYMBOL_GPL(max_session_slots);
 EXPORT_SYMBOL_GPL(send_implementation_id);
 EXPORT_SYMBOL_GPL(nfs4_client_id_uniquifier);
+EXPORT_SYMBOL_GPL(recover_lost_locks);
 
 #define NFS_CALLBACK_MAXPORTNR (65535U)
 
@@ -2795,4 +2836,10 @@
 		"Send implementation ID with NFSv4.1 exchange_id");
 MODULE_PARM_DESC(nfs4_unique_id, "nfs_client_id4 uniquifier string");
 
+module_param(recover_lost_locks, bool, 0644);
+MODULE_PARM_DESC(recover_lost_locks,
+		 "If the server reports that a lock might be lost, "
+		 "try to recover it risking data corruption.");
+
+
 #endif /* CONFIG_NFS_V4 */
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 60395ad..bb939ed 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -20,6 +20,8 @@
 #include "iostat.h"
 #include "delegation.h"
 
+#include "nfstrace.h"
+
 /**
  * nfs_free_unlinkdata - release data from a sillydelete operation.
  * @data: pointer to unlink structure.
@@ -77,6 +79,7 @@
 	struct nfs_unlinkdata *data = calldata;
 	struct inode *dir = data->dir;
 
+	trace_nfs_sillyrename_unlink(data, task->tk_status);
 	if (!NFS_PROTO(dir)->unlink_done(task, dir))
 		rpc_restart_call_prepare(task);
 }
@@ -204,6 +207,13 @@
 	return ret;
 }
 
+void nfs_wait_on_sillyrename(struct dentry *dentry)
+{
+	struct nfs_inode *nfsi = NFS_I(dentry->d_inode);
+
+	wait_event(nfsi->waitqueue, atomic_read(&nfsi->silly_count) <= 1);
+}
+
 void nfs_block_sillyrename(struct dentry *dentry)
 {
 	struct nfs_inode *nfsi = NFS_I(dentry->d_inode);
@@ -336,6 +346,8 @@
 	struct inode *new_dir = data->new_dir;
 	struct dentry *old_dentry = data->old_dentry;
 
+	trace_nfs_sillyrename_rename(old_dir, old_dentry,
+			new_dir, data->new_dentry, task->tk_status);
 	if (!NFS_PROTO(old_dir)->rename_done(task, old_dir, new_dir)) {
 		rpc_restart_call_prepare(task);
 		return;
@@ -444,6 +456,14 @@
 	return rpc_run_task(&task_setup_data);
 }
 
+#define SILLYNAME_PREFIX ".nfs"
+#define SILLYNAME_PREFIX_LEN ((unsigned)sizeof(SILLYNAME_PREFIX) - 1)
+#define SILLYNAME_FILEID_LEN ((unsigned)sizeof(u64) << 1)
+#define SILLYNAME_COUNTER_LEN ((unsigned)sizeof(unsigned int) << 1)
+#define SILLYNAME_LEN (SILLYNAME_PREFIX_LEN + \
+		SILLYNAME_FILEID_LEN + \
+		SILLYNAME_COUNTER_LEN)
+
 /**
  * nfs_sillyrename - Perform a silly-rename of a dentry
  * @dir: inode of directory that contains dentry
@@ -469,10 +489,8 @@
 nfs_sillyrename(struct inode *dir, struct dentry *dentry)
 {
 	static unsigned int sillycounter;
-	const int      fileidsize  = sizeof(NFS_FILEID(dentry->d_inode))*2;
-	const int      countersize = sizeof(sillycounter)*2;
-	const int      slen        = sizeof(".nfs")+fileidsize+countersize-1;
-	char           silly[slen+1];
+	unsigned char silly[SILLYNAME_LEN + 1];
+	unsigned long long fileid;
 	struct dentry *sdentry;
 	struct rpc_task *task;
 	int            error = -EIO;
@@ -489,20 +507,20 @@
 	if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
 		goto out;
 
-	sprintf(silly, ".nfs%*.*Lx",
-		fileidsize, fileidsize,
-		(unsigned long long)NFS_FILEID(dentry->d_inode));
+	fileid = NFS_FILEID(dentry->d_inode);
 
 	/* Return delegation in anticipation of the rename */
 	NFS_PROTO(dentry->d_inode)->return_delegation(dentry->d_inode);
 
 	sdentry = NULL;
 	do {
-		char *suffix = silly + slen - countersize;
-
+		int slen;
 		dput(sdentry);
 		sillycounter++;
-		sprintf(suffix, "%*.*x", countersize, countersize, sillycounter);
+		slen = scnprintf(silly, sizeof(silly),
+				SILLYNAME_PREFIX "%0*llx%0*x",
+				SILLYNAME_FILEID_LEN, fileid,
+				SILLYNAME_COUNTER_LEN, sillycounter);
 
 		dfprintk(VFS, "NFS: trying to rename %s to %s\n",
 				dentry->d_name.name, silly);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index f1bdb72..ac1dc33 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -31,6 +31,8 @@
 #include "fscache.h"
 #include "pnfs.h"
 
+#include "nfstrace.h"
+
 #define NFSDBG_FACILITY		NFSDBG_PAGECACHE
 
 #define MIN_POOL_WRITE		(32)
@@ -861,7 +863,7 @@
 			return 0;
 		l_ctx = req->wb_lock_context;
 		do_flush = req->wb_page != page || req->wb_context != ctx;
-		if (l_ctx) {
+		if (l_ctx && ctx->dentry->d_inode->i_flock != NULL) {
 			do_flush |= l_ctx->lockowner.l_owner != current->files
 				|| l_ctx->lockowner.l_pid != current->tgid;
 		}
@@ -874,6 +876,33 @@
 }
 
 /*
+ * Avoid buffered writes when a open context credential's key would
+ * expire soon.
+ *
+ * Returns -EACCES if the key will expire within RPC_KEY_EXPIRE_FAIL.
+ *
+ * Return 0 and set a credential flag which triggers the inode to flush
+ * and performs  NFS_FILE_SYNC writes if the key will expired within
+ * RPC_KEY_EXPIRE_TIMEO.
+ */
+int
+nfs_key_timeout_notify(struct file *filp, struct inode *inode)
+{
+	struct nfs_open_context *ctx = nfs_file_open_context(filp);
+	struct rpc_auth *auth = NFS_SERVER(inode)->client->cl_auth;
+
+	return rpcauth_key_timeout_notify(auth, ctx->cred);
+}
+
+/*
+ * Test if the open context credential key is marked to expire soon.
+ */
+bool nfs_ctx_key_to_expire(struct nfs_open_context *ctx)
+{
+	return rpcauth_cred_key_to_expire(ctx->cred);
+}
+
+/*
  * If the page cache is marked as unsafe or invalid, then we can't rely on
  * the PageUptodate() flag. In this case, we will need to turn off
  * write optimisations that depend on the page contents being correct.
@@ -993,6 +1022,9 @@
 		data->args.count,
 		(unsigned long long)data->args.offset);
 
+	nfs4_state_protect_write(NFS_SERVER(inode)->nfs_client,
+				 &task_setup_data.rpc_client, &msg, data);
+
 	task = rpc_run_task(&task_setup_data);
 	if (IS_ERR(task)) {
 		ret = PTR_ERR(task);
@@ -1265,9 +1297,10 @@
 void nfs_write_prepare(struct rpc_task *task, void *calldata)
 {
 	struct nfs_write_data *data = calldata;
-	NFS_PROTO(data->header->inode)->write_rpc_prepare(task, data);
-	if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags)))
-		rpc_exit(task, -EIO);
+	int err;
+	err = NFS_PROTO(data->header->inode)->write_rpc_prepare(task, data);
+	if (err)
+		rpc_exit(task, err);
 }
 
 void nfs_commit_prepare(struct rpc_task *task, void *calldata)
@@ -1458,6 +1491,9 @@
 
 	dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
 
+	nfs4_state_protect(NFS_SERVER(data->inode)->nfs_client,
+		NFS_SP4_MACH_CRED_COMMIT, &task_setup_data.rpc_client, &msg);
+
 	task = rpc_run_task(&task_setup_data);
 	if (IS_ERR(task))
 		return PTR_ERR(task);
@@ -1732,8 +1768,14 @@
 		.range_start = 0,
 		.range_end = LLONG_MAX,
 	};
+	int ret;
 
-	return sync_inode(inode, &wbc);
+	trace_nfs_writeback_inode_enter(inode);
+
+	ret = sync_inode(inode, &wbc);
+
+	trace_nfs_writeback_inode_exit(inode, ret);
+	return ret;
 }
 EXPORT_SYMBOL_GPL(nfs_wb_all);
 
@@ -1781,6 +1823,8 @@
 	};
 	int ret;
 
+	trace_nfs_writeback_page_enter(inode);
+
 	for (;;) {
 		wait_on_page_writeback(page);
 		if (clear_page_dirty_for_io(page)) {
@@ -1789,14 +1833,15 @@
 				goto out_error;
 			continue;
 		}
+		ret = 0;
 		if (!PagePrivate(page))
 			break;
 		ret = nfs_commit_inode(inode, FLUSH_SYNC);
 		if (ret < 0)
 			goto out_error;
 	}
-	return 0;
 out_error:
+	trace_nfs_writeback_page_exit(inode, ret);
 	return ret;
 }
 
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index c7314f1..dea86e8 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -27,6 +27,7 @@
 	case Q_SYNC:
 	case Q_GETINFO:
 	case Q_XGETQSTAT:
+	case Q_XGETQSTATV:
 	case Q_XQUOTASYNC:
 		break;
 	/* allow to query information for dquots we "own" */
@@ -217,6 +218,31 @@
 	return ret;
 }
 
+static int quota_getxstatev(struct super_block *sb, void __user *addr)
+{
+	struct fs_quota_statv fqs;
+	int ret;
+
+	if (!sb->s_qcop->get_xstatev)
+		return -ENOSYS;
+
+	memset(&fqs, 0, sizeof(fqs));
+	if (copy_from_user(&fqs, addr, 1)) /* Just read qs_version */
+		return -EFAULT;
+
+	/* If this kernel doesn't support user specified version, fail */
+	switch (fqs.qs_version) {
+	case FS_QSTATV_VERSION1:
+		break;
+	default:
+		return -EINVAL;
+	}
+	ret = sb->s_qcop->get_xstatev(sb, &fqs);
+	if (!ret && copy_to_user(addr, &fqs, sizeof(fqs)))
+		return -EFAULT;
+	return ret;
+}
+
 static int quota_setxquota(struct super_block *sb, int type, qid_t id,
 			   void __user *addr)
 {
@@ -293,6 +319,8 @@
 		return quota_setxstate(sb, cmd, addr);
 	case Q_XGETQSTAT:
 		return quota_getxstate(sb, addr);
+	case Q_XGETQSTATV:
+		return quota_getxstatev(sb, addr);
 	case Q_XSETQLIM:
 		return quota_setxquota(sb, type, id, addr);
 	case Q_XGETQUOTA:
@@ -317,6 +345,7 @@
 	case Q_GETINFO:
 	case Q_SYNC:
 	case Q_XGETQSTAT:
+	case Q_XGETQSTATV:
 	case Q_XGETQUOTA:
 	case Q_XQUOTASYNC:
 		return 0;
diff --git a/fs/super.c b/fs/super.c
index 5536a95..f6961ea 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -71,7 +71,7 @@
 	if (!grab_super_passive(sb))
 		return -1;
 
-	if (sb->s_op && sb->s_op->nr_cached_objects)
+	if (sb->s_op->nr_cached_objects)
 		fs_objects = sb->s_op->nr_cached_objects(sb);
 
 	total_objects = sb->s_nr_dentry_unused +
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 4a45080..0719e4d 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -27,9 +27,12 @@
 
 # highlevel code
 xfs-y				+= xfs_aops.o \
+				   xfs_attr_inactive.o \
+				   xfs_attr_list.o \
 				   xfs_bit.o \
+				   xfs_bmap_util.o \
 				   xfs_buf.o \
-				   xfs_dfrag.o \
+				   xfs_dir2_readdir.o \
 				   xfs_discard.o \
 				   xfs_error.o \
 				   xfs_export.o \
@@ -44,11 +47,11 @@
 				   xfs_iops.o \
 				   xfs_itable.o \
 				   xfs_message.o \
+				   xfs_mount.o \
 				   xfs_mru_cache.o \
-				   xfs_rename.o \
 				   xfs_super.o \
-				   xfs_utils.o \
-				   xfs_vnodeops.o \
+				   xfs_symlink.o \
+				   xfs_trans.o \
 				   xfs_xattr.o \
 				   kmem.o \
 				   uuid.o
@@ -73,10 +76,13 @@
 				   xfs_ialloc_btree.o \
 				   xfs_icreate_item.o \
 				   xfs_inode.o \
+				   xfs_inode_fork.o \
+				   xfs_inode_buf.o \
 				   xfs_log_recover.o \
-				   xfs_mount.o \
-				   xfs_symlink.o \
-				   xfs_trans.o
+				   xfs_log_rlimit.o \
+				   xfs_sb.o \
+				   xfs_symlink_remote.o \
+				   xfs_trans_resv.o
 
 # low-level transaction/log code
 xfs-y				+= xfs_log.o \
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 306d883..6951896 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -16,11 +16,13 @@
  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #include "xfs.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
 #include "xfs_acl.h"
 #include "xfs_attr.h"
 #include "xfs_bmap_btree.h"
 #include "xfs_inode.h"
-#include "xfs_vnodeops.h"
+#include "xfs_ag.h"
 #include "xfs_sb.h"
 #include "xfs_mount.h"
 #include "xfs_trace.h"
@@ -68,14 +70,15 @@
 
 		switch (acl_e->e_tag) {
 		case ACL_USER:
+			acl_e->e_uid = xfs_uid_to_kuid(be32_to_cpu(ace->ae_id));
+			break;
 		case ACL_GROUP:
-			acl_e->e_id = be32_to_cpu(ace->ae_id);
+			acl_e->e_gid = xfs_gid_to_kgid(be32_to_cpu(ace->ae_id));
 			break;
 		case ACL_USER_OBJ:
 		case ACL_GROUP_OBJ:
 		case ACL_MASK:
 		case ACL_OTHER:
-			acl_e->e_id = ACL_UNDEFINED_ID;
 			break;
 		default:
 			goto fail;
@@ -101,7 +104,18 @@
 		acl_e = &acl->a_entries[i];
 
 		ace->ae_tag = cpu_to_be32(acl_e->e_tag);
-		ace->ae_id = cpu_to_be32(acl_e->e_id);
+		switch (acl_e->e_tag) {
+		case ACL_USER:
+			ace->ae_id = cpu_to_be32(xfs_kuid_to_uid(acl_e->e_uid));
+			break;
+		case ACL_GROUP:
+			ace->ae_id = cpu_to_be32(xfs_kgid_to_gid(acl_e->e_gid));
+			break;
+		default:
+			ace->ae_id = cpu_to_be32(ACL_UNDEFINED_ID);
+			break;
+		}
+
 		ace->ae_perm = cpu_to_be16(acl_e->e_perm);
 	}
 }
@@ -360,7 +374,7 @@
 		return -EINVAL;
 	if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
 		return value ? -EACCES : 0;
-	if ((current_fsuid() != inode->i_uid) && !capable(CAP_FOWNER))
+	if (!inode_owner_or_capable(inode))
 		return -EPERM;
 
 	if (!value)
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index 317aa86..1cb740a 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -227,59 +227,6 @@
 } xfs_agfl_t;
 
 /*
- * Per-ag incore structure, copies of information in agf and agi,
- * to improve the performance of allocation group selection.
- */
-#define XFS_PAGB_NUM_SLOTS	128
-
-typedef struct xfs_perag {
-	struct xfs_mount *pag_mount;	/* owner filesystem */
-	xfs_agnumber_t	pag_agno;	/* AG this structure belongs to */
-	atomic_t	pag_ref;	/* perag reference count */
-	char		pagf_init;	/* this agf's entry is initialized */
-	char		pagi_init;	/* this agi's entry is initialized */
-	char		pagf_metadata;	/* the agf is preferred to be metadata */
-	char		pagi_inodeok;	/* The agi is ok for inodes */
-	__uint8_t	pagf_levels[XFS_BTNUM_AGF];
-					/* # of levels in bno & cnt btree */
-	__uint32_t	pagf_flcount;	/* count of blocks in freelist */
-	xfs_extlen_t	pagf_freeblks;	/* total free blocks */
-	xfs_extlen_t	pagf_longest;	/* longest free space */
-	__uint32_t	pagf_btreeblks;	/* # of blocks held in AGF btrees */
-	xfs_agino_t	pagi_freecount;	/* number of free inodes */
-	xfs_agino_t	pagi_count;	/* number of allocated inodes */
-
-	/*
-	 * Inode allocation search lookup optimisation.
-	 * If the pagino matches, the search for new inodes
-	 * doesn't need to search the near ones again straight away
-	 */
-	xfs_agino_t	pagl_pagino;
-	xfs_agino_t	pagl_leftrec;
-	xfs_agino_t	pagl_rightrec;
-#ifdef __KERNEL__
-	spinlock_t	pagb_lock;	/* lock for pagb_tree */
-	struct rb_root	pagb_tree;	/* ordered tree of busy extents */
-
-	atomic_t        pagf_fstrms;    /* # of filestreams active in this AG */
-
-	spinlock_t	pag_ici_lock;	/* incore inode cache lock */
-	struct radix_tree_root pag_ici_root;	/* incore inode cache root */
-	int		pag_ici_reclaimable;	/* reclaimable inodes */
-	struct mutex	pag_ici_reclaim_lock;	/* serialisation point */
-	unsigned long	pag_ici_reclaim_cursor;	/* reclaim restart point */
-
-	/* buffer cache index */
-	spinlock_t	pag_buf_lock;	/* lock for pag_buf_tree */
-	struct rb_root	pag_buf_tree;	/* ordered tree of active buffers */
-
-	/* for rcu-safe freeing */
-	struct rcu_head	rcu_head;
-#endif
-	int		pagb_count;	/* pagb slots in use */
-} xfs_perag_t;
-
-/*
  * tags for inode radix tree
  */
 #define XFS_ICI_NO_TAG		(-1)	/* special flag for an untagged lookup
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 71596e5..5a1393f 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -878,7 +878,7 @@
 	xfs_agblock_t	ltnew;		/* useful start bno of left side */
 	xfs_extlen_t	rlen;		/* length of returned extent */
 	int		forced = 0;
-#if defined(DEBUG) && defined(__KERNEL__)
+#ifdef DEBUG
 	/*
 	 * Randomly don't execute the first algorithm.
 	 */
@@ -938,8 +938,8 @@
 		xfs_extlen_t	blen=0;
 		xfs_agblock_t	bnew=0;
 
-#if defined(DEBUG) && defined(__KERNEL__)
-		if (!dofirst)
+#ifdef DEBUG
+		if (dofirst)
 			break;
 #endif
 		/*
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index e11d654..977da0e 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -28,9 +28,9 @@
 #include "xfs_alloc.h"
 #include "xfs_error.h"
 #include "xfs_iomap.h"
-#include "xfs_vnodeops.h"
 #include "xfs_trace.h"
 #include "xfs_bmap.h"
+#include "xfs_bmap_util.h"
 #include <linux/aio.h>
 #include <linux/gfp.h>
 #include <linux/mpage.h>
@@ -108,7 +108,7 @@
 
 	tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
 
-	error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
+	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0);
 	if (error) {
 		xfs_trans_cancel(tp, 0);
 		return error;
@@ -440,7 +440,7 @@
 		end_page_writeback(page);
 }
 
-static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh)
+static inline int xfs_bio_add_buffer(struct bio *bio, struct buffer_head *bh)
 {
 	return bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
 }
@@ -514,7 +514,7 @@
 				goto retry;
 			}
 
-			if (bio_add_buffer(bio, bh) != bh->b_size) {
+			if (xfs_bio_add_buffer(bio, bh) != bh->b_size) {
 				xfs_submit_ioend_bio(wbc, ioend, bio);
 				goto retry;
 			}
@@ -1498,13 +1498,26 @@
 	loff_t			pos,
 	unsigned		len)
 {
-	loff_t			block_offset = pos & PAGE_MASK;
+	loff_t			block_offset;
 	loff_t			block_start;
 	loff_t			block_end;
 	loff_t			from = pos & (PAGE_CACHE_SIZE - 1);
 	loff_t			to = from + len;
 	struct buffer_head	*bh, *head;
 
+	/*
+	 * The request pos offset might be 32 or 64 bit, this is all fine
+	 * on 64-bit platform.  However, for 64-bit pos request on 32-bit
+	 * platform, the high 32-bit will be masked off if we evaluate the
+	 * block_offset via (pos & PAGE_MASK) because the PAGE_MASK is
+	 * 0xfffff000 as an unsigned long, hence the result is incorrect
+	 * which could cause the following ASSERT failed in most cases.
+	 * In order to avoid this, we can evaluate the block_offset of the
+	 * start of the page by using shifts rather than masks the mismatch
+	 * problem.
+	 */
+	block_offset = (pos >> PAGE_CACHE_SHIFT) << PAGE_CACHE_SHIFT;
+
 	ASSERT(block_offset + from == pos);
 
 	head = page_buffers(page);
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index 20fe3fe..ddcf226 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -17,10 +17,11 @@
  */
 #include "xfs.h"
 #include "xfs_fs.h"
-#include "xfs_types.h"
+#include "xfs_format.h"
 #include "xfs_bit.h"
 #include "xfs_log.h"
 #include "xfs_trans.h"
+#include "xfs_trans_priv.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
 #include "xfs_mount.h"
@@ -32,13 +33,13 @@
 #include "xfs_alloc.h"
 #include "xfs_inode_item.h"
 #include "xfs_bmap.h"
+#include "xfs_bmap_util.h"
 #include "xfs_attr.h"
 #include "xfs_attr_leaf.h"
 #include "xfs_attr_remote.h"
 #include "xfs_error.h"
 #include "xfs_quota.h"
 #include "xfs_trans_space.h"
-#include "xfs_vnodeops.h"
 #include "xfs_trace.h"
 
 /*
@@ -62,7 +63,6 @@
 STATIC int xfs_attr_leaf_get(xfs_da_args_t *args);
 STATIC int xfs_attr_leaf_addname(xfs_da_args_t *args);
 STATIC int xfs_attr_leaf_removename(xfs_da_args_t *args);
-STATIC int xfs_attr_leaf_list(xfs_attr_list_context_t *context);
 
 /*
  * Internal routines when attribute list is more than one block.
@@ -70,7 +70,6 @@
 STATIC int xfs_attr_node_get(xfs_da_args_t *args);
 STATIC int xfs_attr_node_addname(xfs_da_args_t *args);
 STATIC int xfs_attr_node_removename(xfs_da_args_t *args);
-STATIC int xfs_attr_node_list(xfs_attr_list_context_t *context);
 STATIC int xfs_attr_fillstate(xfs_da_state_t *state);
 STATIC int xfs_attr_refillstate(xfs_da_state_t *state);
 
@@ -90,7 +89,7 @@
 	return 0;
 }
 
-STATIC int
+int
 xfs_inode_hasattr(
 	struct xfs_inode	*ip)
 {
@@ -227,13 +226,14 @@
 	int		valuelen,
 	int		flags)
 {
-	xfs_da_args_t	args;
-	xfs_fsblock_t	firstblock;
-	xfs_bmap_free_t flist;
-	int		error, err2, committed;
-	xfs_mount_t	*mp = dp->i_mount;
-	int             rsvd = (flags & ATTR_ROOT) != 0;
-	int		local;
+	xfs_da_args_t		args;
+	xfs_fsblock_t		firstblock;
+	xfs_bmap_free_t		flist;
+	int			error, err2, committed;
+	struct xfs_mount	*mp = dp->i_mount;
+	struct xfs_trans_res	tres;
+	int			rsvd = (flags & ATTR_ROOT) != 0;
+	int			local;
 
 	/*
 	 * Attach the dquots to the inode.
@@ -293,11 +293,11 @@
 	if (rsvd)
 		args.trans->t_flags |= XFS_TRANS_RESERVE;
 
-	error = xfs_trans_reserve(args.trans, args.total,
-				  XFS_ATTRSETM_LOG_RES(mp) +
-				  XFS_ATTRSETRT_LOG_RES(mp) * args.total,
-				  0, XFS_TRANS_PERM_LOG_RES,
-				  XFS_ATTRSET_LOG_COUNT);
+	tres.tr_logres = M_RES(mp)->tr_attrsetm.tr_logres +
+			 M_RES(mp)->tr_attrsetrt.tr_logres * args.total;
+	tres.tr_logcount = XFS_ATTRSET_LOG_COUNT;
+	tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
+	error = xfs_trans_reserve(args.trans, &tres, args.total, 0);
 	if (error) {
 		xfs_trans_cancel(args.trans, 0);
 		return(error);
@@ -517,11 +517,9 @@
 	if (flags & ATTR_ROOT)
 		args.trans->t_flags |= XFS_TRANS_RESERVE;
 
-	if ((error = xfs_trans_reserve(args.trans,
-				      XFS_ATTRRM_SPACE_RES(mp),
-				      XFS_ATTRRM_LOG_RES(mp),
-				      0, XFS_TRANS_PERM_LOG_RES,
-				      XFS_ATTRRM_LOG_COUNT))) {
+	error = xfs_trans_reserve(args.trans, &M_RES(mp)->tr_attrrm,
+				  XFS_ATTRRM_SPACE_RES(mp), 0);
+	if (error) {
 		xfs_trans_cancel(args.trans, 0);
 		return(error);
 	}
@@ -611,228 +609,6 @@
 	return xfs_attr_remove_int(dp, &xname, flags);
 }
 
-int
-xfs_attr_list_int(xfs_attr_list_context_t *context)
-{
-	int error;
-	xfs_inode_t *dp = context->dp;
-
-	XFS_STATS_INC(xs_attr_list);
-
-	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
-		return EIO;
-
-	xfs_ilock(dp, XFS_ILOCK_SHARED);
-
-	/*
-	 * Decide on what work routines to call based on the inode size.
-	 */
-	if (!xfs_inode_hasattr(dp)) {
-		error = 0;
-	} else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
-		error = xfs_attr_shortform_list(context);
-	} else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
-		error = xfs_attr_leaf_list(context);
-	} else {
-		error = xfs_attr_node_list(context);
-	}
-
-	xfs_iunlock(dp, XFS_ILOCK_SHARED);
-
-	return error;
-}
-
-#define	ATTR_ENTBASESIZE		/* minimum bytes used by an attr */ \
-	(((struct attrlist_ent *) 0)->a_name - (char *) 0)
-#define	ATTR_ENTSIZE(namelen)		/* actual bytes used by an attr */ \
-	((ATTR_ENTBASESIZE + (namelen) + 1 + sizeof(u_int32_t)-1) \
-	 & ~(sizeof(u_int32_t)-1))
-
-/*
- * Format an attribute and copy it out to the user's buffer.
- * Take care to check values and protect against them changing later,
- * we may be reading them directly out of a user buffer.
- */
-/*ARGSUSED*/
-STATIC int
-xfs_attr_put_listent(
-	xfs_attr_list_context_t *context,
-	int		flags,
-	unsigned char	*name,
-	int		namelen,
-	int		valuelen,
-	unsigned char	*value)
-{
-	struct attrlist *alist = (struct attrlist *)context->alist;
-	attrlist_ent_t *aep;
-	int arraytop;
-
-	ASSERT(!(context->flags & ATTR_KERNOVAL));
-	ASSERT(context->count >= 0);
-	ASSERT(context->count < (ATTR_MAX_VALUELEN/8));
-	ASSERT(context->firstu >= sizeof(*alist));
-	ASSERT(context->firstu <= context->bufsize);
-
-	/*
-	 * Only list entries in the right namespace.
-	 */
-	if (((context->flags & ATTR_SECURE) == 0) !=
-	    ((flags & XFS_ATTR_SECURE) == 0))
-		return 0;
-	if (((context->flags & ATTR_ROOT) == 0) !=
-	    ((flags & XFS_ATTR_ROOT) == 0))
-		return 0;
-
-	arraytop = sizeof(*alist) +
-			context->count * sizeof(alist->al_offset[0]);
-	context->firstu -= ATTR_ENTSIZE(namelen);
-	if (context->firstu < arraytop) {
-		trace_xfs_attr_list_full(context);
-		alist->al_more = 1;
-		context->seen_enough = 1;
-		return 1;
-	}
-
-	aep = (attrlist_ent_t *)&context->alist[context->firstu];
-	aep->a_valuelen = valuelen;
-	memcpy(aep->a_name, name, namelen);
-	aep->a_name[namelen] = 0;
-	alist->al_offset[context->count++] = context->firstu;
-	alist->al_count = context->count;
-	trace_xfs_attr_list_add(context);
-	return 0;
-}
-
-/*
- * Generate a list of extended attribute names and optionally
- * also value lengths.  Positive return value follows the XFS
- * convention of being an error, zero or negative return code
- * is the length of the buffer returned (negated), indicating
- * success.
- */
-int
-xfs_attr_list(
-	xfs_inode_t	*dp,
-	char		*buffer,
-	int		bufsize,
-	int		flags,
-	attrlist_cursor_kern_t *cursor)
-{
-	xfs_attr_list_context_t context;
-	struct attrlist *alist;
-	int error;
-
-	/*
-	 * Validate the cursor.
-	 */
-	if (cursor->pad1 || cursor->pad2)
-		return(XFS_ERROR(EINVAL));
-	if ((cursor->initted == 0) &&
-	    (cursor->hashval || cursor->blkno || cursor->offset))
-		return XFS_ERROR(EINVAL);
-
-	/*
-	 * Check for a properly aligned buffer.
-	 */
-	if (((long)buffer) & (sizeof(int)-1))
-		return XFS_ERROR(EFAULT);
-	if (flags & ATTR_KERNOVAL)
-		bufsize = 0;
-
-	/*
-	 * Initialize the output buffer.
-	 */
-	memset(&context, 0, sizeof(context));
-	context.dp = dp;
-	context.cursor = cursor;
-	context.resynch = 1;
-	context.flags = flags;
-	context.alist = buffer;
-	context.bufsize = (bufsize & ~(sizeof(int)-1));  /* align */
-	context.firstu = context.bufsize;
-	context.put_listent = xfs_attr_put_listent;
-
-	alist = (struct attrlist *)context.alist;
-	alist->al_count = 0;
-	alist->al_more = 0;
-	alist->al_offset[0] = context.bufsize;
-
-	error = xfs_attr_list_int(&context);
-	ASSERT(error >= 0);
-	return error;
-}
-
-int								/* error */
-xfs_attr_inactive(xfs_inode_t *dp)
-{
-	xfs_trans_t *trans;
-	xfs_mount_t *mp;
-	int error;
-
-	mp = dp->i_mount;
-	ASSERT(! XFS_NOT_DQATTACHED(mp, dp));
-
-	xfs_ilock(dp, XFS_ILOCK_SHARED);
-	if (!xfs_inode_hasattr(dp) ||
-	    dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
-		xfs_iunlock(dp, XFS_ILOCK_SHARED);
-		return 0;
-	}
-	xfs_iunlock(dp, XFS_ILOCK_SHARED);
-
-	/*
-	 * Start our first transaction of the day.
-	 *
-	 * All future transactions during this code must be "chained" off
-	 * this one via the trans_dup() call.  All transactions will contain
-	 * the inode, and the inode will always be marked with trans_ihold().
-	 * Since the inode will be locked in all transactions, we must log
-	 * the inode in every transaction to let it float upward through
-	 * the log.
-	 */
-	trans = xfs_trans_alloc(mp, XFS_TRANS_ATTRINVAL);
-	if ((error = xfs_trans_reserve(trans, 0, XFS_ATTRINVAL_LOG_RES(mp), 0,
-				      XFS_TRANS_PERM_LOG_RES,
-				      XFS_ATTRINVAL_LOG_COUNT))) {
-		xfs_trans_cancel(trans, 0);
-		return(error);
-	}
-	xfs_ilock(dp, XFS_ILOCK_EXCL);
-
-	/*
-	 * No need to make quota reservations here. We expect to release some
-	 * blocks, not allocate, in the common case.
-	 */
-	xfs_trans_ijoin(trans, dp, 0);
-
-	/*
-	 * Decide on what work routines to call based on the inode size.
-	 */
-	if (!xfs_inode_hasattr(dp) ||
-	    dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
-		error = 0;
-		goto out;
-	}
-	error = xfs_attr3_root_inactive(&trans, dp);
-	if (error)
-		goto out;
-
-	error = xfs_itruncate_extents(&trans, dp, XFS_ATTR_FORK, 0);
-	if (error)
-		goto out;
-
-	error = xfs_trans_commit(trans, XFS_TRANS_RELEASE_LOG_RES);
-	xfs_iunlock(dp, XFS_ILOCK_EXCL);
-
-	return(error);
-
-out:
-	xfs_trans_cancel(trans, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
-	xfs_iunlock(dp, XFS_ILOCK_EXCL);
-	return(error);
-}
-
-
 
 /*========================================================================
  * External routines when attribute list is inside the inode
@@ -1166,28 +942,6 @@
 	return error;
 }
 
-/*
- * Copy out attribute entries for attr_list(), for leaf attribute lists.
- */
-STATIC int
-xfs_attr_leaf_list(xfs_attr_list_context_t *context)
-{
-	int error;
-	struct xfs_buf *bp;
-
-	trace_xfs_attr_leaf_list(context);
-
-	context->cursor->blkno = 0;
-	error = xfs_attr3_leaf_read(NULL, context->dp, 0, -1, &bp);
-	if (error)
-		return XFS_ERROR(error);
-
-	error = xfs_attr3_leaf_list_int(bp, context);
-	xfs_trans_brelse(NULL, bp);
-	return XFS_ERROR(error);
-}
-
-
 /*========================================================================
  * External routines when attribute list size > XFS_LBSIZE(mp).
  *========================================================================*/
@@ -1260,6 +1014,7 @@
 			 * have been a b-tree.
 			 */
 			xfs_da_state_free(state);
+			state = NULL;
 			xfs_bmap_init(args->flist, args->firstblock);
 			error = xfs_attr3_leaf_to_node(args);
 			if (!error) {
@@ -1780,143 +1535,3 @@
 	xfs_da_state_free(state);
 	return(retval);
 }
-
-STATIC int							/* error */
-xfs_attr_node_list(xfs_attr_list_context_t *context)
-{
-	attrlist_cursor_kern_t *cursor;
-	xfs_attr_leafblock_t *leaf;
-	xfs_da_intnode_t *node;
-	struct xfs_attr3_icleaf_hdr leafhdr;
-	struct xfs_da3_icnode_hdr nodehdr;
-	struct xfs_da_node_entry *btree;
-	int error, i;
-	struct xfs_buf *bp;
-
-	trace_xfs_attr_node_list(context);
-
-	cursor = context->cursor;
-	cursor->initted = 1;
-
-	/*
-	 * Do all sorts of validation on the passed-in cursor structure.
-	 * If anything is amiss, ignore the cursor and look up the hashval
-	 * starting from the btree root.
-	 */
-	bp = NULL;
-	if (cursor->blkno > 0) {
-		error = xfs_da3_node_read(NULL, context->dp, cursor->blkno, -1,
-					      &bp, XFS_ATTR_FORK);
-		if ((error != 0) && (error != EFSCORRUPTED))
-			return(error);
-		if (bp) {
-			struct xfs_attr_leaf_entry *entries;
-
-			node = bp->b_addr;
-			switch (be16_to_cpu(node->hdr.info.magic)) {
-			case XFS_DA_NODE_MAGIC:
-			case XFS_DA3_NODE_MAGIC:
-				trace_xfs_attr_list_wrong_blk(context);
-				xfs_trans_brelse(NULL, bp);
-				bp = NULL;
-				break;
-			case XFS_ATTR_LEAF_MAGIC:
-			case XFS_ATTR3_LEAF_MAGIC:
-				leaf = bp->b_addr;
-				xfs_attr3_leaf_hdr_from_disk(&leafhdr, leaf);
-				entries = xfs_attr3_leaf_entryp(leaf);
-				if (cursor->hashval > be32_to_cpu(
-						entries[leafhdr.count - 1].hashval)) {
-					trace_xfs_attr_list_wrong_blk(context);
-					xfs_trans_brelse(NULL, bp);
-					bp = NULL;
-				} else if (cursor->hashval <= be32_to_cpu(
-						entries[0].hashval)) {
-					trace_xfs_attr_list_wrong_blk(context);
-					xfs_trans_brelse(NULL, bp);
-					bp = NULL;
-				}
-				break;
-			default:
-				trace_xfs_attr_list_wrong_blk(context);
-				xfs_trans_brelse(NULL, bp);
-				bp = NULL;
-			}
-		}
-	}
-
-	/*
-	 * We did not find what we expected given the cursor's contents,
-	 * so we start from the top and work down based on the hash value.
-	 * Note that start of node block is same as start of leaf block.
-	 */
-	if (bp == NULL) {
-		cursor->blkno = 0;
-		for (;;) {
-			__uint16_t magic;
-
-			error = xfs_da3_node_read(NULL, context->dp,
-						      cursor->blkno, -1, &bp,
-						      XFS_ATTR_FORK);
-			if (error)
-				return(error);
-			node = bp->b_addr;
-			magic = be16_to_cpu(node->hdr.info.magic);
-			if (magic == XFS_ATTR_LEAF_MAGIC ||
-			    magic == XFS_ATTR3_LEAF_MAGIC)
-				break;
-			if (magic != XFS_DA_NODE_MAGIC &&
-			    magic != XFS_DA3_NODE_MAGIC) {
-				XFS_CORRUPTION_ERROR("xfs_attr_node_list(3)",
-						     XFS_ERRLEVEL_LOW,
-						     context->dp->i_mount,
-						     node);
-				xfs_trans_brelse(NULL, bp);
-				return XFS_ERROR(EFSCORRUPTED);
-			}
-
-			xfs_da3_node_hdr_from_disk(&nodehdr, node);
-			btree = xfs_da3_node_tree_p(node);
-			for (i = 0; i < nodehdr.count; btree++, i++) {
-				if (cursor->hashval
-						<= be32_to_cpu(btree->hashval)) {
-					cursor->blkno = be32_to_cpu(btree->before);
-					trace_xfs_attr_list_node_descend(context,
-									 btree);
-					break;
-				}
-			}
-			if (i == nodehdr.count) {
-				xfs_trans_brelse(NULL, bp);
-				return 0;
-			}
-			xfs_trans_brelse(NULL, bp);
-		}
-	}
-	ASSERT(bp != NULL);
-
-	/*
-	 * Roll upward through the blocks, processing each leaf block in
-	 * order.  As long as there is space in the result buffer, keep
-	 * adding the information.
-	 */
-	for (;;) {
-		leaf = bp->b_addr;
-		error = xfs_attr3_leaf_list_int(bp, context);
-		if (error) {
-			xfs_trans_brelse(NULL, bp);
-			return error;
-		}
-		xfs_attr3_leaf_hdr_from_disk(&leafhdr, leaf);
-		if (context->seen_enough || leafhdr.forw == 0)
-			break;
-		cursor->blkno = leafhdr.forw;
-		xfs_trans_brelse(NULL, bp);
-		error = xfs_attr3_leaf_read(NULL, context->dp, cursor->blkno, -1,
-					   &bp);
-		if (error)
-			return error;
-	}
-	xfs_trans_brelse(NULL, bp);
-	return 0;
-}
diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h
index de8dd58..dd48245 100644
--- a/fs/xfs/xfs_attr.h
+++ b/fs/xfs/xfs_attr.h
@@ -141,5 +141,14 @@
  */
 int xfs_attr_inactive(struct xfs_inode *dp);
 int xfs_attr_list_int(struct xfs_attr_list_context *);
+int xfs_inode_hasattr(struct xfs_inode *ip);
+int xfs_attr_get(struct xfs_inode *ip, const unsigned char *name,
+		 unsigned char *value, int *valuelenp, int flags);
+int xfs_attr_set(struct xfs_inode *dp, const unsigned char *name,
+		 unsigned char *value, int valuelen, int flags);
+int xfs_attr_remove(struct xfs_inode *dp, const unsigned char *name, int flags);
+int xfs_attr_list(struct xfs_inode *dp, char *buffer, int bufsize,
+		  int flags, struct attrlist_cursor_kern *cursor);
+
 
 #endif	/* __XFS_ATTR_H__ */
diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c
new file mode 100644
index 0000000..bb24b07
--- /dev/null
+++ b/fs/xfs/xfs_attr_inactive.c
@@ -0,0 +1,453 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * Copyright (c) 2013 Red Hat, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_format.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+#include "xfs_da_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_alloc.h"
+#include "xfs_btree.h"
+#include "xfs_attr_remote.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_inode_item.h"
+#include "xfs_bmap.h"
+#include "xfs_attr.h"
+#include "xfs_attr_leaf.h"
+#include "xfs_error.h"
+#include "xfs_quota.h"
+#include "xfs_trace.h"
+#include "xfs_trans_priv.h"
+
+/*
+ * Look at all the extents for this logical region,
+ * invalidate any buffers that are incore/in transactions.
+ */
+STATIC int
+xfs_attr3_leaf_freextent(
+	struct xfs_trans	**trans,
+	struct xfs_inode	*dp,
+	xfs_dablk_t		blkno,
+	int			blkcnt)
+{
+	struct xfs_bmbt_irec	map;
+	struct xfs_buf		*bp;
+	xfs_dablk_t		tblkno;
+	xfs_daddr_t		dblkno;
+	int			tblkcnt;
+	int			dblkcnt;
+	int			nmap;
+	int			error;
+
+	/*
+	 * Roll through the "value", invalidating the attribute value's
+	 * blocks.
+	 */
+	tblkno = blkno;
+	tblkcnt = blkcnt;
+	while (tblkcnt > 0) {
+		/*
+		 * Try to remember where we decided to put the value.
+		 */
+		nmap = 1;
+		error = xfs_bmapi_read(dp, (xfs_fileoff_t)tblkno, tblkcnt,
+				       &map, &nmap, XFS_BMAPI_ATTRFORK);
+		if (error) {
+			return(error);
+		}
+		ASSERT(nmap == 1);
+		ASSERT(map.br_startblock != DELAYSTARTBLOCK);
+
+		/*
+		 * If it's a hole, these are already unmapped
+		 * so there's nothing to invalidate.
+		 */
+		if (map.br_startblock != HOLESTARTBLOCK) {
+
+			dblkno = XFS_FSB_TO_DADDR(dp->i_mount,
+						  map.br_startblock);
+			dblkcnt = XFS_FSB_TO_BB(dp->i_mount,
+						map.br_blockcount);
+			bp = xfs_trans_get_buf(*trans,
+					dp->i_mount->m_ddev_targp,
+					dblkno, dblkcnt, 0);
+			if (!bp)
+				return ENOMEM;
+			xfs_trans_binval(*trans, bp);
+			/*
+			 * Roll to next transaction.
+			 */
+			error = xfs_trans_roll(trans, dp);
+			if (error)
+				return (error);
+		}
+
+		tblkno += map.br_blockcount;
+		tblkcnt -= map.br_blockcount;
+	}
+
+	return(0);
+}
+
+/*
+ * Invalidate all of the "remote" value regions pointed to by a particular
+ * leaf block.
+ * Note that we must release the lock on the buffer so that we are not
+ * caught holding something that the logging code wants to flush to disk.
+ */
+STATIC int
+xfs_attr3_leaf_inactive(
+	struct xfs_trans	**trans,
+	struct xfs_inode	*dp,
+	struct xfs_buf		*bp)
+{
+	struct xfs_attr_leafblock *leaf;
+	struct xfs_attr3_icleaf_hdr ichdr;
+	struct xfs_attr_leaf_entry *entry;
+	struct xfs_attr_leaf_name_remote *name_rmt;
+	struct xfs_attr_inactive_list *list;
+	struct xfs_attr_inactive_list *lp;
+	int			error;
+	int			count;
+	int			size;
+	int			tmp;
+	int			i;
+
+	leaf = bp->b_addr;
+	xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
+
+	/*
+	 * Count the number of "remote" value extents.
+	 */
+	count = 0;
+	entry = xfs_attr3_leaf_entryp(leaf);
+	for (i = 0; i < ichdr.count; entry++, i++) {
+		if (be16_to_cpu(entry->nameidx) &&
+		    ((entry->flags & XFS_ATTR_LOCAL) == 0)) {
+			name_rmt = xfs_attr3_leaf_name_remote(leaf, i);
+			if (name_rmt->valueblk)
+				count++;
+		}
+	}
+
+	/*
+	 * If there are no "remote" values, we're done.
+	 */
+	if (count == 0) {
+		xfs_trans_brelse(*trans, bp);
+		return 0;
+	}
+
+	/*
+	 * Allocate storage for a list of all the "remote" value extents.
+	 */
+	size = count * sizeof(xfs_attr_inactive_list_t);
+	list = kmem_alloc(size, KM_SLEEP);
+
+	/*
+	 * Identify each of the "remote" value extents.
+	 */
+	lp = list;
+	entry = xfs_attr3_leaf_entryp(leaf);
+	for (i = 0; i < ichdr.count; entry++, i++) {
+		if (be16_to_cpu(entry->nameidx) &&
+		    ((entry->flags & XFS_ATTR_LOCAL) == 0)) {
+			name_rmt = xfs_attr3_leaf_name_remote(leaf, i);
+			if (name_rmt->valueblk) {
+				lp->valueblk = be32_to_cpu(name_rmt->valueblk);
+				lp->valuelen = xfs_attr3_rmt_blocks(dp->i_mount,
+						    be32_to_cpu(name_rmt->valuelen));
+				lp++;
+			}
+		}
+	}
+	xfs_trans_brelse(*trans, bp);	/* unlock for trans. in freextent() */
+
+	/*
+	 * Invalidate each of the "remote" value extents.
+	 */
+	error = 0;
+	for (lp = list, i = 0; i < count; i++, lp++) {
+		tmp = xfs_attr3_leaf_freextent(trans, dp,
+				lp->valueblk, lp->valuelen);
+
+		if (error == 0)
+			error = tmp;	/* save only the 1st errno */
+	}
+
+	kmem_free(list);
+	return error;
+}
+
+/*
+ * Recurse (gasp!) through the attribute nodes until we find leaves.
+ * We're doing a depth-first traversal in order to invalidate everything.
+ */
+STATIC int
+xfs_attr3_node_inactive(
+	struct xfs_trans **trans,
+	struct xfs_inode *dp,
+	struct xfs_buf	*bp,
+	int		level)
+{
+	xfs_da_blkinfo_t *info;
+	xfs_da_intnode_t *node;
+	xfs_dablk_t child_fsb;
+	xfs_daddr_t parent_blkno, child_blkno;
+	int error, i;
+	struct xfs_buf *child_bp;
+	struct xfs_da_node_entry *btree;
+	struct xfs_da3_icnode_hdr ichdr;
+
+	/*
+	 * Since this code is recursive (gasp!) we must protect ourselves.
+	 */
+	if (level > XFS_DA_NODE_MAXDEPTH) {
+		xfs_trans_brelse(*trans, bp);	/* no locks for later trans */
+		return XFS_ERROR(EIO);
+	}
+
+	node = bp->b_addr;
+	xfs_da3_node_hdr_from_disk(&ichdr, node);
+	parent_blkno = bp->b_bn;
+	if (!ichdr.count) {
+		xfs_trans_brelse(*trans, bp);
+		return 0;
+	}
+	btree = xfs_da3_node_tree_p(node);
+	child_fsb = be32_to_cpu(btree[0].before);
+	xfs_trans_brelse(*trans, bp);	/* no locks for later trans */
+
+	/*
+	 * If this is the node level just above the leaves, simply loop
+	 * over the leaves removing all of them.  If this is higher up
+	 * in the tree, recurse downward.
+	 */
+	for (i = 0; i < ichdr.count; i++) {
+		/*
+		 * Read the subsidiary block to see what we have to work with.
+		 * Don't do this in a transaction.  This is a depth-first
+		 * traversal of the tree so we may deal with many blocks
+		 * before we come back to this one.
+		 */
+		error = xfs_da3_node_read(*trans, dp, child_fsb, -2, &child_bp,
+						XFS_ATTR_FORK);
+		if (error)
+			return(error);
+		if (child_bp) {
+						/* save for re-read later */
+			child_blkno = XFS_BUF_ADDR(child_bp);
+
+			/*
+			 * Invalidate the subtree, however we have to.
+			 */
+			info = child_bp->b_addr;
+			switch (info->magic) {
+			case cpu_to_be16(XFS_DA_NODE_MAGIC):
+			case cpu_to_be16(XFS_DA3_NODE_MAGIC):
+				error = xfs_attr3_node_inactive(trans, dp,
+							child_bp, level + 1);
+				break;
+			case cpu_to_be16(XFS_ATTR_LEAF_MAGIC):
+			case cpu_to_be16(XFS_ATTR3_LEAF_MAGIC):
+				error = xfs_attr3_leaf_inactive(trans, dp,
+							child_bp);
+				break;
+			default:
+				error = XFS_ERROR(EIO);
+				xfs_trans_brelse(*trans, child_bp);
+				break;
+			}
+			if (error)
+				return error;
+
+			/*
+			 * Remove the subsidiary block from the cache
+			 * and from the log.
+			 */
+			error = xfs_da_get_buf(*trans, dp, 0, child_blkno,
+				&child_bp, XFS_ATTR_FORK);
+			if (error)
+				return error;
+			xfs_trans_binval(*trans, child_bp);
+		}
+
+		/*
+		 * If we're not done, re-read the parent to get the next
+		 * child block number.
+		 */
+		if (i + 1 < ichdr.count) {
+			error = xfs_da3_node_read(*trans, dp, 0, parent_blkno,
+						 &bp, XFS_ATTR_FORK);
+			if (error)
+				return error;
+			child_fsb = be32_to_cpu(btree[i + 1].before);
+			xfs_trans_brelse(*trans, bp);
+		}
+		/*
+		 * Atomically commit the whole invalidate stuff.
+		 */
+		error = xfs_trans_roll(trans, dp);
+		if (error)
+			return  error;
+	}
+
+	return 0;
+}
+
+/*
+ * Indiscriminately delete the entire attribute fork
+ *
+ * Recurse (gasp!) through the attribute nodes until we find leaves.
+ * We're doing a depth-first traversal in order to invalidate everything.
+ */
+int
+xfs_attr3_root_inactive(
+	struct xfs_trans	**trans,
+	struct xfs_inode	*dp)
+{
+	struct xfs_da_blkinfo	*info;
+	struct xfs_buf		*bp;
+	xfs_daddr_t		blkno;
+	int			error;
+
+	/*
+	 * Read block 0 to see what we have to work with.
+	 * We only get here if we have extents, since we remove
+	 * the extents in reverse order the extent containing
+	 * block 0 must still be there.
+	 */
+	error = xfs_da3_node_read(*trans, dp, 0, -1, &bp, XFS_ATTR_FORK);
+	if (error)
+		return error;
+	blkno = bp->b_bn;
+
+	/*
+	 * Invalidate the tree, even if the "tree" is only a single leaf block.
+	 * This is a depth-first traversal!
+	 */
+	info = bp->b_addr;
+	switch (info->magic) {
+	case cpu_to_be16(XFS_DA_NODE_MAGIC):
+	case cpu_to_be16(XFS_DA3_NODE_MAGIC):
+		error = xfs_attr3_node_inactive(trans, dp, bp, 1);
+		break;
+	case cpu_to_be16(XFS_ATTR_LEAF_MAGIC):
+	case cpu_to_be16(XFS_ATTR3_LEAF_MAGIC):
+		error = xfs_attr3_leaf_inactive(trans, dp, bp);
+		break;
+	default:
+		error = XFS_ERROR(EIO);
+		xfs_trans_brelse(*trans, bp);
+		break;
+	}
+	if (error)
+		return error;
+
+	/*
+	 * Invalidate the incore copy of the root block.
+	 */
+	error = xfs_da_get_buf(*trans, dp, 0, blkno, &bp, XFS_ATTR_FORK);
+	if (error)
+		return error;
+	xfs_trans_binval(*trans, bp);	/* remove from cache */
+	/*
+	 * Commit the invalidate and start the next transaction.
+	 */
+	error = xfs_trans_roll(trans, dp);
+
+	return error;
+}
+
+int
+xfs_attr_inactive(xfs_inode_t *dp)
+{
+	xfs_trans_t *trans;
+	xfs_mount_t *mp;
+	int error;
+
+	mp = dp->i_mount;
+	ASSERT(! XFS_NOT_DQATTACHED(mp, dp));
+
+	xfs_ilock(dp, XFS_ILOCK_SHARED);
+	if (!xfs_inode_hasattr(dp) ||
+	    dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
+		xfs_iunlock(dp, XFS_ILOCK_SHARED);
+		return 0;
+	}
+	xfs_iunlock(dp, XFS_ILOCK_SHARED);
+
+	/*
+	 * Start our first transaction of the day.
+	 *
+	 * All future transactions during this code must be "chained" off
+	 * this one via the trans_dup() call.  All transactions will contain
+	 * the inode, and the inode will always be marked with trans_ihold().
+	 * Since the inode will be locked in all transactions, we must log
+	 * the inode in every transaction to let it float upward through
+	 * the log.
+	 */
+	trans = xfs_trans_alloc(mp, XFS_TRANS_ATTRINVAL);
+	error = xfs_trans_reserve(trans, &M_RES(mp)->tr_attrinval, 0, 0);
+	if (error) {
+		xfs_trans_cancel(trans, 0);
+		return(error);
+	}
+	xfs_ilock(dp, XFS_ILOCK_EXCL);
+
+	/*
+	 * No need to make quota reservations here. We expect to release some
+	 * blocks, not allocate, in the common case.
+	 */
+	xfs_trans_ijoin(trans, dp, 0);
+
+	/*
+	 * Decide on what work routines to call based on the inode size.
+	 */
+	if (!xfs_inode_hasattr(dp) ||
+	    dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
+		error = 0;
+		goto out;
+	}
+	error = xfs_attr3_root_inactive(&trans, dp);
+	if (error)
+		goto out;
+
+	error = xfs_itruncate_extents(&trans, dp, XFS_ATTR_FORK, 0);
+	if (error)
+		goto out;
+
+	error = xfs_trans_commit(trans, XFS_TRANS_RELEASE_LOG_RES);
+	xfs_iunlock(dp, XFS_ILOCK_EXCL);
+
+	return(error);
+
+out:
+	xfs_trans_cancel(trans, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
+	xfs_iunlock(dp, XFS_ILOCK_EXCL);
+	return(error);
+}
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index b800fbc..86db20a 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -22,6 +22,7 @@
 #include "xfs_bit.h"
 #include "xfs_log.h"
 #include "xfs_trans.h"
+#include "xfs_trans_priv.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
 #include "xfs_mount.h"
@@ -78,16 +79,6 @@
 			int *number_usedbytes_in_blk1);
 
 /*
- * Routines used for shrinking the Btree.
- */
-STATIC int xfs_attr3_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp,
-				  struct xfs_buf *bp, int level);
-STATIC int xfs_attr3_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp,
-				  struct xfs_buf *bp);
-STATIC int xfs_attr3_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp,
-				   xfs_dablk_t blkno, int blkcnt);
-
-/*
  * Utility routines.
  */
 STATIC void xfs_attr3_leaf_moveents(struct xfs_attr_leafblock *src_leaf,
@@ -635,7 +626,7 @@
 	xfs_attr_sf_entry_t *sfe;
 	int i;
 
-	ASSERT(args->dp->i_d.di_aformat == XFS_IFINLINE);
+	ASSERT(args->dp->i_afp->if_flags == XFS_IFINLINE);
 	sf = (xfs_attr_shortform_t *)args->dp->i_afp->if_u1.if_data;
 	sfe = &sf->list[0];
 	for (i = 0; i < sf->hdr.count;
@@ -751,182 +742,6 @@
 	return(error);
 }
 
-STATIC int
-xfs_attr_shortform_compare(const void *a, const void *b)
-{
-	xfs_attr_sf_sort_t *sa, *sb;
-
-	sa = (xfs_attr_sf_sort_t *)a;
-	sb = (xfs_attr_sf_sort_t *)b;
-	if (sa->hash < sb->hash) {
-		return(-1);
-	} else if (sa->hash > sb->hash) {
-		return(1);
-	} else {
-		return(sa->entno - sb->entno);
-	}
-}
-
-
-#define XFS_ISRESET_CURSOR(cursor) \
-	(!((cursor)->initted) && !((cursor)->hashval) && \
-	 !((cursor)->blkno) && !((cursor)->offset))
-/*
- * Copy out entries of shortform attribute lists for attr_list().
- * Shortform attribute lists are not stored in hashval sorted order.
- * If the output buffer is not large enough to hold them all, then we
- * we have to calculate each entries' hashvalue and sort them before
- * we can begin returning them to the user.
- */
-/*ARGSUSED*/
-int
-xfs_attr_shortform_list(xfs_attr_list_context_t *context)
-{
-	attrlist_cursor_kern_t *cursor;
-	xfs_attr_sf_sort_t *sbuf, *sbp;
-	xfs_attr_shortform_t *sf;
-	xfs_attr_sf_entry_t *sfe;
-	xfs_inode_t *dp;
-	int sbsize, nsbuf, count, i;
-	int error;
-
-	ASSERT(context != NULL);
-	dp = context->dp;
-	ASSERT(dp != NULL);
-	ASSERT(dp->i_afp != NULL);
-	sf = (xfs_attr_shortform_t *)dp->i_afp->if_u1.if_data;
-	ASSERT(sf != NULL);
-	if (!sf->hdr.count)
-		return(0);
-	cursor = context->cursor;
-	ASSERT(cursor != NULL);
-
-	trace_xfs_attr_list_sf(context);
-
-	/*
-	 * If the buffer is large enough and the cursor is at the start,
-	 * do not bother with sorting since we will return everything in
-	 * one buffer and another call using the cursor won't need to be
-	 * made.
-	 * Note the generous fudge factor of 16 overhead bytes per entry.
-	 * If bufsize is zero then put_listent must be a search function
-	 * and can just scan through what we have.
-	 */
-	if (context->bufsize == 0 ||
-	    (XFS_ISRESET_CURSOR(cursor) &&
-             (dp->i_afp->if_bytes + sf->hdr.count * 16) < context->bufsize)) {
-		for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) {
-			error = context->put_listent(context,
-					   sfe->flags,
-					   sfe->nameval,
-					   (int)sfe->namelen,
-					   (int)sfe->valuelen,
-					   &sfe->nameval[sfe->namelen]);
-
-			/*
-			 * Either search callback finished early or
-			 * didn't fit it all in the buffer after all.
-			 */
-			if (context->seen_enough)
-				break;
-
-			if (error)
-				return error;
-			sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
-		}
-		trace_xfs_attr_list_sf_all(context);
-		return(0);
-	}
-
-	/* do no more for a search callback */
-	if (context->bufsize == 0)
-		return 0;
-
-	/*
-	 * It didn't all fit, so we have to sort everything on hashval.
-	 */
-	sbsize = sf->hdr.count * sizeof(*sbuf);
-	sbp = sbuf = kmem_alloc(sbsize, KM_SLEEP | KM_NOFS);
-
-	/*
-	 * Scan the attribute list for the rest of the entries, storing
-	 * the relevant info from only those that match into a buffer.
-	 */
-	nsbuf = 0;
-	for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) {
-		if (unlikely(
-		    ((char *)sfe < (char *)sf) ||
-		    ((char *)sfe >= ((char *)sf + dp->i_afp->if_bytes)))) {
-			XFS_CORRUPTION_ERROR("xfs_attr_shortform_list",
-					     XFS_ERRLEVEL_LOW,
-					     context->dp->i_mount, sfe);
-			kmem_free(sbuf);
-			return XFS_ERROR(EFSCORRUPTED);
-		}
-
-		sbp->entno = i;
-		sbp->hash = xfs_da_hashname(sfe->nameval, sfe->namelen);
-		sbp->name = sfe->nameval;
-		sbp->namelen = sfe->namelen;
-		/* These are bytes, and both on-disk, don't endian-flip */
-		sbp->valuelen = sfe->valuelen;
-		sbp->flags = sfe->flags;
-		sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
-		sbp++;
-		nsbuf++;
-	}
-
-	/*
-	 * Sort the entries on hash then entno.
-	 */
-	xfs_sort(sbuf, nsbuf, sizeof(*sbuf), xfs_attr_shortform_compare);
-
-	/*
-	 * Re-find our place IN THE SORTED LIST.
-	 */
-	count = 0;
-	cursor->initted = 1;
-	cursor->blkno = 0;
-	for (sbp = sbuf, i = 0; i < nsbuf; i++, sbp++) {
-		if (sbp->hash == cursor->hashval) {
-			if (cursor->offset == count) {
-				break;
-			}
-			count++;
-		} else if (sbp->hash > cursor->hashval) {
-			break;
-		}
-	}
-	if (i == nsbuf) {
-		kmem_free(sbuf);
-		return(0);
-	}
-
-	/*
-	 * Loop putting entries into the user buffer.
-	 */
-	for ( ; i < nsbuf; i++, sbp++) {
-		if (cursor->hashval != sbp->hash) {
-			cursor->hashval = sbp->hash;
-			cursor->offset = 0;
-		}
-		error = context->put_listent(context,
-					sbp->flags,
-					sbp->name,
-					sbp->namelen,
-					sbp->valuelen,
-					&sbp->name[sbp->namelen]);
-		if (error)
-			return error;
-		if (context->seen_enough)
-			break;
-		cursor->offset++;
-	}
-
-	kmem_free(sbuf);
-	return(0);
-}
-
 /*
  * Check a leaf attribute block to see if all the entries would fit into
  * a shortform attribute list.
@@ -1121,7 +936,6 @@
 	return error;
 }
 
-
 /*========================================================================
  * Routines used for growing the Btree.
  *========================================================================*/
@@ -1482,7 +1296,6 @@
 	ichdr_dst->freemap[0].size = ichdr_dst->firstused -
 						ichdr_dst->freemap[0].base;
 
-
 	/* write the header back to initialise the underlying buffer */
 	xfs_attr3_leaf_hdr_to_disk(leaf_dst, ichdr_dst);
 
@@ -2643,130 +2456,6 @@
 	return size;
 }
 
-/*
- * Copy out attribute list entries for attr_list(), for leaf attribute lists.
- */
-int
-xfs_attr3_leaf_list_int(
-	struct xfs_buf			*bp,
-	struct xfs_attr_list_context	*context)
-{
-	struct attrlist_cursor_kern	*cursor;
-	struct xfs_attr_leafblock	*leaf;
-	struct xfs_attr3_icleaf_hdr	ichdr;
-	struct xfs_attr_leaf_entry	*entries;
-	struct xfs_attr_leaf_entry	*entry;
-	int				retval;
-	int				i;
-
-	trace_xfs_attr_list_leaf(context);
-
-	leaf = bp->b_addr;
-	xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
-	entries = xfs_attr3_leaf_entryp(leaf);
-
-	cursor = context->cursor;
-	cursor->initted = 1;
-
-	/*
-	 * Re-find our place in the leaf block if this is a new syscall.
-	 */
-	if (context->resynch) {
-		entry = &entries[0];
-		for (i = 0; i < ichdr.count; entry++, i++) {
-			if (be32_to_cpu(entry->hashval) == cursor->hashval) {
-				if (cursor->offset == context->dupcnt) {
-					context->dupcnt = 0;
-					break;
-				}
-				context->dupcnt++;
-			} else if (be32_to_cpu(entry->hashval) >
-					cursor->hashval) {
-				context->dupcnt = 0;
-				break;
-			}
-		}
-		if (i == ichdr.count) {
-			trace_xfs_attr_list_notfound(context);
-			return 0;
-		}
-	} else {
-		entry = &entries[0];
-		i = 0;
-	}
-	context->resynch = 0;
-
-	/*
-	 * We have found our place, start copying out the new attributes.
-	 */
-	retval = 0;
-	for (; i < ichdr.count; entry++, i++) {
-		if (be32_to_cpu(entry->hashval) != cursor->hashval) {
-			cursor->hashval = be32_to_cpu(entry->hashval);
-			cursor->offset = 0;
-		}
-
-		if (entry->flags & XFS_ATTR_INCOMPLETE)
-			continue;		/* skip incomplete entries */
-
-		if (entry->flags & XFS_ATTR_LOCAL) {
-			xfs_attr_leaf_name_local_t *name_loc =
-				xfs_attr3_leaf_name_local(leaf, i);
-
-			retval = context->put_listent(context,
-						entry->flags,
-						name_loc->nameval,
-						(int)name_loc->namelen,
-						be16_to_cpu(name_loc->valuelen),
-						&name_loc->nameval[name_loc->namelen]);
-			if (retval)
-				return retval;
-		} else {
-			xfs_attr_leaf_name_remote_t *name_rmt =
-				xfs_attr3_leaf_name_remote(leaf, i);
-
-			int valuelen = be32_to_cpu(name_rmt->valuelen);
-
-			if (context->put_value) {
-				xfs_da_args_t args;
-
-				memset((char *)&args, 0, sizeof(args));
-				args.dp = context->dp;
-				args.whichfork = XFS_ATTR_FORK;
-				args.valuelen = valuelen;
-				args.value = kmem_alloc(valuelen, KM_SLEEP | KM_NOFS);
-				args.rmtblkno = be32_to_cpu(name_rmt->valueblk);
-				args.rmtblkcnt = xfs_attr3_rmt_blocks(
-							args.dp->i_mount, valuelen);
-				retval = xfs_attr_rmtval_get(&args);
-				if (retval)
-					return retval;
-				retval = context->put_listent(context,
-						entry->flags,
-						name_rmt->name,
-						(int)name_rmt->namelen,
-						valuelen,
-						args.value);
-				kmem_free(args.value);
-			} else {
-				retval = context->put_listent(context,
-						entry->flags,
-						name_rmt->name,
-						(int)name_rmt->namelen,
-						valuelen,
-						NULL);
-			}
-			if (retval)
-				return retval;
-		}
-		if (context->seen_enough)
-			break;
-		cursor->offset++;
-	}
-	trace_xfs_attr_list_leaf_end(context);
-	return retval;
-}
-
 
 /*========================================================================
  * Manage the INCOMPLETE flag in a leaf entry
@@ -3011,345 +2700,3 @@
 
 	return error;
 }
-
-/*========================================================================
- * Indiscriminately delete the entire attribute fork
- *========================================================================*/
-
-/*
- * Recurse (gasp!) through the attribute nodes until we find leaves.
- * We're doing a depth-first traversal in order to invalidate everything.
- */
-int
-xfs_attr3_root_inactive(
-	struct xfs_trans	**trans,
-	struct xfs_inode	*dp)
-{
-	struct xfs_da_blkinfo	*info;
-	struct xfs_buf		*bp;
-	xfs_daddr_t		blkno;
-	int			error;
-
-	/*
-	 * Read block 0 to see what we have to work with.
-	 * We only get here if we have extents, since we remove
-	 * the extents in reverse order the extent containing
-	 * block 0 must still be there.
-	 */
-	error = xfs_da3_node_read(*trans, dp, 0, -1, &bp, XFS_ATTR_FORK);
-	if (error)
-		return error;
-	blkno = bp->b_bn;
-
-	/*
-	 * Invalidate the tree, even if the "tree" is only a single leaf block.
-	 * This is a depth-first traversal!
-	 */
-	info = bp->b_addr;
-	switch (info->magic) {
-	case cpu_to_be16(XFS_DA_NODE_MAGIC):
-	case cpu_to_be16(XFS_DA3_NODE_MAGIC):
-		error = xfs_attr3_node_inactive(trans, dp, bp, 1);
-		break;
-	case cpu_to_be16(XFS_ATTR_LEAF_MAGIC):
-	case cpu_to_be16(XFS_ATTR3_LEAF_MAGIC):
-		error = xfs_attr3_leaf_inactive(trans, dp, bp);
-		break;
-	default:
-		error = XFS_ERROR(EIO);
-		xfs_trans_brelse(*trans, bp);
-		break;
-	}
-	if (error)
-		return error;
-
-	/*
-	 * Invalidate the incore copy of the root block.
-	 */
-	error = xfs_da_get_buf(*trans, dp, 0, blkno, &bp, XFS_ATTR_FORK);
-	if (error)
-		return error;
-	xfs_trans_binval(*trans, bp);	/* remove from cache */
-	/*
-	 * Commit the invalidate and start the next transaction.
-	 */
-	error = xfs_trans_roll(trans, dp);
-
-	return error;
-}
-
-/*
- * Recurse (gasp!) through the attribute nodes until we find leaves.
- * We're doing a depth-first traversal in order to invalidate everything.
- */
-STATIC int
-xfs_attr3_node_inactive(
-	struct xfs_trans **trans,
-	struct xfs_inode *dp,
-	struct xfs_buf	*bp,
-	int		level)
-{
-	xfs_da_blkinfo_t *info;
-	xfs_da_intnode_t *node;
-	xfs_dablk_t child_fsb;
-	xfs_daddr_t parent_blkno, child_blkno;
-	int error, i;
-	struct xfs_buf *child_bp;
-	struct xfs_da_node_entry *btree;
-	struct xfs_da3_icnode_hdr ichdr;
-
-	/*
-	 * Since this code is recursive (gasp!) we must protect ourselves.
-	 */
-	if (level > XFS_DA_NODE_MAXDEPTH) {
-		xfs_trans_brelse(*trans, bp);	/* no locks for later trans */
-		return XFS_ERROR(EIO);
-	}
-
-	node = bp->b_addr;
-	xfs_da3_node_hdr_from_disk(&ichdr, node);
-	parent_blkno = bp->b_bn;
-	if (!ichdr.count) {
-		xfs_trans_brelse(*trans, bp);
-		return 0;
-	}
-	btree = xfs_da3_node_tree_p(node);
-	child_fsb = be32_to_cpu(btree[0].before);
-	xfs_trans_brelse(*trans, bp);	/* no locks for later trans */
-
-	/*
-	 * If this is the node level just above the leaves, simply loop
-	 * over the leaves removing all of them.  If this is higher up
-	 * in the tree, recurse downward.
-	 */
-	for (i = 0; i < ichdr.count; i++) {
-		/*
-		 * Read the subsidiary block to see what we have to work with.
-		 * Don't do this in a transaction.  This is a depth-first
-		 * traversal of the tree so we may deal with many blocks
-		 * before we come back to this one.
-		 */
-		error = xfs_da3_node_read(*trans, dp, child_fsb, -2, &child_bp,
-						XFS_ATTR_FORK);
-		if (error)
-			return(error);
-		if (child_bp) {
-						/* save for re-read later */
-			child_blkno = XFS_BUF_ADDR(child_bp);
-
-			/*
-			 * Invalidate the subtree, however we have to.
-			 */
-			info = child_bp->b_addr;
-			switch (info->magic) {
-			case cpu_to_be16(XFS_DA_NODE_MAGIC):
-			case cpu_to_be16(XFS_DA3_NODE_MAGIC):
-				error = xfs_attr3_node_inactive(trans, dp,
-							child_bp, level + 1);
-				break;
-			case cpu_to_be16(XFS_ATTR_LEAF_MAGIC):
-			case cpu_to_be16(XFS_ATTR3_LEAF_MAGIC):
-				error = xfs_attr3_leaf_inactive(trans, dp,
-							child_bp);
-				break;
-			default:
-				error = XFS_ERROR(EIO);
-				xfs_trans_brelse(*trans, child_bp);
-				break;
-			}
-			if (error)
-				return error;
-
-			/*
-			 * Remove the subsidiary block from the cache
-			 * and from the log.
-			 */
-			error = xfs_da_get_buf(*trans, dp, 0, child_blkno,
-				&child_bp, XFS_ATTR_FORK);
-			if (error)
-				return error;
-			xfs_trans_binval(*trans, child_bp);
-		}
-
-		/*
-		 * If we're not done, re-read the parent to get the next
-		 * child block number.
-		 */
-		if (i + 1 < ichdr.count) {
-			error = xfs_da3_node_read(*trans, dp, 0, parent_blkno,
-						 &bp, XFS_ATTR_FORK);
-			if (error)
-				return error;
-			child_fsb = be32_to_cpu(btree[i + 1].before);
-			xfs_trans_brelse(*trans, bp);
-		}
-		/*
-		 * Atomically commit the whole invalidate stuff.
-		 */
-		error = xfs_trans_roll(trans, dp);
-		if (error)
-			return  error;
-	}
-
-	return 0;
-}
-
-/*
- * Invalidate all of the "remote" value regions pointed to by a particular
- * leaf block.
- * Note that we must release the lock on the buffer so that we are not
- * caught holding something that the logging code wants to flush to disk.
- */
-STATIC int
-xfs_attr3_leaf_inactive(
-	struct xfs_trans	**trans,
-	struct xfs_inode	*dp,
-	struct xfs_buf		*bp)
-{
-	struct xfs_attr_leafblock *leaf;
-	struct xfs_attr3_icleaf_hdr ichdr;
-	struct xfs_attr_leaf_entry *entry;
-	struct xfs_attr_leaf_name_remote *name_rmt;
-	struct xfs_attr_inactive_list *list;
-	struct xfs_attr_inactive_list *lp;
-	int			error;
-	int			count;
-	int			size;
-	int			tmp;
-	int			i;
-
-	leaf = bp->b_addr;
-	xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
-
-	/*
-	 * Count the number of "remote" value extents.
-	 */
-	count = 0;
-	entry = xfs_attr3_leaf_entryp(leaf);
-	for (i = 0; i < ichdr.count; entry++, i++) {
-		if (be16_to_cpu(entry->nameidx) &&
-		    ((entry->flags & XFS_ATTR_LOCAL) == 0)) {
-			name_rmt = xfs_attr3_leaf_name_remote(leaf, i);
-			if (name_rmt->valueblk)
-				count++;
-		}
-	}
-
-	/*
-	 * If there are no "remote" values, we're done.
-	 */
-	if (count == 0) {
-		xfs_trans_brelse(*trans, bp);
-		return 0;
-	}
-
-	/*
-	 * Allocate storage for a list of all the "remote" value extents.
-	 */
-	size = count * sizeof(xfs_attr_inactive_list_t);
-	list = kmem_alloc(size, KM_SLEEP);
-
-	/*
-	 * Identify each of the "remote" value extents.
-	 */
-	lp = list;
-	entry = xfs_attr3_leaf_entryp(leaf);
-	for (i = 0; i < ichdr.count; entry++, i++) {
-		if (be16_to_cpu(entry->nameidx) &&
-		    ((entry->flags & XFS_ATTR_LOCAL) == 0)) {
-			name_rmt = xfs_attr3_leaf_name_remote(leaf, i);
-			if (name_rmt->valueblk) {
-				lp->valueblk = be32_to_cpu(name_rmt->valueblk);
-				lp->valuelen = xfs_attr3_rmt_blocks(dp->i_mount,
-						    be32_to_cpu(name_rmt->valuelen));
-				lp++;
-			}
-		}
-	}
-	xfs_trans_brelse(*trans, bp);	/* unlock for trans. in freextent() */
-
-	/*
-	 * Invalidate each of the "remote" value extents.
-	 */
-	error = 0;
-	for (lp = list, i = 0; i < count; i++, lp++) {
-		tmp = xfs_attr3_leaf_freextent(trans, dp,
-				lp->valueblk, lp->valuelen);
-
-		if (error == 0)
-			error = tmp;	/* save only the 1st errno */
-	}
-
-	kmem_free(list);
-	return error;
-}
-
-/*
- * Look at all the extents for this logical region,
- * invalidate any buffers that are incore/in transactions.
- */
-STATIC int
-xfs_attr3_leaf_freextent(
-	struct xfs_trans	**trans,
-	struct xfs_inode	*dp,
-	xfs_dablk_t		blkno,
-	int			blkcnt)
-{
-	struct xfs_bmbt_irec	map;
-	struct xfs_buf		*bp;
-	xfs_dablk_t		tblkno;
-	xfs_daddr_t		dblkno;
-	int			tblkcnt;
-	int			dblkcnt;
-	int			nmap;
-	int			error;
-
-	/*
-	 * Roll through the "value", invalidating the attribute value's
-	 * blocks.
-	 */
-	tblkno = blkno;
-	tblkcnt = blkcnt;
-	while (tblkcnt > 0) {
-		/*
-		 * Try to remember where we decided to put the value.
-		 */
-		nmap = 1;
-		error = xfs_bmapi_read(dp, (xfs_fileoff_t)tblkno, tblkcnt,
-				       &map, &nmap, XFS_BMAPI_ATTRFORK);
-		if (error) {
-			return(error);
-		}
-		ASSERT(nmap == 1);
-		ASSERT(map.br_startblock != DELAYSTARTBLOCK);
-
-		/*
-		 * If it's a hole, these are already unmapped
-		 * so there's nothing to invalidate.
-		 */
-		if (map.br_startblock != HOLESTARTBLOCK) {
-
-			dblkno = XFS_FSB_TO_DADDR(dp->i_mount,
-						  map.br_startblock);
-			dblkcnt = XFS_FSB_TO_BB(dp->i_mount,
-						map.br_blockcount);
-			bp = xfs_trans_get_buf(*trans,
-					dp->i_mount->m_ddev_targp,
-					dblkno, dblkcnt, 0);
-			if (!bp)
-				return ENOMEM;
-			xfs_trans_binval(*trans, bp);
-			/*
-			 * Roll to next transaction.
-			 */
-			error = xfs_trans_roll(trans, dp);
-			if (error)
-				return (error);
-		}
-
-		tblkno += map.br_blockcount;
-		tblkcnt -= map.br_blockcount;
-	}
-
-	return(0);
-}
diff --git a/fs/xfs/xfs_attr_leaf.h b/fs/xfs/xfs_attr_leaf.h
index 444a770..c102213 100644
--- a/fs/xfs/xfs_attr_leaf.h
+++ b/fs/xfs/xfs_attr_leaf.h
@@ -333,6 +333,8 @@
 			struct xfs_buf **bpp);
 void	xfs_attr3_leaf_hdr_from_disk(struct xfs_attr3_icleaf_hdr *to,
 				     struct xfs_attr_leafblock *from);
+void	xfs_attr3_leaf_hdr_to_disk(struct xfs_attr_leafblock *to,
+				   struct xfs_attr3_icleaf_hdr *from);
 
 extern const struct xfs_buf_ops xfs_attr3_leaf_buf_ops;
 
diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c
new file mode 100644
index 0000000..cbc80d4
--- /dev/null
+++ b/fs/xfs/xfs_attr_list.c
@@ -0,0 +1,655 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * Copyright (c) 2013 Red Hat, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_types.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+#include "xfs_da_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_alloc.h"
+#include "xfs_btree.h"
+#include "xfs_attr_sf.h"
+#include "xfs_attr_remote.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_inode_item.h"
+#include "xfs_bmap.h"
+#include "xfs_attr.h"
+#include "xfs_attr_leaf.h"
+#include "xfs_error.h"
+#include "xfs_trace.h"
+#include "xfs_buf_item.h"
+#include "xfs_cksum.h"
+
+STATIC int
+xfs_attr_shortform_compare(const void *a, const void *b)
+{
+	xfs_attr_sf_sort_t *sa, *sb;
+
+	sa = (xfs_attr_sf_sort_t *)a;
+	sb = (xfs_attr_sf_sort_t *)b;
+	if (sa->hash < sb->hash) {
+		return(-1);
+	} else if (sa->hash > sb->hash) {
+		return(1);
+	} else {
+		return(sa->entno - sb->entno);
+	}
+}
+
+#define XFS_ISRESET_CURSOR(cursor) \
+	(!((cursor)->initted) && !((cursor)->hashval) && \
+	 !((cursor)->blkno) && !((cursor)->offset))
+/*
+ * Copy out entries of shortform attribute lists for attr_list().
+ * Shortform attribute lists are not stored in hashval sorted order.
+ * If the output buffer is not large enough to hold them all, then we
+ * we have to calculate each entries' hashvalue and sort them before
+ * we can begin returning them to the user.
+ */
+int
+xfs_attr_shortform_list(xfs_attr_list_context_t *context)
+{
+	attrlist_cursor_kern_t *cursor;
+	xfs_attr_sf_sort_t *sbuf, *sbp;
+	xfs_attr_shortform_t *sf;
+	xfs_attr_sf_entry_t *sfe;
+	xfs_inode_t *dp;
+	int sbsize, nsbuf, count, i;
+	int error;
+
+	ASSERT(context != NULL);
+	dp = context->dp;
+	ASSERT(dp != NULL);
+	ASSERT(dp->i_afp != NULL);
+	sf = (xfs_attr_shortform_t *)dp->i_afp->if_u1.if_data;
+	ASSERT(sf != NULL);
+	if (!sf->hdr.count)
+		return(0);
+	cursor = context->cursor;
+	ASSERT(cursor != NULL);
+
+	trace_xfs_attr_list_sf(context);
+
+	/*
+	 * If the buffer is large enough and the cursor is at the start,
+	 * do not bother with sorting since we will return everything in
+	 * one buffer and another call using the cursor won't need to be
+	 * made.
+	 * Note the generous fudge factor of 16 overhead bytes per entry.
+	 * If bufsize is zero then put_listent must be a search function
+	 * and can just scan through what we have.
+	 */
+	if (context->bufsize == 0 ||
+	    (XFS_ISRESET_CURSOR(cursor) &&
+             (dp->i_afp->if_bytes + sf->hdr.count * 16) < context->bufsize)) {
+		for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) {
+			error = context->put_listent(context,
+					   sfe->flags,
+					   sfe->nameval,
+					   (int)sfe->namelen,
+					   (int)sfe->valuelen,
+					   &sfe->nameval[sfe->namelen]);
+
+			/*
+			 * Either search callback finished early or
+			 * didn't fit it all in the buffer after all.
+			 */
+			if (context->seen_enough)
+				break;
+
+			if (error)
+				return error;
+			sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
+		}
+		trace_xfs_attr_list_sf_all(context);
+		return(0);
+	}
+
+	/* do no more for a search callback */
+	if (context->bufsize == 0)
+		return 0;
+
+	/*
+	 * It didn't all fit, so we have to sort everything on hashval.
+	 */
+	sbsize = sf->hdr.count * sizeof(*sbuf);
+	sbp = sbuf = kmem_alloc(sbsize, KM_SLEEP | KM_NOFS);
+
+	/*
+	 * Scan the attribute list for the rest of the entries, storing
+	 * the relevant info from only those that match into a buffer.
+	 */
+	nsbuf = 0;
+	for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) {
+		if (unlikely(
+		    ((char *)sfe < (char *)sf) ||
+		    ((char *)sfe >= ((char *)sf + dp->i_afp->if_bytes)))) {
+			XFS_CORRUPTION_ERROR("xfs_attr_shortform_list",
+					     XFS_ERRLEVEL_LOW,
+					     context->dp->i_mount, sfe);
+			kmem_free(sbuf);
+			return XFS_ERROR(EFSCORRUPTED);
+		}
+
+		sbp->entno = i;
+		sbp->hash = xfs_da_hashname(sfe->nameval, sfe->namelen);
+		sbp->name = sfe->nameval;
+		sbp->namelen = sfe->namelen;
+		/* These are bytes, and both on-disk, don't endian-flip */
+		sbp->valuelen = sfe->valuelen;
+		sbp->flags = sfe->flags;
+		sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
+		sbp++;
+		nsbuf++;
+	}
+
+	/*
+	 * Sort the entries on hash then entno.
+	 */
+	xfs_sort(sbuf, nsbuf, sizeof(*sbuf), xfs_attr_shortform_compare);
+
+	/*
+	 * Re-find our place IN THE SORTED LIST.
+	 */
+	count = 0;
+	cursor->initted = 1;
+	cursor->blkno = 0;
+	for (sbp = sbuf, i = 0; i < nsbuf; i++, sbp++) {
+		if (sbp->hash == cursor->hashval) {
+			if (cursor->offset == count) {
+				break;
+			}
+			count++;
+		} else if (sbp->hash > cursor->hashval) {
+			break;
+		}
+	}
+	if (i == nsbuf) {
+		kmem_free(sbuf);
+		return(0);
+	}
+
+	/*
+	 * Loop putting entries into the user buffer.
+	 */
+	for ( ; i < nsbuf; i++, sbp++) {
+		if (cursor->hashval != sbp->hash) {
+			cursor->hashval = sbp->hash;
+			cursor->offset = 0;
+		}
+		error = context->put_listent(context,
+					sbp->flags,
+					sbp->name,
+					sbp->namelen,
+					sbp->valuelen,
+					&sbp->name[sbp->namelen]);
+		if (error)
+			return error;
+		if (context->seen_enough)
+			break;
+		cursor->offset++;
+	}
+
+	kmem_free(sbuf);
+	return(0);
+}
+
+STATIC int
+xfs_attr_node_list(xfs_attr_list_context_t *context)
+{
+	attrlist_cursor_kern_t *cursor;
+	xfs_attr_leafblock_t *leaf;
+	xfs_da_intnode_t *node;
+	struct xfs_attr3_icleaf_hdr leafhdr;
+	struct xfs_da3_icnode_hdr nodehdr;
+	struct xfs_da_node_entry *btree;
+	int error, i;
+	struct xfs_buf *bp;
+
+	trace_xfs_attr_node_list(context);
+
+	cursor = context->cursor;
+	cursor->initted = 1;
+
+	/*
+	 * Do all sorts of validation on the passed-in cursor structure.
+	 * If anything is amiss, ignore the cursor and look up the hashval
+	 * starting from the btree root.
+	 */
+	bp = NULL;
+	if (cursor->blkno > 0) {
+		error = xfs_da3_node_read(NULL, context->dp, cursor->blkno, -1,
+					      &bp, XFS_ATTR_FORK);
+		if ((error != 0) && (error != EFSCORRUPTED))
+			return(error);
+		if (bp) {
+			struct xfs_attr_leaf_entry *entries;
+
+			node = bp->b_addr;
+			switch (be16_to_cpu(node->hdr.info.magic)) {
+			case XFS_DA_NODE_MAGIC:
+			case XFS_DA3_NODE_MAGIC:
+				trace_xfs_attr_list_wrong_blk(context);
+				xfs_trans_brelse(NULL, bp);
+				bp = NULL;
+				break;
+			case XFS_ATTR_LEAF_MAGIC:
+			case XFS_ATTR3_LEAF_MAGIC:
+				leaf = bp->b_addr;
+				xfs_attr3_leaf_hdr_from_disk(&leafhdr, leaf);
+				entries = xfs_attr3_leaf_entryp(leaf);
+				if (cursor->hashval > be32_to_cpu(
+						entries[leafhdr.count - 1].hashval)) {
+					trace_xfs_attr_list_wrong_blk(context);
+					xfs_trans_brelse(NULL, bp);
+					bp = NULL;
+				} else if (cursor->hashval <= be32_to_cpu(
+						entries[0].hashval)) {
+					trace_xfs_attr_list_wrong_blk(context);
+					xfs_trans_brelse(NULL, bp);
+					bp = NULL;
+				}
+				break;
+			default:
+				trace_xfs_attr_list_wrong_blk(context);
+				xfs_trans_brelse(NULL, bp);
+				bp = NULL;
+			}
+		}
+	}
+
+	/*
+	 * We did not find what we expected given the cursor's contents,
+	 * so we start from the top and work down based on the hash value.
+	 * Note that start of node block is same as start of leaf block.
+	 */
+	if (bp == NULL) {
+		cursor->blkno = 0;
+		for (;;) {
+			__uint16_t magic;
+
+			error = xfs_da3_node_read(NULL, context->dp,
+						      cursor->blkno, -1, &bp,
+						      XFS_ATTR_FORK);
+			if (error)
+				return(error);
+			node = bp->b_addr;
+			magic = be16_to_cpu(node->hdr.info.magic);
+			if (magic == XFS_ATTR_LEAF_MAGIC ||
+			    magic == XFS_ATTR3_LEAF_MAGIC)
+				break;
+			if (magic != XFS_DA_NODE_MAGIC &&
+			    magic != XFS_DA3_NODE_MAGIC) {
+				XFS_CORRUPTION_ERROR("xfs_attr_node_list(3)",
+						     XFS_ERRLEVEL_LOW,
+						     context->dp->i_mount,
+						     node);
+				xfs_trans_brelse(NULL, bp);
+				return XFS_ERROR(EFSCORRUPTED);
+			}
+
+			xfs_da3_node_hdr_from_disk(&nodehdr, node);
+			btree = xfs_da3_node_tree_p(node);
+			for (i = 0; i < nodehdr.count; btree++, i++) {
+				if (cursor->hashval
+						<= be32_to_cpu(btree->hashval)) {
+					cursor->blkno = be32_to_cpu(btree->before);
+					trace_xfs_attr_list_node_descend(context,
+									 btree);
+					break;
+				}
+			}
+			if (i == nodehdr.count) {
+				xfs_trans_brelse(NULL, bp);
+				return 0;
+			}
+			xfs_trans_brelse(NULL, bp);
+		}
+	}
+	ASSERT(bp != NULL);
+
+	/*
+	 * Roll upward through the blocks, processing each leaf block in
+	 * order.  As long as there is space in the result buffer, keep
+	 * adding the information.
+	 */
+	for (;;) {
+		leaf = bp->b_addr;
+		error = xfs_attr3_leaf_list_int(bp, context);
+		if (error) {
+			xfs_trans_brelse(NULL, bp);
+			return error;
+		}
+		xfs_attr3_leaf_hdr_from_disk(&leafhdr, leaf);
+		if (context->seen_enough || leafhdr.forw == 0)
+			break;
+		cursor->blkno = leafhdr.forw;
+		xfs_trans_brelse(NULL, bp);
+		error = xfs_attr3_leaf_read(NULL, context->dp, cursor->blkno, -1,
+					   &bp);
+		if (error)
+			return error;
+	}
+	xfs_trans_brelse(NULL, bp);
+	return 0;
+}
+
+/*
+ * Copy out attribute list entries for attr_list(), for leaf attribute lists.
+ */
+int
+xfs_attr3_leaf_list_int(
+	struct xfs_buf			*bp,
+	struct xfs_attr_list_context	*context)
+{
+	struct attrlist_cursor_kern	*cursor;
+	struct xfs_attr_leafblock	*leaf;
+	struct xfs_attr3_icleaf_hdr	ichdr;
+	struct xfs_attr_leaf_entry	*entries;
+	struct xfs_attr_leaf_entry	*entry;
+	int				retval;
+	int				i;
+
+	trace_xfs_attr_list_leaf(context);
+
+	leaf = bp->b_addr;
+	xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
+	entries = xfs_attr3_leaf_entryp(leaf);
+
+	cursor = context->cursor;
+	cursor->initted = 1;
+
+	/*
+	 * Re-find our place in the leaf block if this is a new syscall.
+	 */
+	if (context->resynch) {
+		entry = &entries[0];
+		for (i = 0; i < ichdr.count; entry++, i++) {
+			if (be32_to_cpu(entry->hashval) == cursor->hashval) {
+				if (cursor->offset == context->dupcnt) {
+					context->dupcnt = 0;
+					break;
+				}
+				context->dupcnt++;
+			} else if (be32_to_cpu(entry->hashval) >
+					cursor->hashval) {
+				context->dupcnt = 0;
+				break;
+			}
+		}
+		if (i == ichdr.count) {
+			trace_xfs_attr_list_notfound(context);
+			return 0;
+		}
+	} else {
+		entry = &entries[0];
+		i = 0;
+	}
+	context->resynch = 0;
+
+	/*
+	 * We have found our place, start copying out the new attributes.
+	 */
+	retval = 0;
+	for (; i < ichdr.count; entry++, i++) {
+		if (be32_to_cpu(entry->hashval) != cursor->hashval) {
+			cursor->hashval = be32_to_cpu(entry->hashval);
+			cursor->offset = 0;
+		}
+
+		if (entry->flags & XFS_ATTR_INCOMPLETE)
+			continue;		/* skip incomplete entries */
+
+		if (entry->flags & XFS_ATTR_LOCAL) {
+			xfs_attr_leaf_name_local_t *name_loc =
+				xfs_attr3_leaf_name_local(leaf, i);
+
+			retval = context->put_listent(context,
+						entry->flags,
+						name_loc->nameval,
+						(int)name_loc->namelen,
+						be16_to_cpu(name_loc->valuelen),
+						&name_loc->nameval[name_loc->namelen]);
+			if (retval)
+				return retval;
+		} else {
+			xfs_attr_leaf_name_remote_t *name_rmt =
+				xfs_attr3_leaf_name_remote(leaf, i);
+
+			int valuelen = be32_to_cpu(name_rmt->valuelen);
+
+			if (context->put_value) {
+				xfs_da_args_t args;
+
+				memset((char *)&args, 0, sizeof(args));
+				args.dp = context->dp;
+				args.whichfork = XFS_ATTR_FORK;
+				args.valuelen = valuelen;
+				args.value = kmem_alloc(valuelen, KM_SLEEP | KM_NOFS);
+				args.rmtblkno = be32_to_cpu(name_rmt->valueblk);
+				args.rmtblkcnt = xfs_attr3_rmt_blocks(
+							args.dp->i_mount, valuelen);
+				retval = xfs_attr_rmtval_get(&args);
+				if (retval)
+					return retval;
+				retval = context->put_listent(context,
+						entry->flags,
+						name_rmt->name,
+						(int)name_rmt->namelen,
+						valuelen,
+						args.value);
+				kmem_free(args.value);
+			} else {
+				retval = context->put_listent(context,
+						entry->flags,
+						name_rmt->name,
+						(int)name_rmt->namelen,
+						valuelen,
+						NULL);
+			}
+			if (retval)
+				return retval;
+		}
+		if (context->seen_enough)
+			break;
+		cursor->offset++;
+	}
+	trace_xfs_attr_list_leaf_end(context);
+	return retval;
+}
+
+/*
+ * Copy out attribute entries for attr_list(), for leaf attribute lists.
+ */
+STATIC int
+xfs_attr_leaf_list(xfs_attr_list_context_t *context)
+{
+	int error;
+	struct xfs_buf *bp;
+
+	trace_xfs_attr_leaf_list(context);
+
+	context->cursor->blkno = 0;
+	error = xfs_attr3_leaf_read(NULL, context->dp, 0, -1, &bp);
+	if (error)
+		return XFS_ERROR(error);
+
+	error = xfs_attr3_leaf_list_int(bp, context);
+	xfs_trans_brelse(NULL, bp);
+	return XFS_ERROR(error);
+}
+
+int
+xfs_attr_list_int(
+	xfs_attr_list_context_t *context)
+{
+	int error;
+	xfs_inode_t *dp = context->dp;
+
+	XFS_STATS_INC(xs_attr_list);
+
+	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
+		return EIO;
+
+	xfs_ilock(dp, XFS_ILOCK_SHARED);
+
+	/*
+	 * Decide on what work routines to call based on the inode size.
+	 */
+	if (!xfs_inode_hasattr(dp)) {
+		error = 0;
+	} else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
+		error = xfs_attr_shortform_list(context);
+	} else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
+		error = xfs_attr_leaf_list(context);
+	} else {
+		error = xfs_attr_node_list(context);
+	}
+
+	xfs_iunlock(dp, XFS_ILOCK_SHARED);
+
+	return error;
+}
+
+#define	ATTR_ENTBASESIZE		/* minimum bytes used by an attr */ \
+	(((struct attrlist_ent *) 0)->a_name - (char *) 0)
+#define	ATTR_ENTSIZE(namelen)		/* actual bytes used by an attr */ \
+	((ATTR_ENTBASESIZE + (namelen) + 1 + sizeof(u_int32_t)-1) \
+	 & ~(sizeof(u_int32_t)-1))
+
+/*
+ * Format an attribute and copy it out to the user's buffer.
+ * Take care to check values and protect against them changing later,
+ * we may be reading them directly out of a user buffer.
+ */
+STATIC int
+xfs_attr_put_listent(
+	xfs_attr_list_context_t *context,
+	int		flags,
+	unsigned char	*name,
+	int		namelen,
+	int		valuelen,
+	unsigned char	*value)
+{
+	struct attrlist *alist = (struct attrlist *)context->alist;
+	attrlist_ent_t *aep;
+	int arraytop;
+
+	ASSERT(!(context->flags & ATTR_KERNOVAL));
+	ASSERT(context->count >= 0);
+	ASSERT(context->count < (ATTR_MAX_VALUELEN/8));
+	ASSERT(context->firstu >= sizeof(*alist));
+	ASSERT(context->firstu <= context->bufsize);
+
+	/*
+	 * Only list entries in the right namespace.
+	 */
+	if (((context->flags & ATTR_SECURE) == 0) !=
+	    ((flags & XFS_ATTR_SECURE) == 0))
+		return 0;
+	if (((context->flags & ATTR_ROOT) == 0) !=
+	    ((flags & XFS_ATTR_ROOT) == 0))
+		return 0;
+
+	arraytop = sizeof(*alist) +
+			context->count * sizeof(alist->al_offset[0]);
+	context->firstu -= ATTR_ENTSIZE(namelen);
+	if (context->firstu < arraytop) {
+		trace_xfs_attr_list_full(context);
+		alist->al_more = 1;
+		context->seen_enough = 1;
+		return 1;
+	}
+
+	aep = (attrlist_ent_t *)&context->alist[context->firstu];
+	aep->a_valuelen = valuelen;
+	memcpy(aep->a_name, name, namelen);
+	aep->a_name[namelen] = 0;
+	alist->al_offset[context->count++] = context->firstu;
+	alist->al_count = context->count;
+	trace_xfs_attr_list_add(context);
+	return 0;
+}
+
+/*
+ * Generate a list of extended attribute names and optionally
+ * also value lengths.  Positive return value follows the XFS
+ * convention of being an error, zero or negative return code
+ * is the length of the buffer returned (negated), indicating
+ * success.
+ */
+int
+xfs_attr_list(
+	xfs_inode_t	*dp,
+	char		*buffer,
+	int		bufsize,
+	int		flags,
+	attrlist_cursor_kern_t *cursor)
+{
+	xfs_attr_list_context_t context;
+	struct attrlist *alist;
+	int error;
+
+	/*
+	 * Validate the cursor.
+	 */
+	if (cursor->pad1 || cursor->pad2)
+		return(XFS_ERROR(EINVAL));
+	if ((cursor->initted == 0) &&
+	    (cursor->hashval || cursor->blkno || cursor->offset))
+		return XFS_ERROR(EINVAL);
+
+	/*
+	 * Check for a properly aligned buffer.
+	 */
+	if (((long)buffer) & (sizeof(int)-1))
+		return XFS_ERROR(EFAULT);
+	if (flags & ATTR_KERNOVAL)
+		bufsize = 0;
+
+	/*
+	 * Initialize the output buffer.
+	 */
+	memset(&context, 0, sizeof(context));
+	context.dp = dp;
+	context.cursor = cursor;
+	context.resynch = 1;
+	context.flags = flags;
+	context.alist = buffer;
+	context.bufsize = (bufsize & ~(sizeof(int)-1));  /* align */
+	context.firstu = context.bufsize;
+	context.put_listent = xfs_attr_put_listent;
+
+	alist = (struct attrlist *)context.alist;
+	alist->al_count = 0;
+	alist->al_more = 0;
+	alist->al_offset[0] = context.bufsize;
+
+	error = xfs_attr_list_int(&context);
+	ASSERT(error >= 0);
+	return error;
+}
diff --git a/fs/xfs/xfs_attr_remote.c b/fs/xfs/xfs_attr_remote.c
index ef6b0c1..712a502 100644
--- a/fs/xfs/xfs_attr_remote.c
+++ b/fs/xfs/xfs_attr_remote.c
@@ -22,6 +22,7 @@
 #include "xfs_bit.h"
 #include "xfs_log.h"
 #include "xfs_trans.h"
+#include "xfs_trans_priv.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
 #include "xfs_mount.h"
@@ -33,6 +34,7 @@
 #include "xfs_alloc.h"
 #include "xfs_inode_item.h"
 #include "xfs_bmap.h"
+#include "xfs_bmap_util.h"
 #include "xfs_attr.h"
 #include "xfs_attr_leaf.h"
 #include "xfs_attr_remote.h"
@@ -237,7 +239,7 @@
 	xfs_ino_t	ino,
 	int		*offset,
 	int		*valuelen,
-	char		**dst)
+	__uint8_t	**dst)
 {
 	char		*src = bp->b_addr;
 	xfs_daddr_t	bno = bp->b_bn;
@@ -249,7 +251,7 @@
 		int hdr_size = 0;
 		int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, XFS_LBSIZE(mp));
 
-		byte_cnt = min_t(int, *valuelen, byte_cnt);
+		byte_cnt = min(*valuelen, byte_cnt);
 
 		if (xfs_sb_version_hascrc(&mp->m_sb)) {
 			if (!xfs_attr3_rmt_hdr_ok(mp, src, ino, *offset,
@@ -284,7 +286,7 @@
 	xfs_ino_t	ino,
 	int		*offset,
 	int		*valuelen,
-	char		**src)
+	__uint8_t	**src)
 {
 	char		*dst = bp->b_addr;
 	xfs_daddr_t	bno = bp->b_bn;
@@ -337,7 +339,7 @@
 	struct xfs_mount	*mp = args->dp->i_mount;
 	struct xfs_buf		*bp;
 	xfs_dablk_t		lblkno = args->rmtblkno;
-	char			*dst = args->value;
+	__uint8_t		*dst = args->value;
 	int			valuelen = args->valuelen;
 	int			nmap;
 	int			error;
@@ -401,7 +403,7 @@
 	struct xfs_bmbt_irec	map;
 	xfs_dablk_t		lblkno;
 	xfs_fileoff_t		lfileoff = 0;
-	char			*src = args->value;
+	__uint8_t		*src = args->value;
 	int			blkcnt;
 	int			valuelen;
 	int			nmap;
@@ -543,11 +545,6 @@
 
 	/*
 	 * Roll through the "value", invalidating the attribute value's blocks.
-	 * Note that args->rmtblkcnt is the minimum number of data blocks we'll
-	 * see for a CRC enabled remote attribute. Each extent will have a
-	 * header, and so we may have more blocks than we realise here.  If we
-	 * fail to map the blocks correctly, we'll have problems with the buffer
-	 * lookups.
 	 */
 	lblkno = args->rmtblkno;
 	blkcnt = args->rmtblkcnt;
@@ -628,4 +625,3 @@
 	}
 	return(0);
 }
-
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 05c698c..92b8309 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -17,16 +17,17 @@
  */
 #include "xfs.h"
 #include "xfs_fs.h"
-#include "xfs_types.h"
+#include "xfs_format.h"
 #include "xfs_bit.h"
 #include "xfs_log.h"
 #include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
-#include "xfs_dir2.h"
 #include "xfs_mount.h"
 #include "xfs_da_btree.h"
+#include "xfs_dir2_format.h"
+#include "xfs_dir2.h"
 #include "xfs_bmap_btree.h"
 #include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
@@ -39,6 +40,7 @@
 #include "xfs_extfree_item.h"
 #include "xfs_alloc.h"
 #include "xfs_bmap.h"
+#include "xfs_bmap_util.h"
 #include "xfs_rtalloc.h"
 #include "xfs_error.h"
 #include "xfs_attr_leaf.h"
@@ -46,7 +48,6 @@
 #include "xfs_trans_space.h"
 #include "xfs_buf_item.h"
 #include "xfs_filestream.h"
-#include "xfs_vnodeops.h"
 #include "xfs_trace.h"
 #include "xfs_symlink.h"
 
@@ -108,19 +109,6 @@
 	mp->m_bm_maxlevels[whichfork] = level;
 }
 
-/*
- * Convert the given file system block to a disk block.  We have to treat it
- * differently based on whether the file is a real time file or not, because the
- * bmap code does.
- */
-xfs_daddr_t
-xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb)
-{
-	return (XFS_IS_REALTIME_INODE(ip) ? \
-		 (xfs_daddr_t)XFS_FSB_TO_BB((ip)->i_mount, (fsb)) : \
-		 XFS_FSB_TO_DADDR((ip)->i_mount, (fsb)));
-}
-
 STATIC int				/* error */
 xfs_bmbt_lookup_eq(
 	struct xfs_btree_cur	*cur,
@@ -263,173 +251,6 @@
 }
 
 /*
- * Extent tree block counting routines.
- */
-
-/*
- * Count leaf blocks given a range of extent records.
- */
-STATIC void
-xfs_bmap_count_leaves(
-	xfs_ifork_t		*ifp,
-	xfs_extnum_t		idx,
-	int			numrecs,
-	int			*count)
-{
-	int		b;
-
-	for (b = 0; b < numrecs; b++) {
-		xfs_bmbt_rec_host_t *frp = xfs_iext_get_ext(ifp, idx + b);
-		*count += xfs_bmbt_get_blockcount(frp);
-	}
-}
-
-/*
- * Count leaf blocks given a range of extent records originally
- * in btree format.
- */
-STATIC void
-xfs_bmap_disk_count_leaves(
-	struct xfs_mount	*mp,
-	struct xfs_btree_block	*block,
-	int			numrecs,
-	int			*count)
-{
-	int		b;
-	xfs_bmbt_rec_t	*frp;
-
-	for (b = 1; b <= numrecs; b++) {
-		frp = XFS_BMBT_REC_ADDR(mp, block, b);
-		*count += xfs_bmbt_disk_get_blockcount(frp);
-	}
-}
-
-/*
- * Recursively walks each level of a btree
- * to count total fsblocks is use.
- */
-STATIC int                                     /* error */
-xfs_bmap_count_tree(
-	xfs_mount_t     *mp,            /* file system mount point */
-	xfs_trans_t     *tp,            /* transaction pointer */
-	xfs_ifork_t	*ifp,		/* inode fork pointer */
-	xfs_fsblock_t   blockno,	/* file system block number */
-	int             levelin,	/* level in btree */
-	int		*count)		/* Count of blocks */
-{
-	int			error;
-	xfs_buf_t		*bp, *nbp;
-	int			level = levelin;
-	__be64			*pp;
-	xfs_fsblock_t           bno = blockno;
-	xfs_fsblock_t		nextbno;
-	struct xfs_btree_block	*block, *nextblock;
-	int			numrecs;
-
-	error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, XFS_BMAP_BTREE_REF,
-						&xfs_bmbt_buf_ops);
-	if (error)
-		return error;
-	*count += 1;
-	block = XFS_BUF_TO_BLOCK(bp);
-
-	if (--level) {
-		/* Not at node above leaves, count this level of nodes */
-		nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
-		while (nextbno != NULLFSBLOCK) {
-			error = xfs_btree_read_bufl(mp, tp, nextbno, 0, &nbp,
-						XFS_BMAP_BTREE_REF,
-						&xfs_bmbt_buf_ops);
-			if (error)
-				return error;
-			*count += 1;
-			nextblock = XFS_BUF_TO_BLOCK(nbp);
-			nextbno = be64_to_cpu(nextblock->bb_u.l.bb_rightsib);
-			xfs_trans_brelse(tp, nbp);
-		}
-
-		/* Dive to the next level */
-		pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
-		bno = be64_to_cpu(*pp);
-		if (unlikely((error =
-		     xfs_bmap_count_tree(mp, tp, ifp, bno, level, count)) < 0)) {
-			xfs_trans_brelse(tp, bp);
-			XFS_ERROR_REPORT("xfs_bmap_count_tree(1)",
-					 XFS_ERRLEVEL_LOW, mp);
-			return XFS_ERROR(EFSCORRUPTED);
-		}
-		xfs_trans_brelse(tp, bp);
-	} else {
-		/* count all level 1 nodes and their leaves */
-		for (;;) {
-			nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
-			numrecs = be16_to_cpu(block->bb_numrecs);
-			xfs_bmap_disk_count_leaves(mp, block, numrecs, count);
-			xfs_trans_brelse(tp, bp);
-			if (nextbno == NULLFSBLOCK)
-				break;
-			bno = nextbno;
-			error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
-						XFS_BMAP_BTREE_REF,
-						&xfs_bmbt_buf_ops);
-			if (error)
-				return error;
-			*count += 1;
-			block = XFS_BUF_TO_BLOCK(bp);
-		}
-	}
-	return 0;
-}
-
-/*
- * Count fsblocks of the given fork.
- */
-int						/* error */
-xfs_bmap_count_blocks(
-	xfs_trans_t		*tp,		/* transaction pointer */
-	xfs_inode_t		*ip,		/* incore inode */
-	int			whichfork,	/* data or attr fork */
-	int			*count)		/* out: count of blocks */
-{
-	struct xfs_btree_block	*block;	/* current btree block */
-	xfs_fsblock_t		bno;	/* block # of "block" */
-	xfs_ifork_t		*ifp;	/* fork structure */
-	int			level;	/* btree level, for checking */
-	xfs_mount_t		*mp;	/* file system mount structure */
-	__be64			*pp;	/* pointer to block address */
-
-	bno = NULLFSBLOCK;
-	mp = ip->i_mount;
-	ifp = XFS_IFORK_PTR(ip, whichfork);
-	if ( XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ) {
-		xfs_bmap_count_leaves(ifp, 0,
-			ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t),
-			count);
-		return 0;
-	}
-
-	/*
-	 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
-	 */
-	block = ifp->if_broot;
-	level = be16_to_cpu(block->bb_level);
-	ASSERT(level > 0);
-	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
-	bno = be64_to_cpu(*pp);
-	ASSERT(bno != NULLDFSBNO);
-	ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
-	ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
-
-	if (unlikely(xfs_bmap_count_tree(mp, tp, ifp, bno, level, count) < 0)) {
-		XFS_ERROR_REPORT("xfs_bmap_count_blocks(2)", XFS_ERRLEVEL_LOW,
-				 mp);
-		return XFS_ERROR(EFSCORRUPTED);
-	}
-
-	return 0;
-}
-
-/*
  * Debug/sanity checking code
  */
 
@@ -724,8 +545,8 @@
 
 /*
  * Validate that the bmbt_irecs being returned from bmapi are valid
- * given the callers original parameters.  Specifically check the
- * ranges of the returned irecs to ensure that they only extent beyond
+ * given the caller's original parameters.  Specifically check the
+ * ranges of the returned irecs to ensure that they only extend beyond
  * the given parameters if the XFS_BMAPI_ENTIRE flag was set.
  */
 STATIC void
@@ -823,7 +644,7 @@
  * Remove the entry "free" from the free item list.  Prev points to the
  * previous entry, unless "free" is the head of the list.
  */
-STATIC void
+void
 xfs_bmap_del_free(
 	xfs_bmap_free_t		*flist,	/* free item list header */
 	xfs_bmap_free_item_t	*prev,	/* previous item on list, if any */
@@ -837,92 +658,6 @@
 	kmem_zone_free(xfs_bmap_free_item_zone, free);
 }
 
-
-/*
- * Routine to be called at transaction's end by xfs_bmapi, xfs_bunmapi
- * caller.  Frees all the extents that need freeing, which must be done
- * last due to locking considerations.  We never free any extents in
- * the first transaction.
- *
- * Return 1 if the given transaction was committed and a new one
- * started, and 0 otherwise in the committed parameter.
- */
-int						/* error */
-xfs_bmap_finish(
-	xfs_trans_t		**tp,		/* transaction pointer addr */
-	xfs_bmap_free_t		*flist,		/* i/o: list extents to free */
-	int			*committed)	/* xact committed or not */
-{
-	xfs_efd_log_item_t	*efd;		/* extent free data */
-	xfs_efi_log_item_t	*efi;		/* extent free intention */
-	int			error;		/* error return value */
-	xfs_bmap_free_item_t	*free;		/* free extent item */
-	unsigned int		logres;		/* new log reservation */
-	unsigned int		logcount;	/* new log count */
-	xfs_mount_t		*mp;		/* filesystem mount structure */
-	xfs_bmap_free_item_t	*next;		/* next item on free list */
-	xfs_trans_t		*ntp;		/* new transaction pointer */
-
-	ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
-	if (flist->xbf_count == 0) {
-		*committed = 0;
-		return 0;
-	}
-	ntp = *tp;
-	efi = xfs_trans_get_efi(ntp, flist->xbf_count);
-	for (free = flist->xbf_first; free; free = free->xbfi_next)
-		xfs_trans_log_efi_extent(ntp, efi, free->xbfi_startblock,
-			free->xbfi_blockcount);
-	logres = ntp->t_log_res;
-	logcount = ntp->t_log_count;
-	ntp = xfs_trans_dup(*tp);
-	error = xfs_trans_commit(*tp, 0);
-	*tp = ntp;
-	*committed = 1;
-	/*
-	 * We have a new transaction, so we should return committed=1,
-	 * even though we're returning an error.
-	 */
-	if (error)
-		return error;
-
-	/*
-	 * transaction commit worked ok so we can drop the extra ticket
-	 * reference that we gained in xfs_trans_dup()
-	 */
-	xfs_log_ticket_put(ntp->t_ticket);
-
-	if ((error = xfs_trans_reserve(ntp, 0, logres, 0, XFS_TRANS_PERM_LOG_RES,
-			logcount)))
-		return error;
-	efd = xfs_trans_get_efd(ntp, efi, flist->xbf_count);
-	for (free = flist->xbf_first; free != NULL; free = next) {
-		next = free->xbfi_next;
-		if ((error = xfs_free_extent(ntp, free->xbfi_startblock,
-				free->xbfi_blockcount))) {
-			/*
-			 * The bmap free list will be cleaned up at a
-			 * higher level.  The EFI will be canceled when
-			 * this transaction is aborted.
-			 * Need to force shutdown here to make sure it
-			 * happens, since this transaction may not be
-			 * dirty yet.
-			 */
-			mp = ntp->t_mountp;
-			if (!XFS_FORCED_SHUTDOWN(mp))
-				xfs_force_shutdown(mp,
-						   (error == EFSCORRUPTED) ?
-						   SHUTDOWN_CORRUPT_INCORE :
-						   SHUTDOWN_META_IO_ERROR);
-			return error;
-		}
-		xfs_trans_log_efd_extent(ntp, efd, free->xbfi_startblock,
-			free->xbfi_blockcount);
-		xfs_bmap_del_free(flist, NULL, free);
-	}
-	return 0;
-}
-
 /*
  * Free up any items left in the list.
  */
@@ -1413,8 +1148,8 @@
 	blks = XFS_ADDAFORK_SPACE_RES(mp);
 	if (rsvd)
 		tp->t_flags |= XFS_TRANS_RESERVE;
-	if ((error = xfs_trans_reserve(tp, blks, XFS_ADDAFORK_LOG_RES(mp), 0,
-			XFS_TRANS_PERM_LOG_RES, XFS_ADDAFORK_LOG_COUNT)))
+	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_addafork, blks, 0);
+	if (error)
 		goto error0;
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
 	error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ?
@@ -1815,7 +1550,7 @@
 }
 
 /*
- * Returns the file-relative block number of the last block + 1 before
+ * Returns the file-relative block number of the last block - 1 before
  * last_block (input value) in the file.
  * This is not based on i_size, it is based on the extent records.
  * Returns 0 for local files, as they do not have extent records.
@@ -1863,7 +1598,7 @@
 	return 0;
 }
 
-STATIC int
+int
 xfs_bmap_last_extent(
 	struct xfs_trans	*tp,
 	struct xfs_inode	*ip,
@@ -1927,29 +1662,6 @@
 }
 
 /*
- * Check if the endoff is outside the last extent. If so the caller will grow
- * the allocation to a stripe unit boundary.  All offsets are considered outside
- * the end of file for an empty fork, so 1 is returned in *eof in that case.
- */
-int
-xfs_bmap_eof(
-	struct xfs_inode	*ip,
-	xfs_fileoff_t		endoff,
-	int			whichfork,
-	int			*eof)
-{
-	struct xfs_bmbt_irec	rec;
-	int			error;
-
-	error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, eof);
-	if (error || *eof)
-		return error;
-
-	*eof = endoff >= rec.br_startoff + rec.br_blockcount;
-	return 0;
-}
-
-/*
  * Returns the file-relative block number of the first block past eof in
  * the file.  This is not based on i_size, it is based on the extent records.
  * Returns 0 for local files, as they do not have extent records.
@@ -3488,7 +3200,7 @@
 /*
  * Adjust the size of the new extent based on di_extsize and rt extsize.
  */
-STATIC int
+int
 xfs_bmap_extsize_align(
 	xfs_mount_t	*mp,
 	xfs_bmbt_irec_t	*gotp,		/* next extent pointer */
@@ -3650,9 +3362,9 @@
 
 #define XFS_ALLOC_GAP_UNITS	4
 
-STATIC void
+void
 xfs_bmap_adjacent(
-	xfs_bmalloca_t	*ap)		/* bmap alloc argument struct */
+	struct xfs_bmalloca	*ap)	/* bmap alloc argument struct */
 {
 	xfs_fsblock_t	adjust;		/* adjustment to block numbers */
 	xfs_agnumber_t	fb_agno;	/* ag number of ap->firstblock */
@@ -3799,109 +3511,6 @@
 }
 
 STATIC int
-xfs_bmap_rtalloc(
-	xfs_bmalloca_t	*ap)		/* bmap alloc argument struct */
-{
-	xfs_alloctype_t	atype = 0;	/* type for allocation routines */
-	int		error;		/* error return value */
-	xfs_mount_t	*mp;		/* mount point structure */
-	xfs_extlen_t	prod = 0;	/* product factor for allocators */
-	xfs_extlen_t	ralen = 0;	/* realtime allocation length */
-	xfs_extlen_t	align;		/* minimum allocation alignment */
-	xfs_rtblock_t	rtb;
-
-	mp = ap->ip->i_mount;
-	align = xfs_get_extsz_hint(ap->ip);
-	prod = align / mp->m_sb.sb_rextsize;
-	error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev,
-					align, 1, ap->eof, 0,
-					ap->conv, &ap->offset, &ap->length);
-	if (error)
-		return error;
-	ASSERT(ap->length);
-	ASSERT(ap->length % mp->m_sb.sb_rextsize == 0);
-
-	/*
-	 * If the offset & length are not perfectly aligned
-	 * then kill prod, it will just get us in trouble.
-	 */
-	if (do_mod(ap->offset, align) || ap->length % align)
-		prod = 1;
-	/*
-	 * Set ralen to be the actual requested length in rtextents.
-	 */
-	ralen = ap->length / mp->m_sb.sb_rextsize;
-	/*
-	 * If the old value was close enough to MAXEXTLEN that
-	 * we rounded up to it, cut it back so it's valid again.
-	 * Note that if it's a really large request (bigger than
-	 * MAXEXTLEN), we don't hear about that number, and can't
-	 * adjust the starting point to match it.
-	 */
-	if (ralen * mp->m_sb.sb_rextsize >= MAXEXTLEN)
-		ralen = MAXEXTLEN / mp->m_sb.sb_rextsize;
-
-	/*
-	 * Lock out other modifications to the RT bitmap inode.
-	 */
-	xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL);
-	xfs_trans_ijoin(ap->tp, mp->m_rbmip, XFS_ILOCK_EXCL);
-
-	/*
-	 * If it's an allocation to an empty file at offset 0,
-	 * pick an extent that will space things out in the rt area.
-	 */
-	if (ap->eof && ap->offset == 0) {
-		xfs_rtblock_t uninitialized_var(rtx); /* realtime extent no */
-
-		error = xfs_rtpick_extent(mp, ap->tp, ralen, &rtx);
-		if (error)
-			return error;
-		ap->blkno = rtx * mp->m_sb.sb_rextsize;
-	} else {
-		ap->blkno = 0;
-	}
-
-	xfs_bmap_adjacent(ap);
-
-	/*
-	 * Realtime allocation, done through xfs_rtallocate_extent.
-	 */
-	atype = ap->blkno == 0 ?  XFS_ALLOCTYPE_ANY_AG : XFS_ALLOCTYPE_NEAR_BNO;
-	do_div(ap->blkno, mp->m_sb.sb_rextsize);
-	rtb = ap->blkno;
-	ap->length = ralen;
-	if ((error = xfs_rtallocate_extent(ap->tp, ap->blkno, 1, ap->length,
-				&ralen, atype, ap->wasdel, prod, &rtb)))
-		return error;
-	if (rtb == NULLFSBLOCK && prod > 1 &&
-	    (error = xfs_rtallocate_extent(ap->tp, ap->blkno, 1,
-					   ap->length, &ralen, atype,
-					   ap->wasdel, 1, &rtb)))
-		return error;
-	ap->blkno = rtb;
-	if (ap->blkno != NULLFSBLOCK) {
-		ap->blkno *= mp->m_sb.sb_rextsize;
-		ralen *= mp->m_sb.sb_rextsize;
-		ap->length = ralen;
-		ap->ip->i_d.di_nblocks += ralen;
-		xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
-		if (ap->wasdel)
-			ap->ip->i_delayed_blks -= ralen;
-		/*
-		 * Adjust the disk quota also. This was reserved
-		 * earlier.
-		 */
-		xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
-			ap->wasdel ? XFS_TRANS_DQ_DELRTBCOUNT :
-					XFS_TRANS_DQ_RTBCOUNT, (long) ralen);
-	} else {
-		ap->length = 0;
-	}
-	return 0;
-}
-
-STATIC int
 xfs_bmap_btalloc_nullfb(
 	struct xfs_bmalloca	*ap,
 	struct xfs_alloc_arg	*args,
@@ -4018,7 +3627,7 @@
 
 STATIC int
 xfs_bmap_btalloc(
-	xfs_bmalloca_t	*ap)		/* bmap alloc argument struct */
+	struct xfs_bmalloca	*ap)	/* bmap alloc argument struct */
 {
 	xfs_mount_t	*mp;		/* mount point structure */
 	xfs_alloctype_t	atype = 0;	/* type for allocation routines */
@@ -4250,7 +3859,7 @@
  */
 STATIC int
 xfs_bmap_alloc(
-	xfs_bmalloca_t	*ap)		/* bmap alloc argument struct */
+	struct xfs_bmalloca	*ap)	/* bmap alloc argument struct */
 {
 	if (XFS_IS_REALTIME_INODE(ap->ip) && ap->userdata)
 		return xfs_bmap_rtalloc(ap);
@@ -4638,7 +4247,7 @@
 }
 
 
-STATIC int
+int
 __xfs_bmapi_allocate(
 	struct xfs_bmalloca	*bma)
 {
@@ -4648,12 +4257,9 @@
 	struct xfs_ifork	*ifp = XFS_IFORK_PTR(bma->ip, whichfork);
 	int			tmp_logflags = 0;
 	int			error;
-	int			rt;
 
 	ASSERT(bma->length > 0);
 
-	rt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(bma->ip);
-
 	/*
 	 * For the wasdelay case, we could also just allocate the stuff asked
 	 * for in this bmap call but that wouldn't be as good.
@@ -4756,45 +4362,6 @@
 	return 0;
 }
 
-static void
-xfs_bmapi_allocate_worker(
-	struct work_struct	*work)
-{
-	struct xfs_bmalloca	*args = container_of(work,
-						struct xfs_bmalloca, work);
-	unsigned long		pflags;
-
-	/* we are in a transaction context here */
-	current_set_flags_nested(&pflags, PF_FSTRANS);
-
-	args->result = __xfs_bmapi_allocate(args);
-	complete(args->done);
-
-	current_restore_flags_nested(&pflags, PF_FSTRANS);
-}
-
-/*
- * Some allocation requests often come in with little stack to work on. Push
- * them off to a worker thread so there is lots of stack to use. Otherwise just
- * call directly to avoid the context switch overhead here.
- */
-int
-xfs_bmapi_allocate(
-	struct xfs_bmalloca	*args)
-{
-	DECLARE_COMPLETION_ONSTACK(done);
-
-	if (!args->stack_switch)
-		return __xfs_bmapi_allocate(args);
-
-
-	args->done = &done;
-	INIT_WORK_ONSTACK(&args->work, xfs_bmapi_allocate_worker);
-	queue_work(xfs_alloc_wq, &args->work);
-	wait_for_completion(&done);
-	return args->result;
-}
-
 STATIC int
 xfs_bmapi_convert_unwritten(
 	struct xfs_bmalloca	*bma,
@@ -5789,359 +5356,3 @@
 	}
 	return error;
 }
-
-/*
- * returns 1 for success, 0 if we failed to map the extent.
- */
-STATIC int
-xfs_getbmapx_fix_eof_hole(
-	xfs_inode_t		*ip,		/* xfs incore inode pointer */
-	struct getbmapx		*out,		/* output structure */
-	int			prealloced,	/* this is a file with
-						 * preallocated data space */
-	__int64_t		end,		/* last block requested */
-	xfs_fsblock_t		startblock)
-{
-	__int64_t		fixlen;
-	xfs_mount_t		*mp;		/* file system mount point */
-	xfs_ifork_t		*ifp;		/* inode fork pointer */
-	xfs_extnum_t		lastx;		/* last extent pointer */
-	xfs_fileoff_t		fileblock;
-
-	if (startblock == HOLESTARTBLOCK) {
-		mp = ip->i_mount;
-		out->bmv_block = -1;
-		fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, XFS_ISIZE(ip)));
-		fixlen -= out->bmv_offset;
-		if (prealloced && out->bmv_offset + out->bmv_length == end) {
-			/* Came to hole at EOF. Trim it. */
-			if (fixlen <= 0)
-				return 0;
-			out->bmv_length = fixlen;
-		}
-	} else {
-		if (startblock == DELAYSTARTBLOCK)
-			out->bmv_block = -2;
-		else
-			out->bmv_block = xfs_fsb_to_db(ip, startblock);
-		fileblock = XFS_BB_TO_FSB(ip->i_mount, out->bmv_offset);
-		ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
-		if (xfs_iext_bno_to_ext(ifp, fileblock, &lastx) &&
-		   (lastx == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))-1))
-			out->bmv_oflags |= BMV_OF_LAST;
-	}
-
-	return 1;
-}
-
-/*
- * Get inode's extents as described in bmv, and format for output.
- * Calls formatter to fill the user's buffer until all extents
- * are mapped, until the passed-in bmv->bmv_count slots have
- * been filled, or until the formatter short-circuits the loop,
- * if it is tracking filled-in extents on its own.
- */
-int						/* error code */
-xfs_getbmap(
-	xfs_inode_t		*ip,
-	struct getbmapx		*bmv,		/* user bmap structure */
-	xfs_bmap_format_t	formatter,	/* format to user */
-	void			*arg)		/* formatter arg */
-{
-	__int64_t		bmvend;		/* last block requested */
-	int			error = 0;	/* return value */
-	__int64_t		fixlen;		/* length for -1 case */
-	int			i;		/* extent number */
-	int			lock;		/* lock state */
-	xfs_bmbt_irec_t		*map;		/* buffer for user's data */
-	xfs_mount_t		*mp;		/* file system mount point */
-	int			nex;		/* # of user extents can do */
-	int			nexleft;	/* # of user extents left */
-	int			subnex;		/* # of bmapi's can do */
-	int			nmap;		/* number of map entries */
-	struct getbmapx		*out;		/* output structure */
-	int			whichfork;	/* data or attr fork */
-	int			prealloced;	/* this is a file with
-						 * preallocated data space */
-	int			iflags;		/* interface flags */
-	int			bmapi_flags;	/* flags for xfs_bmapi */
-	int			cur_ext = 0;
-
-	mp = ip->i_mount;
-	iflags = bmv->bmv_iflags;
-	whichfork = iflags & BMV_IF_ATTRFORK ? XFS_ATTR_FORK : XFS_DATA_FORK;
-
-	if (whichfork == XFS_ATTR_FORK) {
-		if (XFS_IFORK_Q(ip)) {
-			if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS &&
-			    ip->i_d.di_aformat != XFS_DINODE_FMT_BTREE &&
-			    ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)
-				return XFS_ERROR(EINVAL);
-		} else if (unlikely(
-			   ip->i_d.di_aformat != 0 &&
-			   ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS)) {
-			XFS_ERROR_REPORT("xfs_getbmap", XFS_ERRLEVEL_LOW,
-					 ip->i_mount);
-			return XFS_ERROR(EFSCORRUPTED);
-		}
-
-		prealloced = 0;
-		fixlen = 1LL << 32;
-	} else {
-		if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS &&
-		    ip->i_d.di_format != XFS_DINODE_FMT_BTREE &&
-		    ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)
-			return XFS_ERROR(EINVAL);
-
-		if (xfs_get_extsz_hint(ip) ||
-		    ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){
-			prealloced = 1;
-			fixlen = mp->m_super->s_maxbytes;
-		} else {
-			prealloced = 0;
-			fixlen = XFS_ISIZE(ip);
-		}
-	}
-
-	if (bmv->bmv_length == -1) {
-		fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, fixlen));
-		bmv->bmv_length =
-			max_t(__int64_t, fixlen - bmv->bmv_offset, 0);
-	} else if (bmv->bmv_length == 0) {
-		bmv->bmv_entries = 0;
-		return 0;
-	} else if (bmv->bmv_length < 0) {
-		return XFS_ERROR(EINVAL);
-	}
-
-	nex = bmv->bmv_count - 1;
-	if (nex <= 0)
-		return XFS_ERROR(EINVAL);
-	bmvend = bmv->bmv_offset + bmv->bmv_length;
-
-
-	if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx))
-		return XFS_ERROR(ENOMEM);
-	out = kmem_zalloc(bmv->bmv_count * sizeof(struct getbmapx), KM_MAYFAIL);
-	if (!out) {
-		out = kmem_zalloc_large(bmv->bmv_count *
-					sizeof(struct getbmapx));
-		if (!out)
-			return XFS_ERROR(ENOMEM);
-	}
-
-	xfs_ilock(ip, XFS_IOLOCK_SHARED);
-	if (whichfork == XFS_DATA_FORK && !(iflags & BMV_IF_DELALLOC)) {
-		if (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size) {
-			error = -filemap_write_and_wait(VFS_I(ip)->i_mapping);
-			if (error)
-				goto out_unlock_iolock;
-		}
-		/*
-		 * even after flushing the inode, there can still be delalloc
-		 * blocks on the inode beyond EOF due to speculative
-		 * preallocation. These are not removed until the release
-		 * function is called or the inode is inactivated. Hence we
-		 * cannot assert here that ip->i_delayed_blks == 0.
-		 */
-	}
-
-	lock = xfs_ilock_map_shared(ip);
-
-	/*
-	 * Don't let nex be bigger than the number of extents
-	 * we can have assuming alternating holes and real extents.
-	 */
-	if (nex > XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1)
-		nex = XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1;
-
-	bmapi_flags = xfs_bmapi_aflag(whichfork);
-	if (!(iflags & BMV_IF_PREALLOC))
-		bmapi_flags |= XFS_BMAPI_IGSTATE;
-
-	/*
-	 * Allocate enough space to handle "subnex" maps at a time.
-	 */
-	error = ENOMEM;
-	subnex = 16;
-	map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL | KM_NOFS);
-	if (!map)
-		goto out_unlock_ilock;
-
-	bmv->bmv_entries = 0;
-
-	if (XFS_IFORK_NEXTENTS(ip, whichfork) == 0 &&
-	    (whichfork == XFS_ATTR_FORK || !(iflags & BMV_IF_DELALLOC))) {
-		error = 0;
-		goto out_free_map;
-	}
-
-	nexleft = nex;
-
-	do {
-		nmap = (nexleft > subnex) ? subnex : nexleft;
-		error = xfs_bmapi_read(ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset),
-				       XFS_BB_TO_FSB(mp, bmv->bmv_length),
-				       map, &nmap, bmapi_flags);
-		if (error)
-			goto out_free_map;
-		ASSERT(nmap <= subnex);
-
-		for (i = 0; i < nmap && nexleft && bmv->bmv_length; i++) {
-			out[cur_ext].bmv_oflags = 0;
-			if (map[i].br_state == XFS_EXT_UNWRITTEN)
-				out[cur_ext].bmv_oflags |= BMV_OF_PREALLOC;
-			else if (map[i].br_startblock == DELAYSTARTBLOCK)
-				out[cur_ext].bmv_oflags |= BMV_OF_DELALLOC;
-			out[cur_ext].bmv_offset =
-				XFS_FSB_TO_BB(mp, map[i].br_startoff);
-			out[cur_ext].bmv_length =
-				XFS_FSB_TO_BB(mp, map[i].br_blockcount);
-			out[cur_ext].bmv_unused1 = 0;
-			out[cur_ext].bmv_unused2 = 0;
-
-			/*
-			 * delayed allocation extents that start beyond EOF can
-			 * occur due to speculative EOF allocation when the
-			 * delalloc extent is larger than the largest freespace
-			 * extent at conversion time. These extents cannot be
-			 * converted by data writeback, so can exist here even
-			 * if we are not supposed to be finding delalloc
-			 * extents.
-			 */
-			if (map[i].br_startblock == DELAYSTARTBLOCK &&
-			    map[i].br_startoff <= XFS_B_TO_FSB(mp, XFS_ISIZE(ip)))
-				ASSERT((iflags & BMV_IF_DELALLOC) != 0);
-
-                        if (map[i].br_startblock == HOLESTARTBLOCK &&
-			    whichfork == XFS_ATTR_FORK) {
-				/* came to the end of attribute fork */
-				out[cur_ext].bmv_oflags |= BMV_OF_LAST;
-				goto out_free_map;
-			}
-
-			if (!xfs_getbmapx_fix_eof_hole(ip, &out[cur_ext],
-					prealloced, bmvend,
-					map[i].br_startblock))
-				goto out_free_map;
-
-			bmv->bmv_offset =
-				out[cur_ext].bmv_offset +
-				out[cur_ext].bmv_length;
-			bmv->bmv_length =
-				max_t(__int64_t, 0, bmvend - bmv->bmv_offset);
-
-			/*
-			 * In case we don't want to return the hole,
-			 * don't increase cur_ext so that we can reuse
-			 * it in the next loop.
-			 */
-			if ((iflags & BMV_IF_NO_HOLES) &&
-			    map[i].br_startblock == HOLESTARTBLOCK) {
-				memset(&out[cur_ext], 0, sizeof(out[cur_ext]));
-				continue;
-			}
-
-			nexleft--;
-			bmv->bmv_entries++;
-			cur_ext++;
-		}
-	} while (nmap && nexleft && bmv->bmv_length);
-
- out_free_map:
-	kmem_free(map);
- out_unlock_ilock:
-	xfs_iunlock_map_shared(ip, lock);
- out_unlock_iolock:
-	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
-
-	for (i = 0; i < cur_ext; i++) {
-		int full = 0;	/* user array is full */
-
-		/* format results & advance arg */
-		error = formatter(&arg, &out[i], &full);
-		if (error || full)
-			break;
-	}
-
-	if (is_vmalloc_addr(out))
-		kmem_free_large(out);
-	else
-		kmem_free(out);
-	return error;
-}
-
-/*
- * dead simple method of punching delalyed allocation blocks from a range in
- * the inode. Walks a block at a time so will be slow, but is only executed in
- * rare error cases so the overhead is not critical. This will alays punch out
- * both the start and end blocks, even if the ranges only partially overlap
- * them, so it is up to the caller to ensure that partial blocks are not
- * passed in.
- */
-int
-xfs_bmap_punch_delalloc_range(
-	struct xfs_inode	*ip,
-	xfs_fileoff_t		start_fsb,
-	xfs_fileoff_t		length)
-{
-	xfs_fileoff_t		remaining = length;
-	int			error = 0;
-
-	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-
-	do {
-		int		done;
-		xfs_bmbt_irec_t	imap;
-		int		nimaps = 1;
-		xfs_fsblock_t	firstblock;
-		xfs_bmap_free_t flist;
-
-		/*
-		 * Map the range first and check that it is a delalloc extent
-		 * before trying to unmap the range. Otherwise we will be
-		 * trying to remove a real extent (which requires a
-		 * transaction) or a hole, which is probably a bad idea...
-		 */
-		error = xfs_bmapi_read(ip, start_fsb, 1, &imap, &nimaps,
-				       XFS_BMAPI_ENTIRE);
-
-		if (error) {
-			/* something screwed, just bail */
-			if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-				xfs_alert(ip->i_mount,
-			"Failed delalloc mapping lookup ino %lld fsb %lld.",
-						ip->i_ino, start_fsb);
-			}
-			break;
-		}
-		if (!nimaps) {
-			/* nothing there */
-			goto next_block;
-		}
-		if (imap.br_startblock != DELAYSTARTBLOCK) {
-			/* been converted, ignore */
-			goto next_block;
-		}
-		WARN_ON(imap.br_blockcount == 0);
-
-		/*
-		 * Note: while we initialise the firstblock/flist pair, they
-		 * should never be used because blocks should never be
-		 * allocated or freed for a delalloc extent and hence we need
-		 * don't cancel or finish them after the xfs_bunmapi() call.
-		 */
-		xfs_bmap_init(&flist, &firstblock);
-		error = xfs_bunmapi(NULL, ip, start_fsb, 1, 0, 1, &firstblock,
-					&flist, &done);
-		if (error)
-			break;
-
-		ASSERT(!flist.xbf_count && !flist.xbf_first);
-next_block:
-		start_fsb++;
-		remaining--;
-	} while(remaining > 0);
-
-	return error;
-}
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 1cf1292..33b41f3 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -108,41 +108,6 @@
 }
 
 /*
- * Argument structure for xfs_bmap_alloc.
- */
-typedef struct xfs_bmalloca {
-	xfs_fsblock_t		*firstblock; /* i/o first block allocated */
-	struct xfs_bmap_free	*flist;	/* bmap freelist */
-	struct xfs_trans	*tp;	/* transaction pointer */
-	struct xfs_inode	*ip;	/* incore inode pointer */
-	struct xfs_bmbt_irec	prev;	/* extent before the new one */
-	struct xfs_bmbt_irec	got;	/* extent after, or delayed */
-
-	xfs_fileoff_t		offset;	/* offset in file filling in */
-	xfs_extlen_t		length;	/* i/o length asked/allocated */
-	xfs_fsblock_t		blkno;	/* starting block of new extent */
-
-	struct xfs_btree_cur	*cur;	/* btree cursor */
-	xfs_extnum_t		idx;	/* current extent index */
-	int			nallocs;/* number of extents alloc'd */
-	int			logflags;/* flags for transaction logging */
-
-	xfs_extlen_t		total;	/* total blocks needed for xaction */
-	xfs_extlen_t		minlen;	/* minimum allocation size (blocks) */
-	xfs_extlen_t		minleft; /* amount must be left after alloc */
-	char			eof;	/* set if allocating past last extent */
-	char			wasdel;	/* replacing a delayed allocation */
-	char			userdata;/* set if is user data */
-	char			aeof;	/* allocated space at eof */
-	char			conv;	/* overwriting unwritten extents */
-	char			stack_switch;
-	int			flags;
-	struct completion	*done;
-	struct work_struct	work;
-	int			result;
-} xfs_bmalloca_t;
-
-/*
  * Flags for xfs_bmap_add_extent*.
  */
 #define BMAP_LEFT_CONTIG	(1 << 0)
@@ -162,7 +127,7 @@
 	{ BMAP_RIGHT_FILLING,	"RF" }, \
 	{ BMAP_ATTRFORK,	"ATTR" }
 
-#if defined(__KERNEL) && defined(DEBUG)
+#ifdef DEBUG
 void	xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt,
 		int whichfork, unsigned long caller_ip);
 #define	XFS_BMAP_TRACE_EXLIST(ip,c,w)	\
@@ -205,23 +170,4 @@
 		xfs_extnum_t num);
 uint	xfs_default_attroffset(struct xfs_inode *ip);
 
-#ifdef __KERNEL__
-/* bmap to userspace formatter - copy to user & advance pointer */
-typedef int (*xfs_bmap_format_t)(void **, struct getbmapx *, int *);
-
-int	xfs_bmap_finish(struct xfs_trans **tp, struct xfs_bmap_free *flist,
-		int *committed);
-int	xfs_getbmap(struct xfs_inode *ip, struct getbmapx *bmv,
-		xfs_bmap_format_t formatter, void *arg);
-int	xfs_bmap_eof(struct xfs_inode *ip, xfs_fileoff_t endoff,
-		int whichfork, int *eof);
-int	xfs_bmap_count_blocks(struct xfs_trans *tp, struct xfs_inode *ip,
-		int whichfork, int *count);
-int	xfs_bmap_punch_delalloc_range(struct xfs_inode *ip,
-		xfs_fileoff_t start_fsb, xfs_fileoff_t length);
-
-xfs_daddr_t xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb);
-
-#endif	/* __KERNEL__ */
-
 #endif	/* __XFS_BMAP_H__ */
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 0c61a22..cf3bc76 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -17,7 +17,7 @@
  */
 #include "xfs.h"
 #include "xfs_fs.h"
-#include "xfs_types.h"
+#include "xfs_format.h"
 #include "xfs_bit.h"
 #include "xfs_log.h"
 #include "xfs_trans.h"
@@ -722,7 +722,7 @@
 				      cur->bc_rec.b.br_startoff;
 }
 
-static int
+static bool
 xfs_bmbt_verify(
 	struct xfs_buf		*bp)
 {
@@ -775,7 +775,6 @@
 		return false;
 
 	return true;
-
 }
 
 static void
@@ -789,7 +788,6 @@
 				     bp->b_target->bt_mount, bp->b_addr);
 		xfs_buf_ioerror(bp, EFSCORRUPTED);
 	}
-
 }
 
 static void
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
new file mode 100644
index 0000000..541d59f
--- /dev/null
+++ b/fs/xfs/xfs_bmap_util.c
@@ -0,0 +1,2026 @@
+/*
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
+ * Copyright (c) 2012 Red Hat, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_format.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+#include "xfs_da_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_btree.h"
+#include "xfs_extfree_item.h"
+#include "xfs_alloc.h"
+#include "xfs_bmap.h"
+#include "xfs_bmap_util.h"
+#include "xfs_rtalloc.h"
+#include "xfs_error.h"
+#include "xfs_quota.h"
+#include "xfs_trans_space.h"
+#include "xfs_trace.h"
+#include "xfs_icache.h"
+
+/* Kernel only BMAP related definitions and functions */
+
+/*
+ * Convert the given file system block to a disk block.  We have to treat it
+ * differently based on whether the file is a real time file or not, because the
+ * bmap code does.
+ */
+xfs_daddr_t
+xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb)
+{
+	return (XFS_IS_REALTIME_INODE(ip) ? \
+		 (xfs_daddr_t)XFS_FSB_TO_BB((ip)->i_mount, (fsb)) : \
+		 XFS_FSB_TO_DADDR((ip)->i_mount, (fsb)));
+}
+
+/*
+ * Routine to be called at transaction's end by xfs_bmapi, xfs_bunmapi
+ * caller.  Frees all the extents that need freeing, which must be done
+ * last due to locking considerations.  We never free any extents in
+ * the first transaction.
+ *
+ * Return 1 if the given transaction was committed and a new one
+ * started, and 0 otherwise in the committed parameter.
+ */
+int						/* error */
+xfs_bmap_finish(
+	xfs_trans_t		**tp,		/* transaction pointer addr */
+	xfs_bmap_free_t		*flist,		/* i/o: list extents to free */
+	int			*committed)	/* xact committed or not */
+{
+	xfs_efd_log_item_t	*efd;		/* extent free data */
+	xfs_efi_log_item_t	*efi;		/* extent free intention */
+	int			error;		/* error return value */
+	xfs_bmap_free_item_t	*free;		/* free extent item */
+	struct xfs_trans_res	tres;		/* new log reservation */
+	xfs_mount_t		*mp;		/* filesystem mount structure */
+	xfs_bmap_free_item_t	*next;		/* next item on free list */
+	xfs_trans_t		*ntp;		/* new transaction pointer */
+
+	ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
+	if (flist->xbf_count == 0) {
+		*committed = 0;
+		return 0;
+	}
+	ntp = *tp;
+	efi = xfs_trans_get_efi(ntp, flist->xbf_count);
+	for (free = flist->xbf_first; free; free = free->xbfi_next)
+		xfs_trans_log_efi_extent(ntp, efi, free->xbfi_startblock,
+			free->xbfi_blockcount);
+
+	tres.tr_logres = ntp->t_log_res;
+	tres.tr_logcount = ntp->t_log_count;
+	tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
+	ntp = xfs_trans_dup(*tp);
+	error = xfs_trans_commit(*tp, 0);
+	*tp = ntp;
+	*committed = 1;
+	/*
+	 * We have a new transaction, so we should return committed=1,
+	 * even though we're returning an error.
+	 */
+	if (error)
+		return error;
+
+	/*
+	 * transaction commit worked ok so we can drop the extra ticket
+	 * reference that we gained in xfs_trans_dup()
+	 */
+	xfs_log_ticket_put(ntp->t_ticket);
+
+	error = xfs_trans_reserve(ntp, &tres, 0, 0);
+	if (error)
+		return error;
+	efd = xfs_trans_get_efd(ntp, efi, flist->xbf_count);
+	for (free = flist->xbf_first; free != NULL; free = next) {
+		next = free->xbfi_next;
+		if ((error = xfs_free_extent(ntp, free->xbfi_startblock,
+				free->xbfi_blockcount))) {
+			/*
+			 * The bmap free list will be cleaned up at a
+			 * higher level.  The EFI will be canceled when
+			 * this transaction is aborted.
+			 * Need to force shutdown here to make sure it
+			 * happens, since this transaction may not be
+			 * dirty yet.
+			 */
+			mp = ntp->t_mountp;
+			if (!XFS_FORCED_SHUTDOWN(mp))
+				xfs_force_shutdown(mp,
+						   (error == EFSCORRUPTED) ?
+						   SHUTDOWN_CORRUPT_INCORE :
+						   SHUTDOWN_META_IO_ERROR);
+			return error;
+		}
+		xfs_trans_log_efd_extent(ntp, efd, free->xbfi_startblock,
+			free->xbfi_blockcount);
+		xfs_bmap_del_free(flist, NULL, free);
+	}
+	return 0;
+}
+
+int
+xfs_bmap_rtalloc(
+	struct xfs_bmalloca	*ap)	/* bmap alloc argument struct */
+{
+	xfs_alloctype_t	atype = 0;	/* type for allocation routines */
+	int		error;		/* error return value */
+	xfs_mount_t	*mp;		/* mount point structure */
+	xfs_extlen_t	prod = 0;	/* product factor for allocators */
+	xfs_extlen_t	ralen = 0;	/* realtime allocation length */
+	xfs_extlen_t	align;		/* minimum allocation alignment */
+	xfs_rtblock_t	rtb;
+
+	mp = ap->ip->i_mount;
+	align = xfs_get_extsz_hint(ap->ip);
+	prod = align / mp->m_sb.sb_rextsize;
+	error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev,
+					align, 1, ap->eof, 0,
+					ap->conv, &ap->offset, &ap->length);
+	if (error)
+		return error;
+	ASSERT(ap->length);
+	ASSERT(ap->length % mp->m_sb.sb_rextsize == 0);
+
+	/*
+	 * If the offset & length are not perfectly aligned
+	 * then kill prod, it will just get us in trouble.
+	 */
+	if (do_mod(ap->offset, align) || ap->length % align)
+		prod = 1;
+	/*
+	 * Set ralen to be the actual requested length in rtextents.
+	 */
+	ralen = ap->length / mp->m_sb.sb_rextsize;
+	/*
+	 * If the old value was close enough to MAXEXTLEN that
+	 * we rounded up to it, cut it back so it's valid again.
+	 * Note that if it's a really large request (bigger than
+	 * MAXEXTLEN), we don't hear about that number, and can't
+	 * adjust the starting point to match it.
+	 */
+	if (ralen * mp->m_sb.sb_rextsize >= MAXEXTLEN)
+		ralen = MAXEXTLEN / mp->m_sb.sb_rextsize;
+
+	/*
+	 * Lock out other modifications to the RT bitmap inode.
+	 */
+	xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL);
+	xfs_trans_ijoin(ap->tp, mp->m_rbmip, XFS_ILOCK_EXCL);
+
+	/*
+	 * If it's an allocation to an empty file at offset 0,
+	 * pick an extent that will space things out in the rt area.
+	 */
+	if (ap->eof && ap->offset == 0) {
+		xfs_rtblock_t uninitialized_var(rtx); /* realtime extent no */
+
+		error = xfs_rtpick_extent(mp, ap->tp, ralen, &rtx);
+		if (error)
+			return error;
+		ap->blkno = rtx * mp->m_sb.sb_rextsize;
+	} else {
+		ap->blkno = 0;
+	}
+
+	xfs_bmap_adjacent(ap);
+
+	/*
+	 * Realtime allocation, done through xfs_rtallocate_extent.
+	 */
+	atype = ap->blkno == 0 ?  XFS_ALLOCTYPE_ANY_AG : XFS_ALLOCTYPE_NEAR_BNO;
+	do_div(ap->blkno, mp->m_sb.sb_rextsize);
+	rtb = ap->blkno;
+	ap->length = ralen;
+	if ((error = xfs_rtallocate_extent(ap->tp, ap->blkno, 1, ap->length,
+				&ralen, atype, ap->wasdel, prod, &rtb)))
+		return error;
+	if (rtb == NULLFSBLOCK && prod > 1 &&
+	    (error = xfs_rtallocate_extent(ap->tp, ap->blkno, 1,
+					   ap->length, &ralen, atype,
+					   ap->wasdel, 1, &rtb)))
+		return error;
+	ap->blkno = rtb;
+	if (ap->blkno != NULLFSBLOCK) {
+		ap->blkno *= mp->m_sb.sb_rextsize;
+		ralen *= mp->m_sb.sb_rextsize;
+		ap->length = ralen;
+		ap->ip->i_d.di_nblocks += ralen;
+		xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
+		if (ap->wasdel)
+			ap->ip->i_delayed_blks -= ralen;
+		/*
+		 * Adjust the disk quota also. This was reserved
+		 * earlier.
+		 */
+		xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
+			ap->wasdel ? XFS_TRANS_DQ_DELRTBCOUNT :
+					XFS_TRANS_DQ_RTBCOUNT, (long) ralen);
+	} else {
+		ap->length = 0;
+	}
+	return 0;
+}
+
+/*
+ * Stack switching interfaces for allocation
+ */
+static void
+xfs_bmapi_allocate_worker(
+	struct work_struct	*work)
+{
+	struct xfs_bmalloca	*args = container_of(work,
+						struct xfs_bmalloca, work);
+	unsigned long		pflags;
+
+	/* we are in a transaction context here */
+	current_set_flags_nested(&pflags, PF_FSTRANS);
+
+	args->result = __xfs_bmapi_allocate(args);
+	complete(args->done);
+
+	current_restore_flags_nested(&pflags, PF_FSTRANS);
+}
+
+/*
+ * Some allocation requests often come in with little stack to work on. Push
+ * them off to a worker thread so there is lots of stack to use. Otherwise just
+ * call directly to avoid the context switch overhead here.
+ */
+int
+xfs_bmapi_allocate(
+	struct xfs_bmalloca	*args)
+{
+	DECLARE_COMPLETION_ONSTACK(done);
+
+	if (!args->stack_switch)
+		return __xfs_bmapi_allocate(args);
+
+
+	args->done = &done;
+	INIT_WORK_ONSTACK(&args->work, xfs_bmapi_allocate_worker);
+	queue_work(xfs_alloc_wq, &args->work);
+	wait_for_completion(&done);
+	return args->result;
+}
+
+/*
+ * Check if the endoff is outside the last extent. If so the caller will grow
+ * the allocation to a stripe unit boundary.  All offsets are considered outside
+ * the end of file for an empty fork, so 1 is returned in *eof in that case.
+ */
+int
+xfs_bmap_eof(
+	struct xfs_inode	*ip,
+	xfs_fileoff_t		endoff,
+	int			whichfork,
+	int			*eof)
+{
+	struct xfs_bmbt_irec	rec;
+	int			error;
+
+	error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, eof);
+	if (error || *eof)
+		return error;
+
+	*eof = endoff >= rec.br_startoff + rec.br_blockcount;
+	return 0;
+}
+
+/*
+ * Extent tree block counting routines.
+ */
+
+/*
+ * Count leaf blocks given a range of extent records.
+ */
+STATIC void
+xfs_bmap_count_leaves(
+	xfs_ifork_t		*ifp,
+	xfs_extnum_t		idx,
+	int			numrecs,
+	int			*count)
+{
+	int		b;
+
+	for (b = 0; b < numrecs; b++) {
+		xfs_bmbt_rec_host_t *frp = xfs_iext_get_ext(ifp, idx + b);
+		*count += xfs_bmbt_get_blockcount(frp);
+	}
+}
+
+/*
+ * Count leaf blocks given a range of extent records originally
+ * in btree format.
+ */
+STATIC void
+xfs_bmap_disk_count_leaves(
+	struct xfs_mount	*mp,
+	struct xfs_btree_block	*block,
+	int			numrecs,
+	int			*count)
+{
+	int		b;
+	xfs_bmbt_rec_t	*frp;
+
+	for (b = 1; b <= numrecs; b++) {
+		frp = XFS_BMBT_REC_ADDR(mp, block, b);
+		*count += xfs_bmbt_disk_get_blockcount(frp);
+	}
+}
+
+/*
+ * Recursively walks each level of a btree
+ * to count total fsblocks in use.
+ */
+STATIC int                                     /* error */
+xfs_bmap_count_tree(
+	xfs_mount_t     *mp,            /* file system mount point */
+	xfs_trans_t     *tp,            /* transaction pointer */
+	xfs_ifork_t	*ifp,		/* inode fork pointer */
+	xfs_fsblock_t   blockno,	/* file system block number */
+	int             levelin,	/* level in btree */
+	int		*count)		/* Count of blocks */
+{
+	int			error;
+	xfs_buf_t		*bp, *nbp;
+	int			level = levelin;
+	__be64			*pp;
+	xfs_fsblock_t           bno = blockno;
+	xfs_fsblock_t		nextbno;
+	struct xfs_btree_block	*block, *nextblock;
+	int			numrecs;
+
+	error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, XFS_BMAP_BTREE_REF,
+						&xfs_bmbt_buf_ops);
+	if (error)
+		return error;
+	*count += 1;
+	block = XFS_BUF_TO_BLOCK(bp);
+
+	if (--level) {
+		/* Not at node above leaves, count this level of nodes */
+		nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
+		while (nextbno != NULLFSBLOCK) {
+			error = xfs_btree_read_bufl(mp, tp, nextbno, 0, &nbp,
+						XFS_BMAP_BTREE_REF,
+						&xfs_bmbt_buf_ops);
+			if (error)
+				return error;
+			*count += 1;
+			nextblock = XFS_BUF_TO_BLOCK(nbp);
+			nextbno = be64_to_cpu(nextblock->bb_u.l.bb_rightsib);
+			xfs_trans_brelse(tp, nbp);
+		}
+
+		/* Dive to the next level */
+		pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
+		bno = be64_to_cpu(*pp);
+		if (unlikely((error =
+		     xfs_bmap_count_tree(mp, tp, ifp, bno, level, count)) < 0)) {
+			xfs_trans_brelse(tp, bp);
+			XFS_ERROR_REPORT("xfs_bmap_count_tree(1)",
+					 XFS_ERRLEVEL_LOW, mp);
+			return XFS_ERROR(EFSCORRUPTED);
+		}
+		xfs_trans_brelse(tp, bp);
+	} else {
+		/* count all level 1 nodes and their leaves */
+		for (;;) {
+			nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
+			numrecs = be16_to_cpu(block->bb_numrecs);
+			xfs_bmap_disk_count_leaves(mp, block, numrecs, count);
+			xfs_trans_brelse(tp, bp);
+			if (nextbno == NULLFSBLOCK)
+				break;
+			bno = nextbno;
+			error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
+						XFS_BMAP_BTREE_REF,
+						&xfs_bmbt_buf_ops);
+			if (error)
+				return error;
+			*count += 1;
+			block = XFS_BUF_TO_BLOCK(bp);
+		}
+	}
+	return 0;
+}
+
+/*
+ * Count fsblocks of the given fork.
+ */
+int						/* error */
+xfs_bmap_count_blocks(
+	xfs_trans_t		*tp,		/* transaction pointer */
+	xfs_inode_t		*ip,		/* incore inode */
+	int			whichfork,	/* data or attr fork */
+	int			*count)		/* out: count of blocks */
+{
+	struct xfs_btree_block	*block;	/* current btree block */
+	xfs_fsblock_t		bno;	/* block # of "block" */
+	xfs_ifork_t		*ifp;	/* fork structure */
+	int			level;	/* btree level, for checking */
+	xfs_mount_t		*mp;	/* file system mount structure */
+	__be64			*pp;	/* pointer to block address */
+
+	bno = NULLFSBLOCK;
+	mp = ip->i_mount;
+	ifp = XFS_IFORK_PTR(ip, whichfork);
+	if ( XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ) {
+		xfs_bmap_count_leaves(ifp, 0,
+			ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t),
+			count);
+		return 0;
+	}
+
+	/*
+	 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
+	 */
+	block = ifp->if_broot;
+	level = be16_to_cpu(block->bb_level);
+	ASSERT(level > 0);
+	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
+	bno = be64_to_cpu(*pp);
+	ASSERT(bno != NULLDFSBNO);
+	ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
+	ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
+
+	if (unlikely(xfs_bmap_count_tree(mp, tp, ifp, bno, level, count) < 0)) {
+		XFS_ERROR_REPORT("xfs_bmap_count_blocks(2)", XFS_ERRLEVEL_LOW,
+				 mp);
+		return XFS_ERROR(EFSCORRUPTED);
+	}
+
+	return 0;
+}
+
+/*
+ * returns 1 for success, 0 if we failed to map the extent.
+ */
+STATIC int
+xfs_getbmapx_fix_eof_hole(
+	xfs_inode_t		*ip,		/* xfs incore inode pointer */
+	struct getbmapx		*out,		/* output structure */
+	int			prealloced,	/* this is a file with
+						 * preallocated data space */
+	__int64_t		end,		/* last block requested */
+	xfs_fsblock_t		startblock)
+{
+	__int64_t		fixlen;
+	xfs_mount_t		*mp;		/* file system mount point */
+	xfs_ifork_t		*ifp;		/* inode fork pointer */
+	xfs_extnum_t		lastx;		/* last extent pointer */
+	xfs_fileoff_t		fileblock;
+
+	if (startblock == HOLESTARTBLOCK) {
+		mp = ip->i_mount;
+		out->bmv_block = -1;
+		fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, XFS_ISIZE(ip)));
+		fixlen -= out->bmv_offset;
+		if (prealloced && out->bmv_offset + out->bmv_length == end) {
+			/* Came to hole at EOF. Trim it. */
+			if (fixlen <= 0)
+				return 0;
+			out->bmv_length = fixlen;
+		}
+	} else {
+		if (startblock == DELAYSTARTBLOCK)
+			out->bmv_block = -2;
+		else
+			out->bmv_block = xfs_fsb_to_db(ip, startblock);
+		fileblock = XFS_BB_TO_FSB(ip->i_mount, out->bmv_offset);
+		ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+		if (xfs_iext_bno_to_ext(ifp, fileblock, &lastx) &&
+		   (lastx == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))-1))
+			out->bmv_oflags |= BMV_OF_LAST;
+	}
+
+	return 1;
+}
+
+/*
+ * Get inode's extents as described in bmv, and format for output.
+ * Calls formatter to fill the user's buffer until all extents
+ * are mapped, until the passed-in bmv->bmv_count slots have
+ * been filled, or until the formatter short-circuits the loop,
+ * if it is tracking filled-in extents on its own.
+ */
+int						/* error code */
+xfs_getbmap(
+	xfs_inode_t		*ip,
+	struct getbmapx		*bmv,		/* user bmap structure */
+	xfs_bmap_format_t	formatter,	/* format to user */
+	void			*arg)		/* formatter arg */
+{
+	__int64_t		bmvend;		/* last block requested */
+	int			error = 0;	/* return value */
+	__int64_t		fixlen;		/* length for -1 case */
+	int			i;		/* extent number */
+	int			lock;		/* lock state */
+	xfs_bmbt_irec_t		*map;		/* buffer for user's data */
+	xfs_mount_t		*mp;		/* file system mount point */
+	int			nex;		/* # of user extents can do */
+	int			nexleft;	/* # of user extents left */
+	int			subnex;		/* # of bmapi's can do */
+	int			nmap;		/* number of map entries */
+	struct getbmapx		*out;		/* output structure */
+	int			whichfork;	/* data or attr fork */
+	int			prealloced;	/* this is a file with
+						 * preallocated data space */
+	int			iflags;		/* interface flags */
+	int			bmapi_flags;	/* flags for xfs_bmapi */
+	int			cur_ext = 0;
+
+	mp = ip->i_mount;
+	iflags = bmv->bmv_iflags;
+	whichfork = iflags & BMV_IF_ATTRFORK ? XFS_ATTR_FORK : XFS_DATA_FORK;
+
+	if (whichfork == XFS_ATTR_FORK) {
+		if (XFS_IFORK_Q(ip)) {
+			if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS &&
+			    ip->i_d.di_aformat != XFS_DINODE_FMT_BTREE &&
+			    ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)
+				return XFS_ERROR(EINVAL);
+		} else if (unlikely(
+			   ip->i_d.di_aformat != 0 &&
+			   ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS)) {
+			XFS_ERROR_REPORT("xfs_getbmap", XFS_ERRLEVEL_LOW,
+					 ip->i_mount);
+			return XFS_ERROR(EFSCORRUPTED);
+		}
+
+		prealloced = 0;
+		fixlen = 1LL << 32;
+	} else {
+		if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS &&
+		    ip->i_d.di_format != XFS_DINODE_FMT_BTREE &&
+		    ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)
+			return XFS_ERROR(EINVAL);
+
+		if (xfs_get_extsz_hint(ip) ||
+		    ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){
+			prealloced = 1;
+			fixlen = mp->m_super->s_maxbytes;
+		} else {
+			prealloced = 0;
+			fixlen = XFS_ISIZE(ip);
+		}
+	}
+
+	if (bmv->bmv_length == -1) {
+		fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, fixlen));
+		bmv->bmv_length =
+			max_t(__int64_t, fixlen - bmv->bmv_offset, 0);
+	} else if (bmv->bmv_length == 0) {
+		bmv->bmv_entries = 0;
+		return 0;
+	} else if (bmv->bmv_length < 0) {
+		return XFS_ERROR(EINVAL);
+	}
+
+	nex = bmv->bmv_count - 1;
+	if (nex <= 0)
+		return XFS_ERROR(EINVAL);
+	bmvend = bmv->bmv_offset + bmv->bmv_length;
+
+
+	if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx))
+		return XFS_ERROR(ENOMEM);
+	out = kmem_zalloc(bmv->bmv_count * sizeof(struct getbmapx), KM_MAYFAIL);
+	if (!out) {
+		out = kmem_zalloc_large(bmv->bmv_count *
+					sizeof(struct getbmapx));
+		if (!out)
+			return XFS_ERROR(ENOMEM);
+	}
+
+	xfs_ilock(ip, XFS_IOLOCK_SHARED);
+	if (whichfork == XFS_DATA_FORK && !(iflags & BMV_IF_DELALLOC)) {
+		if (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size) {
+			error = -filemap_write_and_wait(VFS_I(ip)->i_mapping);
+			if (error)
+				goto out_unlock_iolock;
+		}
+		/*
+		 * even after flushing the inode, there can still be delalloc
+		 * blocks on the inode beyond EOF due to speculative
+		 * preallocation. These are not removed until the release
+		 * function is called or the inode is inactivated. Hence we
+		 * cannot assert here that ip->i_delayed_blks == 0.
+		 */
+	}
+
+	lock = xfs_ilock_map_shared(ip);
+
+	/*
+	 * Don't let nex be bigger than the number of extents
+	 * we can have assuming alternating holes and real extents.
+	 */
+	if (nex > XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1)
+		nex = XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1;
+
+	bmapi_flags = xfs_bmapi_aflag(whichfork);
+	if (!(iflags & BMV_IF_PREALLOC))
+		bmapi_flags |= XFS_BMAPI_IGSTATE;
+
+	/*
+	 * Allocate enough space to handle "subnex" maps at a time.
+	 */
+	error = ENOMEM;
+	subnex = 16;
+	map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL | KM_NOFS);
+	if (!map)
+		goto out_unlock_ilock;
+
+	bmv->bmv_entries = 0;
+
+	if (XFS_IFORK_NEXTENTS(ip, whichfork) == 0 &&
+	    (whichfork == XFS_ATTR_FORK || !(iflags & BMV_IF_DELALLOC))) {
+		error = 0;
+		goto out_free_map;
+	}
+
+	nexleft = nex;
+
+	do {
+		nmap = (nexleft > subnex) ? subnex : nexleft;
+		error = xfs_bmapi_read(ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset),
+				       XFS_BB_TO_FSB(mp, bmv->bmv_length),
+				       map, &nmap, bmapi_flags);
+		if (error)
+			goto out_free_map;
+		ASSERT(nmap <= subnex);
+
+		for (i = 0; i < nmap && nexleft && bmv->bmv_length; i++) {
+			out[cur_ext].bmv_oflags = 0;
+			if (map[i].br_state == XFS_EXT_UNWRITTEN)
+				out[cur_ext].bmv_oflags |= BMV_OF_PREALLOC;
+			else if (map[i].br_startblock == DELAYSTARTBLOCK)
+				out[cur_ext].bmv_oflags |= BMV_OF_DELALLOC;
+			out[cur_ext].bmv_offset =
+				XFS_FSB_TO_BB(mp, map[i].br_startoff);
+			out[cur_ext].bmv_length =
+				XFS_FSB_TO_BB(mp, map[i].br_blockcount);
+			out[cur_ext].bmv_unused1 = 0;
+			out[cur_ext].bmv_unused2 = 0;
+
+			/*
+			 * delayed allocation extents that start beyond EOF can
+			 * occur due to speculative EOF allocation when the
+			 * delalloc extent is larger than the largest freespace
+			 * extent at conversion time. These extents cannot be
+			 * converted by data writeback, so can exist here even
+			 * if we are not supposed to be finding delalloc
+			 * extents.
+			 */
+			if (map[i].br_startblock == DELAYSTARTBLOCK &&
+			    map[i].br_startoff <= XFS_B_TO_FSB(mp, XFS_ISIZE(ip)))
+				ASSERT((iflags & BMV_IF_DELALLOC) != 0);
+
+                        if (map[i].br_startblock == HOLESTARTBLOCK &&
+			    whichfork == XFS_ATTR_FORK) {
+				/* came to the end of attribute fork */
+				out[cur_ext].bmv_oflags |= BMV_OF_LAST;
+				goto out_free_map;
+			}
+
+			if (!xfs_getbmapx_fix_eof_hole(ip, &out[cur_ext],
+					prealloced, bmvend,
+					map[i].br_startblock))
+				goto out_free_map;
+
+			bmv->bmv_offset =
+				out[cur_ext].bmv_offset +
+				out[cur_ext].bmv_length;
+			bmv->bmv_length =
+				max_t(__int64_t, 0, bmvend - bmv->bmv_offset);
+
+			/*
+			 * In case we don't want to return the hole,
+			 * don't increase cur_ext so that we can reuse
+			 * it in the next loop.
+			 */
+			if ((iflags & BMV_IF_NO_HOLES) &&
+			    map[i].br_startblock == HOLESTARTBLOCK) {
+				memset(&out[cur_ext], 0, sizeof(out[cur_ext]));
+				continue;
+			}
+
+			nexleft--;
+			bmv->bmv_entries++;
+			cur_ext++;
+		}
+	} while (nmap && nexleft && bmv->bmv_length);
+
+ out_free_map:
+	kmem_free(map);
+ out_unlock_ilock:
+	xfs_iunlock_map_shared(ip, lock);
+ out_unlock_iolock:
+	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+
+	for (i = 0; i < cur_ext; i++) {
+		int full = 0;	/* user array is full */
+
+		/* format results & advance arg */
+		error = formatter(&arg, &out[i], &full);
+		if (error || full)
+			break;
+	}
+
+	if (is_vmalloc_addr(out))
+		kmem_free_large(out);
+	else
+		kmem_free(out);
+	return error;
+}
+
+/*
+ * dead simple method of punching delalyed allocation blocks from a range in
+ * the inode. Walks a block at a time so will be slow, but is only executed in
+ * rare error cases so the overhead is not critical. This will always punch out
+ * both the start and end blocks, even if the ranges only partially overlap
+ * them, so it is up to the caller to ensure that partial blocks are not
+ * passed in.
+ */
+int
+xfs_bmap_punch_delalloc_range(
+	struct xfs_inode	*ip,
+	xfs_fileoff_t		start_fsb,
+	xfs_fileoff_t		length)
+{
+	xfs_fileoff_t		remaining = length;
+	int			error = 0;
+
+	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+
+	do {
+		int		done;
+		xfs_bmbt_irec_t	imap;
+		int		nimaps = 1;
+		xfs_fsblock_t	firstblock;
+		xfs_bmap_free_t flist;
+
+		/*
+		 * Map the range first and check that it is a delalloc extent
+		 * before trying to unmap the range. Otherwise we will be
+		 * trying to remove a real extent (which requires a
+		 * transaction) or a hole, which is probably a bad idea...
+		 */
+		error = xfs_bmapi_read(ip, start_fsb, 1, &imap, &nimaps,
+				       XFS_BMAPI_ENTIRE);
+
+		if (error) {
+			/* something screwed, just bail */
+			if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+				xfs_alert(ip->i_mount,
+			"Failed delalloc mapping lookup ino %lld fsb %lld.",
+						ip->i_ino, start_fsb);
+			}
+			break;
+		}
+		if (!nimaps) {
+			/* nothing there */
+			goto next_block;
+		}
+		if (imap.br_startblock != DELAYSTARTBLOCK) {
+			/* been converted, ignore */
+			goto next_block;
+		}
+		WARN_ON(imap.br_blockcount == 0);
+
+		/*
+		 * Note: while we initialise the firstblock/flist pair, they
+		 * should never be used because blocks should never be
+		 * allocated or freed for a delalloc extent and hence we need
+		 * don't cancel or finish them after the xfs_bunmapi() call.
+		 */
+		xfs_bmap_init(&flist, &firstblock);
+		error = xfs_bunmapi(NULL, ip, start_fsb, 1, 0, 1, &firstblock,
+					&flist, &done);
+		if (error)
+			break;
+
+		ASSERT(!flist.xbf_count && !flist.xbf_first);
+next_block:
+		start_fsb++;
+		remaining--;
+	} while(remaining > 0);
+
+	return error;
+}
+
+/*
+ * Test whether it is appropriate to check an inode for and free post EOF
+ * blocks. The 'force' parameter determines whether we should also consider
+ * regular files that are marked preallocated or append-only.
+ */
+bool
+xfs_can_free_eofblocks(struct xfs_inode *ip, bool force)
+{
+	/* prealloc/delalloc exists only on regular files */
+	if (!S_ISREG(ip->i_d.di_mode))
+		return false;
+
+	/*
+	 * Zero sized files with no cached pages and delalloc blocks will not
+	 * have speculative prealloc/delalloc blocks to remove.
+	 */
+	if (VFS_I(ip)->i_size == 0 &&
+	    VN_CACHED(VFS_I(ip)) == 0 &&
+	    ip->i_delayed_blks == 0)
+		return false;
+
+	/* If we haven't read in the extent list, then don't do it now. */
+	if (!(ip->i_df.if_flags & XFS_IFEXTENTS))
+		return false;
+
+	/*
+	 * Do not free real preallocated or append-only files unless the file
+	 * has delalloc blocks and we are forced to remove them.
+	 */
+	if (ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND))
+		if (!force || ip->i_delayed_blks == 0)
+			return false;
+
+	return true;
+}
+
+/*
+ * This is called by xfs_inactive to free any blocks beyond eof
+ * when the link count isn't zero and by xfs_dm_punch_hole() when
+ * punching a hole to EOF.
+ */
+int
+xfs_free_eofblocks(
+	xfs_mount_t	*mp,
+	xfs_inode_t	*ip,
+	bool		need_iolock)
+{
+	xfs_trans_t	*tp;
+	int		error;
+	xfs_fileoff_t	end_fsb;
+	xfs_fileoff_t	last_fsb;
+	xfs_filblks_t	map_len;
+	int		nimaps;
+	xfs_bmbt_irec_t	imap;
+
+	/*
+	 * Figure out if there are any blocks beyond the end
+	 * of the file.  If not, then there is nothing to do.
+	 */
+	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_ISIZE(ip));
+	last_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
+	if (last_fsb <= end_fsb)
+		return 0;
+	map_len = last_fsb - end_fsb;
+
+	nimaps = 1;
+	xfs_ilock(ip, XFS_ILOCK_SHARED);
+	error = xfs_bmapi_read(ip, end_fsb, map_len, &imap, &nimaps, 0);
+	xfs_iunlock(ip, XFS_ILOCK_SHARED);
+
+	if (!error && (nimaps != 0) &&
+	    (imap.br_startblock != HOLESTARTBLOCK ||
+	     ip->i_delayed_blks)) {
+		/*
+		 * Attach the dquots to the inode up front.
+		 */
+		error = xfs_qm_dqattach(ip, 0);
+		if (error)
+			return error;
+
+		/*
+		 * There are blocks after the end of file.
+		 * Free them up now by truncating the file to
+		 * its current size.
+		 */
+		tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
+
+		if (need_iolock) {
+			if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
+				xfs_trans_cancel(tp, 0);
+				return EAGAIN;
+			}
+		}
+
+		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
+		if (error) {
+			ASSERT(XFS_FORCED_SHUTDOWN(mp));
+			xfs_trans_cancel(tp, 0);
+			if (need_iolock)
+				xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+			return error;
+		}
+
+		xfs_ilock(ip, XFS_ILOCK_EXCL);
+		xfs_trans_ijoin(tp, ip, 0);
+
+		/*
+		 * Do not update the on-disk file size.  If we update the
+		 * on-disk file size and then the system crashes before the
+		 * contents of the file are flushed to disk then the files
+		 * may be full of holes (ie NULL files bug).
+		 */
+		error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK,
+					      XFS_ISIZE(ip));
+		if (error) {
+			/*
+			 * If we get an error at this point we simply don't
+			 * bother truncating the file.
+			 */
+			xfs_trans_cancel(tp,
+					 (XFS_TRANS_RELEASE_LOG_RES |
+					  XFS_TRANS_ABORT));
+		} else {
+			error = xfs_trans_commit(tp,
+						XFS_TRANS_RELEASE_LOG_RES);
+			if (!error)
+				xfs_inode_clear_eofblocks_tag(ip);
+		}
+
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		if (need_iolock)
+			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+	}
+	return error;
+}
+
+/*
+ * xfs_alloc_file_space()
+ *      This routine allocates disk space for the given file.
+ *
+ *	If alloc_type == 0, this request is for an ALLOCSP type
+ *	request which will change the file size.  In this case, no
+ *	DMAPI event will be generated by the call.  A TRUNCATE event
+ *	will be generated later by xfs_setattr.
+ *
+ *	If alloc_type != 0, this request is for a RESVSP type
+ *	request, and a DMAPI DM_EVENT_WRITE will be generated if the
+ *	lower block boundary byte address is less than the file's
+ *	length.
+ *
+ * RETURNS:
+ *       0 on success
+ *      errno on error
+ *
+ */
+STATIC int
+xfs_alloc_file_space(
+	xfs_inode_t		*ip,
+	xfs_off_t		offset,
+	xfs_off_t		len,
+	int			alloc_type,
+	int			attr_flags)
+{
+	xfs_mount_t		*mp = ip->i_mount;
+	xfs_off_t		count;
+	xfs_filblks_t		allocated_fsb;
+	xfs_filblks_t		allocatesize_fsb;
+	xfs_extlen_t		extsz, temp;
+	xfs_fileoff_t		startoffset_fsb;
+	xfs_fsblock_t		firstfsb;
+	int			nimaps;
+	int			quota_flag;
+	int			rt;
+	xfs_trans_t		*tp;
+	xfs_bmbt_irec_t		imaps[1], *imapp;
+	xfs_bmap_free_t		free_list;
+	uint			qblocks, resblks, resrtextents;
+	int			committed;
+	int			error;
+
+	trace_xfs_alloc_file_space(ip);
+
+	if (XFS_FORCED_SHUTDOWN(mp))
+		return XFS_ERROR(EIO);
+
+	error = xfs_qm_dqattach(ip, 0);
+	if (error)
+		return error;
+
+	if (len <= 0)
+		return XFS_ERROR(EINVAL);
+
+	rt = XFS_IS_REALTIME_INODE(ip);
+	extsz = xfs_get_extsz_hint(ip);
+
+	count = len;
+	imapp = &imaps[0];
+	nimaps = 1;
+	startoffset_fsb	= XFS_B_TO_FSBT(mp, offset);
+	allocatesize_fsb = XFS_B_TO_FSB(mp, count);
+
+	/*
+	 * Allocate file space until done or until there is an error
+	 */
+	while (allocatesize_fsb && !error) {
+		xfs_fileoff_t	s, e;
+
+		/*
+		 * Determine space reservations for data/realtime.
+		 */
+		if (unlikely(extsz)) {
+			s = startoffset_fsb;
+			do_div(s, extsz);
+			s *= extsz;
+			e = startoffset_fsb + allocatesize_fsb;
+			if ((temp = do_mod(startoffset_fsb, extsz)))
+				e += temp;
+			if ((temp = do_mod(e, extsz)))
+				e += extsz - temp;
+		} else {
+			s = 0;
+			e = allocatesize_fsb;
+		}
+
+		/*
+		 * The transaction reservation is limited to a 32-bit block
+		 * count, hence we need to limit the number of blocks we are
+		 * trying to reserve to avoid an overflow. We can't allocate
+		 * more than @nimaps extents, and an extent is limited on disk
+		 * to MAXEXTLEN (21 bits), so use that to enforce the limit.
+		 */
+		resblks = min_t(xfs_fileoff_t, (e - s), (MAXEXTLEN * nimaps));
+		if (unlikely(rt)) {
+			resrtextents = qblocks = resblks;
+			resrtextents /= mp->m_sb.sb_rextsize;
+			resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
+			quota_flag = XFS_QMOPT_RES_RTBLKS;
+		} else {
+			resrtextents = 0;
+			resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resblks);
+			quota_flag = XFS_QMOPT_RES_REGBLKS;
+		}
+
+		/*
+		 * Allocate and setup the transaction.
+		 */
+		tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
+		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
+					  resblks, resrtextents);
+		/*
+		 * Check for running out of space
+		 */
+		if (error) {
+			/*
+			 * Free the transaction structure.
+			 */
+			ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp));
+			xfs_trans_cancel(tp, 0);
+			break;
+		}
+		xfs_ilock(ip, XFS_ILOCK_EXCL);
+		error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks,
+						      0, quota_flag);
+		if (error)
+			goto error1;
+
+		xfs_trans_ijoin(tp, ip, 0);
+
+		xfs_bmap_init(&free_list, &firstfsb);
+		error = xfs_bmapi_write(tp, ip, startoffset_fsb,
+					allocatesize_fsb, alloc_type, &firstfsb,
+					0, imapp, &nimaps, &free_list);
+		if (error) {
+			goto error0;
+		}
+
+		/*
+		 * Complete the transaction
+		 */
+		error = xfs_bmap_finish(&tp, &free_list, &committed);
+		if (error) {
+			goto error0;
+		}
+
+		error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		if (error) {
+			break;
+		}
+
+		allocated_fsb = imapp->br_blockcount;
+
+		if (nimaps == 0) {
+			error = XFS_ERROR(ENOSPC);
+			break;
+		}
+
+		startoffset_fsb += allocated_fsb;
+		allocatesize_fsb -= allocated_fsb;
+	}
+
+	return error;
+
+error0:	/* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
+	xfs_bmap_cancel(&free_list);
+	xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag);
+
+error1:	/* Just cancel transaction */
+	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+	return error;
+}
+
+/*
+ * Zero file bytes between startoff and endoff inclusive.
+ * The iolock is held exclusive and no blocks are buffered.
+ *
+ * This function is used by xfs_free_file_space() to zero
+ * partial blocks when the range to free is not block aligned.
+ * When unreserving space with boundaries that are not block
+ * aligned we round up the start and round down the end
+ * boundaries and then use this function to zero the parts of
+ * the blocks that got dropped during the rounding.
+ */
+STATIC int
+xfs_zero_remaining_bytes(
+	xfs_inode_t		*ip,
+	xfs_off_t		startoff,
+	xfs_off_t		endoff)
+{
+	xfs_bmbt_irec_t		imap;
+	xfs_fileoff_t		offset_fsb;
+	xfs_off_t		lastoffset;
+	xfs_off_t		offset;
+	xfs_buf_t		*bp;
+	xfs_mount_t		*mp = ip->i_mount;
+	int			nimap;
+	int			error = 0;
+
+	/*
+	 * Avoid doing I/O beyond eof - it's not necessary
+	 * since nothing can read beyond eof.  The space will
+	 * be zeroed when the file is extended anyway.
+	 */
+	if (startoff >= XFS_ISIZE(ip))
+		return 0;
+
+	if (endoff > XFS_ISIZE(ip))
+		endoff = XFS_ISIZE(ip);
+
+	bp = xfs_buf_get_uncached(XFS_IS_REALTIME_INODE(ip) ?
+					mp->m_rtdev_targp : mp->m_ddev_targp,
+				  BTOBB(mp->m_sb.sb_blocksize), 0);
+	if (!bp)
+		return XFS_ERROR(ENOMEM);
+
+	xfs_buf_unlock(bp);
+
+	for (offset = startoff; offset <= endoff; offset = lastoffset + 1) {
+		offset_fsb = XFS_B_TO_FSBT(mp, offset);
+		nimap = 1;
+		error = xfs_bmapi_read(ip, offset_fsb, 1, &imap, &nimap, 0);
+		if (error || nimap < 1)
+			break;
+		ASSERT(imap.br_blockcount >= 1);
+		ASSERT(imap.br_startoff == offset_fsb);
+		lastoffset = XFS_FSB_TO_B(mp, imap.br_startoff + 1) - 1;
+		if (lastoffset > endoff)
+			lastoffset = endoff;
+		if (imap.br_startblock == HOLESTARTBLOCK)
+			continue;
+		ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
+		if (imap.br_state == XFS_EXT_UNWRITTEN)
+			continue;
+		XFS_BUF_UNDONE(bp);
+		XFS_BUF_UNWRITE(bp);
+		XFS_BUF_READ(bp);
+		XFS_BUF_SET_ADDR(bp, xfs_fsb_to_db(ip, imap.br_startblock));
+		xfsbdstrat(mp, bp);
+		error = xfs_buf_iowait(bp);
+		if (error) {
+			xfs_buf_ioerror_alert(bp,
+					"xfs_zero_remaining_bytes(read)");
+			break;
+		}
+		memset(bp->b_addr +
+			(offset - XFS_FSB_TO_B(mp, imap.br_startoff)),
+		      0, lastoffset - offset + 1);
+		XFS_BUF_UNDONE(bp);
+		XFS_BUF_UNREAD(bp);
+		XFS_BUF_WRITE(bp);
+		xfsbdstrat(mp, bp);
+		error = xfs_buf_iowait(bp);
+		if (error) {
+			xfs_buf_ioerror_alert(bp,
+					"xfs_zero_remaining_bytes(write)");
+			break;
+		}
+	}
+	xfs_buf_free(bp);
+	return error;
+}
+
+/*
+ * xfs_free_file_space()
+ *      This routine frees disk space for the given file.
+ *
+ *	This routine is only called by xfs_change_file_space
+ *	for an UNRESVSP type call.
+ *
+ * RETURNS:
+ *       0 on success
+ *      errno on error
+ *
+ */
+STATIC int
+xfs_free_file_space(
+	xfs_inode_t		*ip,
+	xfs_off_t		offset,
+	xfs_off_t		len,
+	int			attr_flags)
+{
+	int			committed;
+	int			done;
+	xfs_fileoff_t		endoffset_fsb;
+	int			error;
+	xfs_fsblock_t		firstfsb;
+	xfs_bmap_free_t		free_list;
+	xfs_bmbt_irec_t		imap;
+	xfs_off_t		ioffset;
+	xfs_extlen_t		mod=0;
+	xfs_mount_t		*mp;
+	int			nimap;
+	uint			resblks;
+	xfs_off_t		rounding;
+	int			rt;
+	xfs_fileoff_t		startoffset_fsb;
+	xfs_trans_t		*tp;
+	int			need_iolock = 1;
+
+	mp = ip->i_mount;
+
+	trace_xfs_free_file_space(ip);
+
+	error = xfs_qm_dqattach(ip, 0);
+	if (error)
+		return error;
+
+	error = 0;
+	if (len <= 0)	/* if nothing being freed */
+		return error;
+	rt = XFS_IS_REALTIME_INODE(ip);
+	startoffset_fsb	= XFS_B_TO_FSB(mp, offset);
+	endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len);
+
+	if (attr_flags & XFS_ATTR_NOLOCK)
+		need_iolock = 0;
+	if (need_iolock) {
+		xfs_ilock(ip, XFS_IOLOCK_EXCL);
+		/* wait for the completion of any pending DIOs */
+		inode_dio_wait(VFS_I(ip));
+	}
+
+	rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE);
+	ioffset = offset & ~(rounding - 1);
+	error = -filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
+					      ioffset, -1);
+	if (error)
+		goto out_unlock_iolock;
+	truncate_pagecache_range(VFS_I(ip), ioffset, -1);
+
+	/*
+	 * Need to zero the stuff we're not freeing, on disk.
+	 * If it's a realtime file & can't use unwritten extents then we
+	 * actually need to zero the extent edges.  Otherwise xfs_bunmapi
+	 * will take care of it for us.
+	 */
+	if (rt && !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
+		nimap = 1;
+		error = xfs_bmapi_read(ip, startoffset_fsb, 1,
+					&imap, &nimap, 0);
+		if (error)
+			goto out_unlock_iolock;
+		ASSERT(nimap == 0 || nimap == 1);
+		if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
+			xfs_daddr_t	block;
+
+			ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
+			block = imap.br_startblock;
+			mod = do_div(block, mp->m_sb.sb_rextsize);
+			if (mod)
+				startoffset_fsb += mp->m_sb.sb_rextsize - mod;
+		}
+		nimap = 1;
+		error = xfs_bmapi_read(ip, endoffset_fsb - 1, 1,
+					&imap, &nimap, 0);
+		if (error)
+			goto out_unlock_iolock;
+		ASSERT(nimap == 0 || nimap == 1);
+		if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
+			ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
+			mod++;
+			if (mod && (mod != mp->m_sb.sb_rextsize))
+				endoffset_fsb -= mod;
+		}
+	}
+	if ((done = (endoffset_fsb <= startoffset_fsb)))
+		/*
+		 * One contiguous piece to clear
+		 */
+		error = xfs_zero_remaining_bytes(ip, offset, offset + len - 1);
+	else {
+		/*
+		 * Some full blocks, possibly two pieces to clear
+		 */
+		if (offset < XFS_FSB_TO_B(mp, startoffset_fsb))
+			error = xfs_zero_remaining_bytes(ip, offset,
+				XFS_FSB_TO_B(mp, startoffset_fsb) - 1);
+		if (!error &&
+		    XFS_FSB_TO_B(mp, endoffset_fsb) < offset + len)
+			error = xfs_zero_remaining_bytes(ip,
+				XFS_FSB_TO_B(mp, endoffset_fsb),
+				offset + len - 1);
+	}
+
+	/*
+	 * free file space until done or until there is an error
+	 */
+	resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
+	while (!error && !done) {
+
+		/*
+		 * allocate and setup the transaction. Allow this
+		 * transaction to dip into the reserve blocks to ensure
+		 * the freeing of the space succeeds at ENOSPC.
+		 */
+		tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
+		tp->t_flags |= XFS_TRANS_RESERVE;
+		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, resblks, 0);
+
+		/*
+		 * check for running out of space
+		 */
+		if (error) {
+			/*
+			 * Free the transaction structure.
+			 */
+			ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp));
+			xfs_trans_cancel(tp, 0);
+			break;
+		}
+		xfs_ilock(ip, XFS_ILOCK_EXCL);
+		error = xfs_trans_reserve_quota(tp, mp,
+				ip->i_udquot, ip->i_gdquot, ip->i_pdquot,
+				resblks, 0, XFS_QMOPT_RES_REGBLKS);
+		if (error)
+			goto error1;
+
+		xfs_trans_ijoin(tp, ip, 0);
+
+		/*
+		 * issue the bunmapi() call to free the blocks
+		 */
+		xfs_bmap_init(&free_list, &firstfsb);
+		error = xfs_bunmapi(tp, ip, startoffset_fsb,
+				  endoffset_fsb - startoffset_fsb,
+				  0, 2, &firstfsb, &free_list, &done);
+		if (error) {
+			goto error0;
+		}
+
+		/*
+		 * complete the transaction
+		 */
+		error = xfs_bmap_finish(&tp, &free_list, &committed);
+		if (error) {
+			goto error0;
+		}
+
+		error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+	}
+
+ out_unlock_iolock:
+	if (need_iolock)
+		xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+	return error;
+
+ error0:
+	xfs_bmap_cancel(&free_list);
+ error1:
+	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
+	xfs_iunlock(ip, need_iolock ? (XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL) :
+		    XFS_ILOCK_EXCL);
+	return error;
+}
+
+
+STATIC int
+xfs_zero_file_space(
+	struct xfs_inode	*ip,
+	xfs_off_t		offset,
+	xfs_off_t		len,
+	int			attr_flags)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	uint			granularity;
+	xfs_off_t		start_boundary;
+	xfs_off_t		end_boundary;
+	int			error;
+
+	granularity = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE);
+
+	/*
+	 * Round the range of extents we are going to convert inwards.  If the
+	 * offset is aligned, then it doesn't get changed so we zero from the
+	 * start of the block offset points to.
+	 */
+	start_boundary = round_up(offset, granularity);
+	end_boundary = round_down(offset + len, granularity);
+
+	ASSERT(start_boundary >= offset);
+	ASSERT(end_boundary <= offset + len);
+
+	if (!(attr_flags & XFS_ATTR_NOLOCK))
+		xfs_ilock(ip, XFS_IOLOCK_EXCL);
+
+	if (start_boundary < end_boundary - 1) {
+		/* punch out the page cache over the conversion range */
+		truncate_pagecache_range(VFS_I(ip), start_boundary,
+					 end_boundary - 1);
+		/* convert the blocks */
+		error = xfs_alloc_file_space(ip, start_boundary,
+					end_boundary - start_boundary - 1,
+					XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT,
+					attr_flags);
+		if (error)
+			goto out_unlock;
+
+		/* We've handled the interior of the range, now for the edges */
+		if (start_boundary != offset)
+			error = xfs_iozero(ip, offset, start_boundary - offset);
+		if (error)
+			goto out_unlock;
+
+		if (end_boundary != offset + len)
+			error = xfs_iozero(ip, end_boundary,
+					   offset + len - end_boundary);
+
+	} else {
+		/*
+		 * It's either a sub-granularity range or the range spanned lies
+		 * partially across two adjacent blocks.
+		 */
+		error = xfs_iozero(ip, offset, len);
+	}
+
+out_unlock:
+	if (!(attr_flags & XFS_ATTR_NOLOCK))
+		xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+	return error;
+
+}
+
+/*
+ * xfs_change_file_space()
+ *      This routine allocates or frees disk space for the given file.
+ *      The user specified parameters are checked for alignment and size
+ *      limitations.
+ *
+ * RETURNS:
+ *       0 on success
+ *      errno on error
+ *
+ */
+int
+xfs_change_file_space(
+	xfs_inode_t	*ip,
+	int		cmd,
+	xfs_flock64_t	*bf,
+	xfs_off_t	offset,
+	int		attr_flags)
+{
+	xfs_mount_t	*mp = ip->i_mount;
+	int		clrprealloc;
+	int		error;
+	xfs_fsize_t	fsize;
+	int		setprealloc;
+	xfs_off_t	startoffset;
+	xfs_trans_t	*tp;
+	struct iattr	iattr;
+
+	if (!S_ISREG(ip->i_d.di_mode))
+		return XFS_ERROR(EINVAL);
+
+	switch (bf->l_whence) {
+	case 0: /*SEEK_SET*/
+		break;
+	case 1: /*SEEK_CUR*/
+		bf->l_start += offset;
+		break;
+	case 2: /*SEEK_END*/
+		bf->l_start += XFS_ISIZE(ip);
+		break;
+	default:
+		return XFS_ERROR(EINVAL);
+	}
+
+	/*
+	 * length of <= 0 for resv/unresv/zero is invalid.  length for
+	 * alloc/free is ignored completely and we have no idea what userspace
+	 * might have set it to, so set it to zero to allow range
+	 * checks to pass.
+	 */
+	switch (cmd) {
+	case XFS_IOC_ZERO_RANGE:
+	case XFS_IOC_RESVSP:
+	case XFS_IOC_RESVSP64:
+	case XFS_IOC_UNRESVSP:
+	case XFS_IOC_UNRESVSP64:
+		if (bf->l_len <= 0)
+			return XFS_ERROR(EINVAL);
+		break;
+	default:
+		bf->l_len = 0;
+		break;
+	}
+
+	if (bf->l_start < 0 ||
+	    bf->l_start > mp->m_super->s_maxbytes ||
+	    bf->l_start + bf->l_len < 0 ||
+	    bf->l_start + bf->l_len >= mp->m_super->s_maxbytes)
+		return XFS_ERROR(EINVAL);
+
+	bf->l_whence = 0;
+
+	startoffset = bf->l_start;
+	fsize = XFS_ISIZE(ip);
+
+	setprealloc = clrprealloc = 0;
+	switch (cmd) {
+	case XFS_IOC_ZERO_RANGE:
+		error = xfs_zero_file_space(ip, startoffset, bf->l_len,
+						attr_flags);
+		if (error)
+			return error;
+		setprealloc = 1;
+		break;
+
+	case XFS_IOC_RESVSP:
+	case XFS_IOC_RESVSP64:
+		error = xfs_alloc_file_space(ip, startoffset, bf->l_len,
+						XFS_BMAPI_PREALLOC, attr_flags);
+		if (error)
+			return error;
+		setprealloc = 1;
+		break;
+
+	case XFS_IOC_UNRESVSP:
+	case XFS_IOC_UNRESVSP64:
+		if ((error = xfs_free_file_space(ip, startoffset, bf->l_len,
+								attr_flags)))
+			return error;
+		break;
+
+	case XFS_IOC_ALLOCSP:
+	case XFS_IOC_ALLOCSP64:
+	case XFS_IOC_FREESP:
+	case XFS_IOC_FREESP64:
+		/*
+		 * These operations actually do IO when extending the file, but
+		 * the allocation is done seperately to the zeroing that is
+		 * done. This set of operations need to be serialised against
+		 * other IO operations, such as truncate and buffered IO. We
+		 * need to take the IOLOCK here to serialise the allocation and
+		 * zeroing IO to prevent other IOLOCK holders (e.g. getbmap,
+		 * truncate, direct IO) from racing against the transient
+		 * allocated but not written state we can have here.
+		 */
+		xfs_ilock(ip, XFS_IOLOCK_EXCL);
+		if (startoffset > fsize) {
+			error = xfs_alloc_file_space(ip, fsize,
+					startoffset - fsize, 0,
+					attr_flags | XFS_ATTR_NOLOCK);
+			if (error) {
+				xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+				break;
+			}
+		}
+
+		iattr.ia_valid = ATTR_SIZE;
+		iattr.ia_size = startoffset;
+
+		error = xfs_setattr_size(ip, &iattr,
+					 attr_flags | XFS_ATTR_NOLOCK);
+		xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+
+		if (error)
+			return error;
+
+		clrprealloc = 1;
+		break;
+
+	default:
+		ASSERT(0);
+		return XFS_ERROR(EINVAL);
+	}
+
+	/*
+	 * update the inode timestamp, mode, and prealloc flag bits
+	 */
+	tp = xfs_trans_alloc(mp, XFS_TRANS_WRITEID);
+	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_writeid, 0, 0);
+	if (error) {
+		xfs_trans_cancel(tp, 0);
+		return error;
+	}
+
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+
+	if ((attr_flags & XFS_ATTR_DMI) == 0) {
+		ip->i_d.di_mode &= ~S_ISUID;
+
+		/*
+		 * Note that we don't have to worry about mandatory
+		 * file locking being disabled here because we only
+		 * clear the S_ISGID bit if the Group execute bit is
+		 * on, but if it was on then mandatory locking wouldn't
+		 * have been enabled.
+		 */
+		if (ip->i_d.di_mode & S_IXGRP)
+			ip->i_d.di_mode &= ~S_ISGID;
+
+		xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+	}
+	if (setprealloc)
+		ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC;
+	else if (clrprealloc)
+		ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC;
+
+	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+	if (attr_flags & XFS_ATTR_SYNC)
+		xfs_trans_set_sync(tp);
+	return xfs_trans_commit(tp, 0);
+}
+
+/*
+ * We need to check that the format of the data fork in the temporary inode is
+ * valid for the target inode before doing the swap. This is not a problem with
+ * attr1 because of the fixed fork offset, but attr2 has a dynamically sized
+ * data fork depending on the space the attribute fork is taking so we can get
+ * invalid formats on the target inode.
+ *
+ * E.g. target has space for 7 extents in extent format, temp inode only has
+ * space for 6.  If we defragment down to 7 extents, then the tmp format is a
+ * btree, but when swapped it needs to be in extent format. Hence we can't just
+ * blindly swap data forks on attr2 filesystems.
+ *
+ * Note that we check the swap in both directions so that we don't end up with
+ * a corrupt temporary inode, either.
+ *
+ * Note that fixing the way xfs_fsr sets up the attribute fork in the source
+ * inode will prevent this situation from occurring, so all we do here is
+ * reject and log the attempt. basically we are putting the responsibility on
+ * userspace to get this right.
+ */
+static int
+xfs_swap_extents_check_format(
+	xfs_inode_t	*ip,	/* target inode */
+	xfs_inode_t	*tip)	/* tmp inode */
+{
+
+	/* Should never get a local format */
+	if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL ||
+	    tip->i_d.di_format == XFS_DINODE_FMT_LOCAL)
+		return EINVAL;
+
+	/*
+	 * if the target inode has less extents that then temporary inode then
+	 * why did userspace call us?
+	 */
+	if (ip->i_d.di_nextents < tip->i_d.di_nextents)
+		return EINVAL;
+
+	/*
+	 * if the target inode is in extent form and the temp inode is in btree
+	 * form then we will end up with the target inode in the wrong format
+	 * as we already know there are less extents in the temp inode.
+	 */
+	if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
+	    tip->i_d.di_format == XFS_DINODE_FMT_BTREE)
+		return EINVAL;
+
+	/* Check temp in extent form to max in target */
+	if (tip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
+	    XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) >
+			XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK))
+		return EINVAL;
+
+	/* Check target in extent form to max in temp */
+	if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
+	    XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) >
+			XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK))
+		return EINVAL;
+
+	/*
+	 * If we are in a btree format, check that the temp root block will fit
+	 * in the target and that it has enough extents to be in btree format
+	 * in the target.
+	 *
+	 * Note that we have to be careful to allow btree->extent conversions
+	 * (a common defrag case) which will occur when the temp inode is in
+	 * extent format...
+	 */
+	if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
+		if (XFS_IFORK_BOFF(ip) &&
+		    XFS_BMAP_BMDR_SPACE(tip->i_df.if_broot) > XFS_IFORK_BOFF(ip))
+			return EINVAL;
+		if (XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) <=
+		    XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK))
+			return EINVAL;
+	}
+
+	/* Reciprocal target->temp btree format checks */
+	if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
+		if (XFS_IFORK_BOFF(tip) &&
+		    XFS_BMAP_BMDR_SPACE(ip->i_df.if_broot) > XFS_IFORK_BOFF(tip))
+			return EINVAL;
+		if (XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) <=
+		    XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK))
+			return EINVAL;
+	}
+
+	return 0;
+}
+
+int
+xfs_swap_extents(
+	xfs_inode_t	*ip,	/* target inode */
+	xfs_inode_t	*tip,	/* tmp inode */
+	xfs_swapext_t	*sxp)
+{
+	xfs_mount_t	*mp = ip->i_mount;
+	xfs_trans_t	*tp;
+	xfs_bstat_t	*sbp = &sxp->sx_stat;
+	xfs_ifork_t	*tempifp, *ifp, *tifp;
+	int		src_log_flags, target_log_flags;
+	int		error = 0;
+	int		aforkblks = 0;
+	int		taforkblks = 0;
+	__uint64_t	tmp;
+
+	/*
+	 * We have no way of updating owner information in the BMBT blocks for
+	 * each inode on CRC enabled filesystems, so to avoid corrupting the
+	 * this metadata we simply don't allow extent swaps to occur.
+	 */
+	if (xfs_sb_version_hascrc(&mp->m_sb))
+		return XFS_ERROR(EINVAL);
+
+	tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL);
+	if (!tempifp) {
+		error = XFS_ERROR(ENOMEM);
+		goto out;
+	}
+
+	/*
+	 * we have to do two separate lock calls here to keep lockdep
+	 * happy. If we try to get all the locks in one call, lock will
+	 * report false positives when we drop the ILOCK and regain them
+	 * below.
+	 */
+	xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL);
+	xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);
+
+	/* Verify that both files have the same format */
+	if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) {
+		error = XFS_ERROR(EINVAL);
+		goto out_unlock;
+	}
+
+	/* Verify both files are either real-time or non-realtime */
+	if (XFS_IS_REALTIME_INODE(ip) != XFS_IS_REALTIME_INODE(tip)) {
+		error = XFS_ERROR(EINVAL);
+		goto out_unlock;
+	}
+
+	error = -filemap_write_and_wait(VFS_I(tip)->i_mapping);
+	if (error)
+		goto out_unlock;
+	truncate_pagecache_range(VFS_I(tip), 0, -1);
+
+	/* Verify O_DIRECT for ftmp */
+	if (VN_CACHED(VFS_I(tip)) != 0) {
+		error = XFS_ERROR(EINVAL);
+		goto out_unlock;
+	}
+
+	/* Verify all data are being swapped */
+	if (sxp->sx_offset != 0 ||
+	    sxp->sx_length != ip->i_d.di_size ||
+	    sxp->sx_length != tip->i_d.di_size) {
+		error = XFS_ERROR(EFAULT);
+		goto out_unlock;
+	}
+
+	trace_xfs_swap_extent_before(ip, 0);
+	trace_xfs_swap_extent_before(tip, 1);
+
+	/* check inode formats now that data is flushed */
+	error = xfs_swap_extents_check_format(ip, tip);
+	if (error) {
+		xfs_notice(mp,
+		    "%s: inode 0x%llx format is incompatible for exchanging.",
+				__func__, ip->i_ino);
+		goto out_unlock;
+	}
+
+	/*
+	 * Compare the current change & modify times with that
+	 * passed in.  If they differ, we abort this swap.
+	 * This is the mechanism used to ensure the calling
+	 * process that the file was not changed out from
+	 * under it.
+	 */
+	if ((sbp->bs_ctime.tv_sec != VFS_I(ip)->i_ctime.tv_sec) ||
+	    (sbp->bs_ctime.tv_nsec != VFS_I(ip)->i_ctime.tv_nsec) ||
+	    (sbp->bs_mtime.tv_sec != VFS_I(ip)->i_mtime.tv_sec) ||
+	    (sbp->bs_mtime.tv_nsec != VFS_I(ip)->i_mtime.tv_nsec)) {
+		error = XFS_ERROR(EBUSY);
+		goto out_unlock;
+	}
+
+	/* We need to fail if the file is memory mapped.  Once we have tossed
+	 * all existing pages, the page fault will have no option
+	 * but to go to the filesystem for pages. By making the page fault call
+	 * vop_read (or write in the case of autogrow) they block on the iolock
+	 * until we have switched the extents.
+	 */
+	if (VN_MAPPED(VFS_I(ip))) {
+		error = XFS_ERROR(EBUSY);
+		goto out_unlock;
+	}
+
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+	xfs_iunlock(tip, XFS_ILOCK_EXCL);
+
+	/*
+	 * There is a race condition here since we gave up the
+	 * ilock.  However, the data fork will not change since
+	 * we have the iolock (locked for truncation too) so we
+	 * are safe.  We don't really care if non-io related
+	 * fields change.
+	 */
+	truncate_pagecache_range(VFS_I(ip), 0, -1);
+
+	tp = xfs_trans_alloc(mp, XFS_TRANS_SWAPEXT);
+	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
+	if (error) {
+		xfs_iunlock(ip,  XFS_IOLOCK_EXCL);
+		xfs_iunlock(tip, XFS_IOLOCK_EXCL);
+		xfs_trans_cancel(tp, 0);
+		goto out;
+	}
+	xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);
+
+	/*
+	 * Count the number of extended attribute blocks
+	 */
+	if ( ((XFS_IFORK_Q(ip) != 0) && (ip->i_d.di_anextents > 0)) &&
+	     (ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) {
+		error = xfs_bmap_count_blocks(tp, ip, XFS_ATTR_FORK, &aforkblks);
+		if (error)
+			goto out_trans_cancel;
+	}
+	if ( ((XFS_IFORK_Q(tip) != 0) && (tip->i_d.di_anextents > 0)) &&
+	     (tip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) {
+		error = xfs_bmap_count_blocks(tp, tip, XFS_ATTR_FORK,
+			&taforkblks);
+		if (error)
+			goto out_trans_cancel;
+	}
+
+	/*
+	 * Swap the data forks of the inodes
+	 */
+	ifp = &ip->i_df;
+	tifp = &tip->i_df;
+	*tempifp = *ifp;	/* struct copy */
+	*ifp = *tifp;		/* struct copy */
+	*tifp = *tempifp;	/* struct copy */
+
+	/*
+	 * Fix the on-disk inode values
+	 */
+	tmp = (__uint64_t)ip->i_d.di_nblocks;
+	ip->i_d.di_nblocks = tip->i_d.di_nblocks - taforkblks + aforkblks;
+	tip->i_d.di_nblocks = tmp + taforkblks - aforkblks;
+
+	tmp = (__uint64_t) ip->i_d.di_nextents;
+	ip->i_d.di_nextents = tip->i_d.di_nextents;
+	tip->i_d.di_nextents = tmp;
+
+	tmp = (__uint64_t) ip->i_d.di_format;
+	ip->i_d.di_format = tip->i_d.di_format;
+	tip->i_d.di_format = tmp;
+
+	/*
+	 * The extents in the source inode could still contain speculative
+	 * preallocation beyond EOF (e.g. the file is open but not modified
+	 * while defrag is in progress). In that case, we need to copy over the
+	 * number of delalloc blocks the data fork in the source inode is
+	 * tracking beyond EOF so that when the fork is truncated away when the
+	 * temporary inode is unlinked we don't underrun the i_delayed_blks
+	 * counter on that inode.
+	 */
+	ASSERT(tip->i_delayed_blks == 0);
+	tip->i_delayed_blks = ip->i_delayed_blks;
+	ip->i_delayed_blks = 0;
+
+	src_log_flags = XFS_ILOG_CORE;
+	switch (ip->i_d.di_format) {
+	case XFS_DINODE_FMT_EXTENTS:
+		/* If the extents fit in the inode, fix the
+		 * pointer.  Otherwise it's already NULL or
+		 * pointing to the extent.
+		 */
+		if (ip->i_d.di_nextents <= XFS_INLINE_EXTS) {
+			ifp->if_u1.if_extents =
+				ifp->if_u2.if_inline_ext;
+		}
+		src_log_flags |= XFS_ILOG_DEXT;
+		break;
+	case XFS_DINODE_FMT_BTREE:
+		src_log_flags |= XFS_ILOG_DBROOT;
+		break;
+	}
+
+	target_log_flags = XFS_ILOG_CORE;
+	switch (tip->i_d.di_format) {
+	case XFS_DINODE_FMT_EXTENTS:
+		/* If the extents fit in the inode, fix the
+		 * pointer.  Otherwise it's already NULL or
+		 * pointing to the extent.
+		 */
+		if (tip->i_d.di_nextents <= XFS_INLINE_EXTS) {
+			tifp->if_u1.if_extents =
+				tifp->if_u2.if_inline_ext;
+		}
+		target_log_flags |= XFS_ILOG_DEXT;
+		break;
+	case XFS_DINODE_FMT_BTREE:
+		target_log_flags |= XFS_ILOG_DBROOT;
+		break;
+	}
+
+
+	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+	xfs_trans_ijoin(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+
+	xfs_trans_log_inode(tp, ip,  src_log_flags);
+	xfs_trans_log_inode(tp, tip, target_log_flags);
+
+	/*
+	 * If this is a synchronous mount, make sure that the
+	 * transaction goes to disk before returning to the user.
+	 */
+	if (mp->m_flags & XFS_MOUNT_WSYNC)
+		xfs_trans_set_sync(tp);
+
+	error = xfs_trans_commit(tp, 0);
+
+	trace_xfs_swap_extent_after(ip, 0);
+	trace_xfs_swap_extent_after(tip, 1);
+out:
+	kmem_free(tempifp);
+	return error;
+
+out_unlock:
+	xfs_iunlock(ip,  XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+	xfs_iunlock(tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+	goto out;
+
+out_trans_cancel:
+	xfs_trans_cancel(tp, 0);
+	goto out_unlock;
+}
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
new file mode 100644
index 0000000..0612609
--- /dev/null
+++ b/fs/xfs/xfs_bmap_util.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_BMAP_UTIL_H__
+#define	__XFS_BMAP_UTIL_H__
+
+/* Kernel only BMAP related definitions and functions */
+
+struct xfs_bmbt_irec;
+struct xfs_bmap_free_item;
+struct xfs_ifork;
+struct xfs_inode;
+struct xfs_mount;
+struct xfs_trans;
+
+/*
+ * Argument structure for xfs_bmap_alloc.
+ */
+struct xfs_bmalloca {
+	xfs_fsblock_t		*firstblock; /* i/o first block allocated */
+	struct xfs_bmap_free	*flist;	/* bmap freelist */
+	struct xfs_trans	*tp;	/* transaction pointer */
+	struct xfs_inode	*ip;	/* incore inode pointer */
+	struct xfs_bmbt_irec	prev;	/* extent before the new one */
+	struct xfs_bmbt_irec	got;	/* extent after, or delayed */
+
+	xfs_fileoff_t		offset;	/* offset in file filling in */
+	xfs_extlen_t		length;	/* i/o length asked/allocated */
+	xfs_fsblock_t		blkno;	/* starting block of new extent */
+
+	struct xfs_btree_cur	*cur;	/* btree cursor */
+	xfs_extnum_t		idx;	/* current extent index */
+	int			nallocs;/* number of extents alloc'd */
+	int			logflags;/* flags for transaction logging */
+
+	xfs_extlen_t		total;	/* total blocks needed for xaction */
+	xfs_extlen_t		minlen;	/* minimum allocation size (blocks) */
+	xfs_extlen_t		minleft; /* amount must be left after alloc */
+	char			eof;	/* set if allocating past last extent */
+	char			wasdel;	/* replacing a delayed allocation */
+	char			userdata;/* set if is user data */
+	char			aeof;	/* allocated space at eof */
+	char			conv;	/* overwriting unwritten extents */
+	char			stack_switch;
+	int			flags;
+	struct completion	*done;
+	struct work_struct	work;
+	int			result;
+};
+
+int	xfs_bmap_finish(struct xfs_trans **tp, struct xfs_bmap_free *flist,
+			int *committed);
+int	xfs_bmap_rtalloc(struct xfs_bmalloca *ap);
+int	xfs_bmapi_allocate(struct xfs_bmalloca *args);
+int	__xfs_bmapi_allocate(struct xfs_bmalloca *args);
+int	xfs_bmap_eof(struct xfs_inode *ip, xfs_fileoff_t endoff,
+		     int whichfork, int *eof);
+int	xfs_bmap_count_blocks(struct xfs_trans *tp, struct xfs_inode *ip,
+			      int whichfork, int *count);
+int	xfs_bmap_punch_delalloc_range(struct xfs_inode *ip,
+		xfs_fileoff_t start_fsb, xfs_fileoff_t length);
+
+/* bmap to userspace formatter - copy to user & advance pointer */
+typedef int (*xfs_bmap_format_t)(void **, struct getbmapx *, int *);
+int	xfs_getbmap(struct xfs_inode *ip, struct getbmapx *bmv,
+		xfs_bmap_format_t formatter, void *arg);
+
+/* functions in xfs_bmap.c that are only needed by xfs_bmap_util.c */
+void	xfs_bmap_del_free(struct xfs_bmap_free *flist,
+			  struct xfs_bmap_free_item *prev,
+			  struct xfs_bmap_free_item *free);
+int	xfs_bmap_extsize_align(struct xfs_mount *mp, struct xfs_bmbt_irec *gotp,
+			       struct xfs_bmbt_irec *prevp, xfs_extlen_t extsz,
+			       int rt, int eof, int delay, int convert,
+			       xfs_fileoff_t *offp, xfs_extlen_t *lenp);
+void	xfs_bmap_adjacent(struct xfs_bmalloca *ap);
+int	xfs_bmap_last_extent(struct xfs_trans *tp, struct xfs_inode *ip,
+			     int whichfork, struct xfs_bmbt_irec *rec,
+			     int *is_empty);
+
+/* preallocation and hole punch interface */
+int	xfs_change_file_space(struct xfs_inode *ip, int cmd,
+			      xfs_flock64_t *bf, xfs_off_t offset,
+			      int attr_flags);
+
+/* EOF block manipulation functions */
+bool	xfs_can_free_eofblocks(struct xfs_inode *ip, bool force);
+int	xfs_free_eofblocks(struct xfs_mount *mp, struct xfs_inode *ip,
+			   bool need_iolock);
+
+int	xfs_swap_extents(struct xfs_inode *ip, struct xfs_inode *tip,
+			 struct xfs_swapext *sx);
+
+xfs_daddr_t xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb);
+
+#endif	/* __XFS_BMAP_UTIL_H__ */
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index 0903960..7a2b4da 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -510,7 +510,7 @@
 }
 
 /*
- * Get a the root block which is stored in the inode.
+ * Get the root block which is stored in the inode.
  *
  * For now this btree implementation assumes the btree root is always
  * stored in the if_broot field of an inode fork.
@@ -978,6 +978,7 @@
 			buf->bb_u.l.bb_owner = cpu_to_be64(owner);
 			uuid_copy(&buf->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid);
 			buf->bb_u.l.bb_pad = 0;
+			buf->bb_u.l.bb_lsn = 0;
 		}
 	} else {
 		/* owner is a 32 bit value on short blocks */
@@ -989,6 +990,7 @@
 			buf->bb_u.s.bb_blkno = cpu_to_be64(blkno);
 			buf->bb_u.s.bb_owner = cpu_to_be32(__owner);
 			uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid);
+			buf->bb_u.s.bb_lsn = 0;
 		}
 	}
 }
@@ -1684,7 +1686,7 @@
 
 /*
  * Lookup the record.  The cursor is made to point to it, based on dir.
- * Return 0 if can't find any such record, 1 for success.
+ * stat is set to 0 if can't find any such record, 1 for success.
  */
 int					/* error */
 xfs_btree_lookup(
@@ -2756,7 +2758,6 @@
 
 		if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) {
 			/* A root block that can be made bigger. */
-
 			xfs_iroot_realloc(ip, 1, cur->bc_private.b.whichfork);
 		} else {
 			/* A root block that needs replacing */
diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h
index 55e3c7c..c8473c7 100644
--- a/fs/xfs/xfs_btree.h
+++ b/fs/xfs/xfs_btree.h
@@ -88,13 +88,11 @@
 #define XFS_BTREE_SBLOCK_CRC_LEN	(XFS_BTREE_SBLOCK_LEN + 40)
 #define XFS_BTREE_LBLOCK_CRC_LEN	(XFS_BTREE_LBLOCK_LEN + 48)
 
-
 #define XFS_BTREE_SBLOCK_CRC_OFF \
 	offsetof(struct xfs_btree_block, bb_u.s.bb_crc)
 #define XFS_BTREE_LBLOCK_CRC_OFF \
 	offsetof(struct xfs_btree_block, bb_u.l.bb_crc)
 
-
 /*
  * Generic key, ptr and record wrapper structures.
  *
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 1b2472a..c06823f 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -35,6 +35,7 @@
 #include <linux/freezer.h>
 
 #include "xfs_sb.h"
+#include "xfs_trans_resv.h"
 #include "xfs_log.h"
 #include "xfs_ag.h"
 #include "xfs_mount.h"
@@ -303,7 +304,7 @@
  *	Releases the specified buffer.
  *
  * 	The modification state of any associated pages is left unchanged.
- * 	The buffer most not be on any hash - use xfs_buf_rele instead for
+ * 	The buffer must not be on any hash - use xfs_buf_rele instead for
  * 	hashed and refcounted buffers
  */
 void
@@ -1621,7 +1622,7 @@
 /*
  *	When allocating the initial buffer target we have not yet
  *	read in the superblock, so don't know what sized sectors
- *	are being used is at this early stage.  Play safe.
+ *	are being used at this early stage.  Play safe.
  */
 STATIC int
 xfs_setsize_buftarg_early(
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index bfc4e0c..3a944b1 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -39,6 +39,14 @@
 
 STATIC void	xfs_buf_do_callbacks(struct xfs_buf *bp);
 
+static inline int
+xfs_buf_log_format_size(
+	struct xfs_buf_log_format *blfp)
+{
+	return offsetof(struct xfs_buf_log_format, blf_data_map) +
+			(blfp->blf_map_size * sizeof(blfp->blf_data_map[0]));
+}
+
 /*
  * This returns the number of log iovecs needed to log the
  * given buf log item.
@@ -49,25 +57,27 @@
  *
  * If the XFS_BLI_STALE flag has been set, then log nothing.
  */
-STATIC uint
+STATIC void
 xfs_buf_item_size_segment(
 	struct xfs_buf_log_item	*bip,
-	struct xfs_buf_log_format *blfp)
+	struct xfs_buf_log_format *blfp,
+	int			*nvecs,
+	int			*nbytes)
 {
 	struct xfs_buf		*bp = bip->bli_buf;
-	uint			nvecs;
 	int			next_bit;
 	int			last_bit;
 
 	last_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size, 0);
 	if (last_bit == -1)
-		return 0;
+		return;
 
 	/*
 	 * initial count for a dirty buffer is 2 vectors - the format structure
 	 * and the first dirty region.
 	 */
-	nvecs = 2;
+	*nvecs += 2;
+	*nbytes += xfs_buf_log_format_size(blfp) + XFS_BLF_CHUNK;
 
 	while (last_bit != -1) {
 		/*
@@ -87,18 +97,17 @@
 			break;
 		} else if (next_bit != last_bit + 1) {
 			last_bit = next_bit;
-			nvecs++;
+			(*nvecs)++;
 		} else if (xfs_buf_offset(bp, next_bit * XFS_BLF_CHUNK) !=
 			   (xfs_buf_offset(bp, last_bit * XFS_BLF_CHUNK) +
 			    XFS_BLF_CHUNK)) {
 			last_bit = next_bit;
-			nvecs++;
+			(*nvecs)++;
 		} else {
 			last_bit++;
 		}
+		*nbytes += XFS_BLF_CHUNK;
 	}
-
-	return nvecs;
 }
 
 /*
@@ -118,12 +127,13 @@
  * If the XFS_BLI_STALE flag has been set, then log nothing but the buf log
  * format structures.
  */
-STATIC uint
+STATIC void
 xfs_buf_item_size(
-	struct xfs_log_item	*lip)
+	struct xfs_log_item	*lip,
+	int			*nvecs,
+	int			*nbytes)
 {
 	struct xfs_buf_log_item	*bip = BUF_ITEM(lip);
-	uint			nvecs;
 	int			i;
 
 	ASSERT(atomic_read(&bip->bli_refcount) > 0);
@@ -135,7 +145,11 @@
 		 */
 		trace_xfs_buf_item_size_stale(bip);
 		ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);
-		return bip->bli_format_count;
+		*nvecs += bip->bli_format_count;
+		for (i = 0; i < bip->bli_format_count; i++) {
+			*nbytes += xfs_buf_log_format_size(&bip->bli_formats[i]);
+		}
+		return;
 	}
 
 	ASSERT(bip->bli_flags & XFS_BLI_LOGGED);
@@ -147,7 +161,8 @@
 		 * commit, so no vectors are used at all.
 		 */
 		trace_xfs_buf_item_size_ordered(bip);
-		return XFS_LOG_VEC_ORDERED;
+		*nvecs = XFS_LOG_VEC_ORDERED;
+		return;
 	}
 
 	/*
@@ -159,13 +174,11 @@
 	 * count for the extra buf log format structure that will need to be
 	 * written.
 	 */
-	nvecs = 0;
 	for (i = 0; i < bip->bli_format_count; i++) {
-		nvecs += xfs_buf_item_size_segment(bip, &bip->bli_formats[i]);
+		xfs_buf_item_size_segment(bip, &bip->bli_formats[i],
+					  nvecs, nbytes);
 	}
-
 	trace_xfs_buf_item_size(bip);
-	return nvecs;
 }
 
 static struct xfs_log_iovec *
@@ -192,8 +205,7 @@
 	 * the actual size of the dirty bitmap rather than the size of the in
 	 * memory structure.
 	 */
-	base_size = offsetof(struct xfs_buf_log_format, blf_data_map) +
-			(blfp->blf_map_size * sizeof(blfp->blf_data_map[0]));
+	base_size = xfs_buf_log_format_size(blfp);
 
 	nvecs = 0;
 	first_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size, 0);
@@ -601,11 +613,9 @@
 			}
 		}
 	}
-	if (clean)
-		xfs_buf_item_relse(bp);
-	else if (aborted) {
+	if (clean || aborted) {
 		if (atomic_dec_and_test(&bip->bli_refcount)) {
-			ASSERT(XFS_FORCED_SHUTDOWN(lip->li_mountp));
+			ASSERT(!aborted || XFS_FORCED_SHUTDOWN(lip->li_mountp));
 			xfs_buf_item_relse(bp);
 		}
 	} else
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
index 0f1c247..db63710 100644
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -18,101 +18,9 @@
 #ifndef	__XFS_BUF_ITEM_H__
 #define	__XFS_BUF_ITEM_H__
 
-extern kmem_zone_t	*xfs_buf_item_zone;
+/* kernel only definitions */
 
-/*
- * This flag indicates that the buffer contains on disk inodes
- * and requires special recovery handling.
- */
-#define	XFS_BLF_INODE_BUF	(1<<0)
-/*
- * This flag indicates that the buffer should not be replayed
- * during recovery because its blocks are being freed.
- */
-#define	XFS_BLF_CANCEL		(1<<1)
-
-/*
- * This flag indicates that the buffer contains on disk
- * user or group dquots and may require special recovery handling.
- */
-#define	XFS_BLF_UDQUOT_BUF	(1<<2)
-#define XFS_BLF_PDQUOT_BUF	(1<<3)
-#define	XFS_BLF_GDQUOT_BUF	(1<<4)
-
-#define	XFS_BLF_CHUNK		128
-#define	XFS_BLF_SHIFT		7
-#define	BIT_TO_WORD_SHIFT	5
-#define	NBWORD			(NBBY * sizeof(unsigned int))
-
-/*
- * This is the structure used to lay out a buf log item in the
- * log.  The data map describes which 128 byte chunks of the buffer
- * have been logged.
- */
-#define XFS_BLF_DATAMAP_SIZE	((XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK) / NBWORD)
-
-typedef struct xfs_buf_log_format {
-	unsigned short	blf_type;	/* buf log item type indicator */
-	unsigned short	blf_size;	/* size of this item */
-	ushort		blf_flags;	/* misc state */
-	ushort		blf_len;	/* number of blocks in this buf */
-	__int64_t	blf_blkno;	/* starting blkno of this buf */
-	unsigned int	blf_map_size;	/* used size of data bitmap in words */
-	unsigned int	blf_data_map[XFS_BLF_DATAMAP_SIZE]; /* dirty bitmap */
-} xfs_buf_log_format_t;
-
-/*
- * All buffers now need to tell recovery where the magic number
- * is so that it can verify and calculate the CRCs on the buffer correctly
- * once the changes have been replayed into the buffer.
- *
- * The type value is held in the upper 5 bits of the blf_flags field, which is
- * an unsigned 16 bit field. Hence we need to shift it 11 bits up and down.
- */
-#define XFS_BLFT_BITS	5
-#define XFS_BLFT_SHIFT	11
-#define XFS_BLFT_MASK	(((1 << XFS_BLFT_BITS) - 1) << XFS_BLFT_SHIFT)
-
-enum xfs_blft {
-	XFS_BLFT_UNKNOWN_BUF = 0,
-	XFS_BLFT_UDQUOT_BUF,
-	XFS_BLFT_PDQUOT_BUF,
-	XFS_BLFT_GDQUOT_BUF,
-	XFS_BLFT_BTREE_BUF,
-	XFS_BLFT_AGF_BUF,
-	XFS_BLFT_AGFL_BUF,
-	XFS_BLFT_AGI_BUF,
-	XFS_BLFT_DINO_BUF,
-	XFS_BLFT_SYMLINK_BUF,
-	XFS_BLFT_DIR_BLOCK_BUF,
-	XFS_BLFT_DIR_DATA_BUF,
-	XFS_BLFT_DIR_FREE_BUF,
-	XFS_BLFT_DIR_LEAF1_BUF,
-	XFS_BLFT_DIR_LEAFN_BUF,
-	XFS_BLFT_DA_NODE_BUF,
-	XFS_BLFT_ATTR_LEAF_BUF,
-	XFS_BLFT_ATTR_RMT_BUF,
-	XFS_BLFT_SB_BUF,
-	XFS_BLFT_MAX_BUF = (1 << XFS_BLFT_BITS),
-};
-
-static inline void
-xfs_blft_to_flags(struct xfs_buf_log_format *blf, enum xfs_blft type)
-{
-	ASSERT(type > XFS_BLFT_UNKNOWN_BUF && type < XFS_BLFT_MAX_BUF);
-	blf->blf_flags &= ~XFS_BLFT_MASK;
-	blf->blf_flags |= ((type << XFS_BLFT_SHIFT) & XFS_BLFT_MASK);
-}
-
-static inline __uint16_t
-xfs_blft_from_flags(struct xfs_buf_log_format *blf)
-{
-	return (blf->blf_flags & XFS_BLFT_MASK) >> XFS_BLFT_SHIFT;
-}
-
-/*
- * buf log item flags
- */
+/* buf log item flags */
 #define	XFS_BLI_HOLD		0x01
 #define	XFS_BLI_DIRTY		0x02
 #define	XFS_BLI_STALE		0x04
@@ -133,8 +41,6 @@
 	{ XFS_BLI_ORDERED,	"ORDERED" }
 
 
-#ifdef __KERNEL__
-
 struct xfs_buf;
 struct xfs_mount;
 struct xfs_buf_log_item;
@@ -169,6 +75,6 @@
 			       enum xfs_blft);
 void	xfs_trans_buf_copy_type(struct xfs_buf *dst_bp, struct xfs_buf *src_bp);
 
-#endif	/* __KERNEL__ */
+extern kmem_zone_t	*xfs_buf_item_zone;
 
 #endif	/* __XFS_BUF_ITEM_H__ */
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 0b8b2a1..d4e59a4 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -27,8 +27,8 @@
 #include "xfs_mount.h"
 #include "xfs_da_btree.h"
 #include "xfs_bmap_btree.h"
-#include "xfs_dir2.h"
 #include "xfs_dir2_format.h"
+#include "xfs_dir2.h"
 #include "xfs_dir2_priv.h"
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
@@ -399,7 +399,7 @@
 	struct xfs_da_intnode	*node;
 	struct xfs_buf		*bp;
 	int			max;
-	int			action;
+	int			action = 0;
 	int			error;
 	int			i;
 
@@ -2454,9 +2454,9 @@
 xfs_buf_map_from_irec(
 	struct xfs_mount	*mp,
 	struct xfs_buf_map	**mapp,
-	unsigned int		*nmaps,
+	int			*nmaps,
 	struct xfs_bmbt_irec	*irecs,
-	unsigned int		nirecs)
+	int			nirecs)
 {
 	struct xfs_buf_map	*map;
 	int			i;
diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h
index 6fb3371c..b1f2679 100644
--- a/fs/xfs/xfs_da_btree.h
+++ b/fs/xfs/xfs_da_btree.h
@@ -133,12 +133,19 @@
 				     struct xfs_da3_icnode_hdr *from);
 
 static inline int
-xfs_da3_node_hdr_size(struct xfs_da_intnode *dap)
+__xfs_da3_node_hdr_size(bool v3)
 {
-	if (dap->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC))
+	if (v3)
 		return sizeof(struct xfs_da3_node_hdr);
 	return sizeof(struct xfs_da_node_hdr);
 }
+static inline int
+xfs_da3_node_hdr_size(struct xfs_da_intnode *dap)
+{
+	bool	v3 = dap->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC);
+
+	return __xfs_da3_node_hdr_size(v3);
+}
 
 static inline struct xfs_da_node_entry *
 xfs_da3_node_tree_p(struct xfs_da_intnode *dap)
@@ -176,6 +183,7 @@
 typedef struct xfs_da_args {
 	const __uint8_t	*name;		/* string (maybe not NULL terminated) */
 	int		namelen;	/* length of string (maybe no NULL) */
+	__uint8_t	filetype;	/* filetype of inode for directories */
 	__uint8_t	*value;		/* set of bytes (maybe contain NULLs) */
 	int		valuelen;	/* length of value */
 	int		flags;		/* argument flags (eg: ATTR_NOCREATE) */
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
deleted file mode 100644
index e36445c..0000000
--- a/fs/xfs/xfs_dfrag.c
+++ /dev/null
@@ -1,459 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_types.h"
-#include "xfs_log.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_btree.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_inode_item.h"
-#include "xfs_bmap.h"
-#include "xfs_itable.h"
-#include "xfs_dfrag.h"
-#include "xfs_error.h"
-#include "xfs_vnodeops.h"
-#include "xfs_trace.h"
-
-
-static int xfs_swap_extents(
-	xfs_inode_t	*ip,	/* target inode */
-	xfs_inode_t	*tip,	/* tmp inode */
-	xfs_swapext_t	*sxp);
-
-/*
- * ioctl interface for swapext
- */
-int
-xfs_swapext(
-	xfs_swapext_t	*sxp)
-{
-	xfs_inode_t     *ip, *tip;
-	struct fd	f, tmp;
-	int		error = 0;
-
-	/* Pull information for the target fd */
-	f = fdget((int)sxp->sx_fdtarget);
-	if (!f.file) {
-		error = XFS_ERROR(EINVAL);
-		goto out;
-	}
-
-	if (!(f.file->f_mode & FMODE_WRITE) ||
-	    !(f.file->f_mode & FMODE_READ) ||
-	    (f.file->f_flags & O_APPEND)) {
-		error = XFS_ERROR(EBADF);
-		goto out_put_file;
-	}
-
-	tmp = fdget((int)sxp->sx_fdtmp);
-	if (!tmp.file) {
-		error = XFS_ERROR(EINVAL);
-		goto out_put_file;
-	}
-
-	if (!(tmp.file->f_mode & FMODE_WRITE) ||
-	    !(tmp.file->f_mode & FMODE_READ) ||
-	    (tmp.file->f_flags & O_APPEND)) {
-		error = XFS_ERROR(EBADF);
-		goto out_put_tmp_file;
-	}
-
-	if (IS_SWAPFILE(file_inode(f.file)) ||
-	    IS_SWAPFILE(file_inode(tmp.file))) {
-		error = XFS_ERROR(EINVAL);
-		goto out_put_tmp_file;
-	}
-
-	ip = XFS_I(file_inode(f.file));
-	tip = XFS_I(file_inode(tmp.file));
-
-	if (ip->i_mount != tip->i_mount) {
-		error = XFS_ERROR(EINVAL);
-		goto out_put_tmp_file;
-	}
-
-	if (ip->i_ino == tip->i_ino) {
-		error = XFS_ERROR(EINVAL);
-		goto out_put_tmp_file;
-	}
-
-	if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-		error = XFS_ERROR(EIO);
-		goto out_put_tmp_file;
-	}
-
-	error = xfs_swap_extents(ip, tip, sxp);
-
- out_put_tmp_file:
-	fdput(tmp);
- out_put_file:
-	fdput(f);
- out:
-	return error;
-}
-
-/*
- * We need to check that the format of the data fork in the temporary inode is
- * valid for the target inode before doing the swap. This is not a problem with
- * attr1 because of the fixed fork offset, but attr2 has a dynamically sized
- * data fork depending on the space the attribute fork is taking so we can get
- * invalid formats on the target inode.
- *
- * E.g. target has space for 7 extents in extent format, temp inode only has
- * space for 6.  If we defragment down to 7 extents, then the tmp format is a
- * btree, but when swapped it needs to be in extent format. Hence we can't just
- * blindly swap data forks on attr2 filesystems.
- *
- * Note that we check the swap in both directions so that we don't end up with
- * a corrupt temporary inode, either.
- *
- * Note that fixing the way xfs_fsr sets up the attribute fork in the source
- * inode will prevent this situation from occurring, so all we do here is
- * reject and log the attempt. basically we are putting the responsibility on
- * userspace to get this right.
- */
-static int
-xfs_swap_extents_check_format(
-	xfs_inode_t	*ip,	/* target inode */
-	xfs_inode_t	*tip)	/* tmp inode */
-{
-
-	/* Should never get a local format */
-	if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL ||
-	    tip->i_d.di_format == XFS_DINODE_FMT_LOCAL)
-		return EINVAL;
-
-	/*
-	 * if the target inode has less extents that then temporary inode then
-	 * why did userspace call us?
-	 */
-	if (ip->i_d.di_nextents < tip->i_d.di_nextents)
-		return EINVAL;
-
-	/*
-	 * if the target inode is in extent form and the temp inode is in btree
-	 * form then we will end up with the target inode in the wrong format
-	 * as we already know there are less extents in the temp inode.
-	 */
-	if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
-	    tip->i_d.di_format == XFS_DINODE_FMT_BTREE)
-		return EINVAL;
-
-	/* Check temp in extent form to max in target */
-	if (tip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
-	    XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) >
-			XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK))
-		return EINVAL;
-
-	/* Check target in extent form to max in temp */
-	if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
-	    XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) >
-			XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK))
-		return EINVAL;
-
-	/*
-	 * If we are in a btree format, check that the temp root block will fit
-	 * in the target and that it has enough extents to be in btree format
-	 * in the target.
-	 *
-	 * Note that we have to be careful to allow btree->extent conversions
-	 * (a common defrag case) which will occur when the temp inode is in
-	 * extent format...
-	 */
-	if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
-		if (XFS_IFORK_BOFF(ip) &&
-		    XFS_BMAP_BMDR_SPACE(tip->i_df.if_broot) > XFS_IFORK_BOFF(ip))
-			return EINVAL;
-		if (XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) <=
-		    XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK))
-			return EINVAL;
-	}
-
-	/* Reciprocal target->temp btree format checks */
-	if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
-		if (XFS_IFORK_BOFF(tip) &&
-		    XFS_BMAP_BMDR_SPACE(ip->i_df.if_broot) > XFS_IFORK_BOFF(tip))
-			return EINVAL;
-		if (XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) <=
-		    XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK))
-			return EINVAL;
-	}
-
-	return 0;
-}
-
-static int
-xfs_swap_extents(
-	xfs_inode_t	*ip,	/* target inode */
-	xfs_inode_t	*tip,	/* tmp inode */
-	xfs_swapext_t	*sxp)
-{
-	xfs_mount_t	*mp = ip->i_mount;
-	xfs_trans_t	*tp;
-	xfs_bstat_t	*sbp = &sxp->sx_stat;
-	xfs_ifork_t	*tempifp, *ifp, *tifp;
-	int		src_log_flags, target_log_flags;
-	int		error = 0;
-	int		aforkblks = 0;
-	int		taforkblks = 0;
-	__uint64_t	tmp;
-
-	/*
-	 * We have no way of updating owner information in the BMBT blocks for
-	 * each inode on CRC enabled filesystems, so to avoid corrupting the
-	 * this metadata we simply don't allow extent swaps to occur.
-	 */
-	if (xfs_sb_version_hascrc(&mp->m_sb))
-		return XFS_ERROR(EINVAL);
-
-	tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL);
-	if (!tempifp) {
-		error = XFS_ERROR(ENOMEM);
-		goto out;
-	}
-
-	/*
-	 * we have to do two separate lock calls here to keep lockdep
-	 * happy. If we try to get all the locks in one call, lock will
-	 * report false positives when we drop the ILOCK and regain them
-	 * below.
-	 */
-	xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL);
-	xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);
-
-	/* Verify that both files have the same format */
-	if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) {
-		error = XFS_ERROR(EINVAL);
-		goto out_unlock;
-	}
-
-	/* Verify both files are either real-time or non-realtime */
-	if (XFS_IS_REALTIME_INODE(ip) != XFS_IS_REALTIME_INODE(tip)) {
-		error = XFS_ERROR(EINVAL);
-		goto out_unlock;
-	}
-
-	error = -filemap_write_and_wait(VFS_I(tip)->i_mapping);
-	if (error)
-		goto out_unlock;
-	truncate_pagecache_range(VFS_I(tip), 0, -1);
-
-	/* Verify O_DIRECT for ftmp */
-	if (VN_CACHED(VFS_I(tip)) != 0) {
-		error = XFS_ERROR(EINVAL);
-		goto out_unlock;
-	}
-
-	/* Verify all data are being swapped */
-	if (sxp->sx_offset != 0 ||
-	    sxp->sx_length != ip->i_d.di_size ||
-	    sxp->sx_length != tip->i_d.di_size) {
-		error = XFS_ERROR(EFAULT);
-		goto out_unlock;
-	}
-
-	trace_xfs_swap_extent_before(ip, 0);
-	trace_xfs_swap_extent_before(tip, 1);
-
-	/* check inode formats now that data is flushed */
-	error = xfs_swap_extents_check_format(ip, tip);
-	if (error) {
-		xfs_notice(mp,
-		    "%s: inode 0x%llx format is incompatible for exchanging.",
-				__func__, ip->i_ino);
-		goto out_unlock;
-	}
-
-	/*
-	 * Compare the current change & modify times with that
-	 * passed in.  If they differ, we abort this swap.
-	 * This is the mechanism used to ensure the calling
-	 * process that the file was not changed out from
-	 * under it.
-	 */
-	if ((sbp->bs_ctime.tv_sec != VFS_I(ip)->i_ctime.tv_sec) ||
-	    (sbp->bs_ctime.tv_nsec != VFS_I(ip)->i_ctime.tv_nsec) ||
-	    (sbp->bs_mtime.tv_sec != VFS_I(ip)->i_mtime.tv_sec) ||
-	    (sbp->bs_mtime.tv_nsec != VFS_I(ip)->i_mtime.tv_nsec)) {
-		error = XFS_ERROR(EBUSY);
-		goto out_unlock;
-	}
-
-	/* We need to fail if the file is memory mapped.  Once we have tossed
-	 * all existing pages, the page fault will have no option
-	 * but to go to the filesystem for pages. By making the page fault call
-	 * vop_read (or write in the case of autogrow) they block on the iolock
-	 * until we have switched the extents.
-	 */
-	if (VN_MAPPED(VFS_I(ip))) {
-		error = XFS_ERROR(EBUSY);
-		goto out_unlock;
-	}
-
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-	xfs_iunlock(tip, XFS_ILOCK_EXCL);
-
-	/*
-	 * There is a race condition here since we gave up the
-	 * ilock.  However, the data fork will not change since
-	 * we have the iolock (locked for truncation too) so we
-	 * are safe.  We don't really care if non-io related
-	 * fields change.
-	 */
-	truncate_pagecache_range(VFS_I(ip), 0, -1);
-
-	tp = xfs_trans_alloc(mp, XFS_TRANS_SWAPEXT);
-	if ((error = xfs_trans_reserve(tp, 0,
-				     XFS_ICHANGE_LOG_RES(mp), 0,
-				     0, 0))) {
-		xfs_iunlock(ip,  XFS_IOLOCK_EXCL);
-		xfs_iunlock(tip, XFS_IOLOCK_EXCL);
-		xfs_trans_cancel(tp, 0);
-		goto out;
-	}
-	xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);
-
-	/*
-	 * Count the number of extended attribute blocks
-	 */
-	if ( ((XFS_IFORK_Q(ip) != 0) && (ip->i_d.di_anextents > 0)) &&
-	     (ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) {
-		error = xfs_bmap_count_blocks(tp, ip, XFS_ATTR_FORK, &aforkblks);
-		if (error)
-			goto out_trans_cancel;
-	}
-	if ( ((XFS_IFORK_Q(tip) != 0) && (tip->i_d.di_anextents > 0)) &&
-	     (tip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) {
-		error = xfs_bmap_count_blocks(tp, tip, XFS_ATTR_FORK,
-			&taforkblks);
-		if (error)
-			goto out_trans_cancel;
-	}
-
-	/*
-	 * Swap the data forks of the inodes
-	 */
-	ifp = &ip->i_df;
-	tifp = &tip->i_df;
-	*tempifp = *ifp;	/* struct copy */
-	*ifp = *tifp;		/* struct copy */
-	*tifp = *tempifp;	/* struct copy */
-
-	/*
-	 * Fix the on-disk inode values
-	 */
-	tmp = (__uint64_t)ip->i_d.di_nblocks;
-	ip->i_d.di_nblocks = tip->i_d.di_nblocks - taforkblks + aforkblks;
-	tip->i_d.di_nblocks = tmp + taforkblks - aforkblks;
-
-	tmp = (__uint64_t) ip->i_d.di_nextents;
-	ip->i_d.di_nextents = tip->i_d.di_nextents;
-	tip->i_d.di_nextents = tmp;
-
-	tmp = (__uint64_t) ip->i_d.di_format;
-	ip->i_d.di_format = tip->i_d.di_format;
-	tip->i_d.di_format = tmp;
-
-	/*
-	 * The extents in the source inode could still contain speculative
-	 * preallocation beyond EOF (e.g. the file is open but not modified
-	 * while defrag is in progress). In that case, we need to copy over the
-	 * number of delalloc blocks the data fork in the source inode is
-	 * tracking beyond EOF so that when the fork is truncated away when the
-	 * temporary inode is unlinked we don't underrun the i_delayed_blks
-	 * counter on that inode.
-	 */
-	ASSERT(tip->i_delayed_blks == 0);
-	tip->i_delayed_blks = ip->i_delayed_blks;
-	ip->i_delayed_blks = 0;
-
-	src_log_flags = XFS_ILOG_CORE;
-	switch (ip->i_d.di_format) {
-	case XFS_DINODE_FMT_EXTENTS:
-		/* If the extents fit in the inode, fix the
-		 * pointer.  Otherwise it's already NULL or
-		 * pointing to the extent.
-		 */
-		if (ip->i_d.di_nextents <= XFS_INLINE_EXTS) {
-			ifp->if_u1.if_extents =
-				ifp->if_u2.if_inline_ext;
-		}
-		src_log_flags |= XFS_ILOG_DEXT;
-		break;
-	case XFS_DINODE_FMT_BTREE:
-		src_log_flags |= XFS_ILOG_DBROOT;
-		break;
-	}
-
-	target_log_flags = XFS_ILOG_CORE;
-	switch (tip->i_d.di_format) {
-	case XFS_DINODE_FMT_EXTENTS:
-		/* If the extents fit in the inode, fix the
-		 * pointer.  Otherwise it's already NULL or
-		 * pointing to the extent.
-		 */
-		if (tip->i_d.di_nextents <= XFS_INLINE_EXTS) {
-			tifp->if_u1.if_extents =
-				tifp->if_u2.if_inline_ext;
-		}
-		target_log_flags |= XFS_ILOG_DEXT;
-		break;
-	case XFS_DINODE_FMT_BTREE:
-		target_log_flags |= XFS_ILOG_DBROOT;
-		break;
-	}
-
-
-	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
-	xfs_trans_ijoin(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
-
-	xfs_trans_log_inode(tp, ip,  src_log_flags);
-	xfs_trans_log_inode(tp, tip, target_log_flags);
-
-	/*
-	 * If this is a synchronous mount, make sure that the
-	 * transaction goes to disk before returning to the user.
-	 */
-	if (mp->m_flags & XFS_MOUNT_WSYNC)
-		xfs_trans_set_sync(tp);
-
-	error = xfs_trans_commit(tp, 0);
-
-	trace_xfs_swap_extent_after(ip, 0);
-	trace_xfs_swap_extent_after(tip, 1);
-out:
-	kmem_free(tempifp);
-	return error;
-
-out_unlock:
-	xfs_iunlock(ip,  XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
-	xfs_iunlock(tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
-	goto out;
-
-out_trans_cancel:
-	xfs_trans_cancel(tp, 0);
-	goto out_unlock;
-}
diff --git a/fs/xfs/xfs_dfrag.h b/fs/xfs/xfs_dfrag.h
deleted file mode 100644
index 20bdd93..0000000
--- a/fs/xfs/xfs_dfrag.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2000,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_DFRAG_H__
-#define	__XFS_DFRAG_H__
-
-/*
- * Structure passed to xfs_swapext
- */
-
-typedef struct xfs_swapext
-{
-	__int64_t	sx_version;	/* version */
-	__int64_t	sx_fdtarget;	/* fd of target file */
-	__int64_t	sx_fdtmp;	/* fd of tmp file */
-	xfs_off_t	sx_offset;	/* offset into file */
-	xfs_off_t	sx_length;	/* leng from offset */
-	char		sx_pad[16];	/* pad space, unused */
-	xfs_bstat_t	sx_stat;	/* stat of target b4 copy */
-} xfs_swapext_t;
-
-/*
- * Version flag
- */
-#define XFS_SX_VERSION		0
-
-#ifdef __KERNEL__
-/*
- * Prototypes for visible xfs_dfrag.c routines.
- */
-
-/*
- * Syscall interface for xfs_swapext
- */
-int	xfs_swapext(struct xfs_swapext *sx);
-
-#endif	/* __KERNEL__ */
-
-#endif	/* __XFS_DFRAG_H__ */
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index 8f023de..edf203a 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -31,14 +31,14 @@
 #include "xfs_inode.h"
 #include "xfs_inode_item.h"
 #include "xfs_bmap.h"
-#include "xfs_dir2.h"
 #include "xfs_dir2_format.h"
+#include "xfs_dir2.h"
 #include "xfs_dir2_priv.h"
 #include "xfs_error.h"
-#include "xfs_vnodeops.h"
 #include "xfs_trace.h"
 
-struct xfs_name xfs_name_dotdot = { (unsigned char *)"..", 2};
+struct xfs_name xfs_name_dotdot = { (unsigned char *)"..", 2, XFS_DIR3_FT_DIR };
+
 
 /*
  * ASCII case-insensitive (ie. A-Z) support for directories that was
@@ -90,6 +90,9 @@
 xfs_dir_mount(
 	xfs_mount_t	*mp)
 {
+	int	nodehdr_size;
+
+
 	ASSERT(xfs_sb_version_hasdirv2(&mp->m_sb));
 	ASSERT((1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) <=
 	       XFS_MAX_BLOCKSIZE);
@@ -98,12 +101,13 @@
 	mp->m_dirdatablk = xfs_dir2_db_to_da(mp, XFS_DIR2_DATA_FIRSTDB(mp));
 	mp->m_dirleafblk = xfs_dir2_db_to_da(mp, XFS_DIR2_LEAF_FIRSTDB(mp));
 	mp->m_dirfreeblk = xfs_dir2_db_to_da(mp, XFS_DIR2_FREE_FIRSTDB(mp));
-	mp->m_attr_node_ents =
-		(mp->m_sb.sb_blocksize - (uint)sizeof(xfs_da_node_hdr_t)) /
-		(uint)sizeof(xfs_da_node_entry_t);
-	mp->m_dir_node_ents =
-		(mp->m_dirblksize - (uint)sizeof(xfs_da_node_hdr_t)) /
-		(uint)sizeof(xfs_da_node_entry_t);
+
+	nodehdr_size = __xfs_da3_node_hdr_size(xfs_sb_version_hascrc(&mp->m_sb));
+	mp->m_attr_node_ents = (mp->m_sb.sb_blocksize - nodehdr_size) /
+				(uint)sizeof(xfs_da_node_entry_t);
+	mp->m_dir_node_ents = (mp->m_dirblksize - nodehdr_size) /
+				(uint)sizeof(xfs_da_node_entry_t);
+
 	mp->m_dir_magicpct = (mp->m_dirblksize * 37) / 100;
 	if (xfs_sb_version_hasasciici(&mp->m_sb))
 		mp->m_dirnameops = &xfs_ascii_ci_nameops;
@@ -209,6 +213,7 @@
 	memset(&args, 0, sizeof(xfs_da_args_t));
 	args.name = name->name;
 	args.namelen = name->len;
+	args.filetype = name->type;
 	args.hashval = dp->i_mount->m_dirnameops->hashname(name);
 	args.inumber = inum;
 	args.dp = dp;
@@ -283,6 +288,7 @@
 	memset(&args, 0, sizeof(xfs_da_args_t));
 	args.name = name->name;
 	args.namelen = name->len;
+	args.filetype = name->type;
 	args.hashval = dp->i_mount->m_dirnameops->hashname(name);
 	args.dp = dp;
 	args.whichfork = XFS_DATA_FORK;
@@ -338,6 +344,7 @@
 	memset(&args, 0, sizeof(xfs_da_args_t));
 	args.name = name->name;
 	args.namelen = name->len;
+	args.filetype = name->type;
 	args.hashval = dp->i_mount->m_dirnameops->hashname(name);
 	args.inumber = ino;
 	args.dp = dp;
@@ -363,37 +370,6 @@
 }
 
 /*
- * Read a directory.
- */
-int
-xfs_readdir(
-	xfs_inode_t	*dp,
-	struct dir_context *ctx,
-	size_t		bufsize)
-{
-	int		rval;		/* return value */
-	int		v;		/* type-checking value */
-
-	trace_xfs_readdir(dp);
-
-	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
-		return XFS_ERROR(EIO);
-
-	ASSERT(S_ISDIR(dp->i_d.di_mode));
-	XFS_STATS_INC(xs_dir_getdents);
-
-	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
-		rval = xfs_dir2_sf_getdents(dp, ctx);
-	else if ((rval = xfs_dir2_isblock(NULL, dp, &v)))
-		;
-	else if (v)
-		rval = xfs_dir2_block_getdents(dp, ctx);
-	else
-		rval = xfs_dir2_leaf_getdents(dp, ctx, bufsize);
-	return rval;
-}
-
-/*
  * Replace the inode number of a directory entry.
  */
 int
@@ -418,6 +394,7 @@
 	memset(&args, 0, sizeof(xfs_da_args_t));
 	args.name = name->name;
 	args.namelen = name->len;
+	args.filetype = name->type;
 	args.hashval = dp->i_mount->m_dirnameops->hashname(name);
 	args.inumber = inum;
 	args.dp = dp;
@@ -465,6 +442,7 @@
 	memset(&args, 0, sizeof(xfs_da_args_t));
 	args.name = name->name;
 	args.namelen = name->len;
+	args.filetype = name->type;
 	args.hashval = dp->i_mount->m_dirnameops->hashname(name);
 	args.dp = dp;
 	args.whichfork = XFS_DATA_FORK;
diff --git a/fs/xfs/xfs_dir2.h b/fs/xfs/xfs_dir2.h
index e937d99..9910401 100644
--- a/fs/xfs/xfs_dir2.h
+++ b/fs/xfs/xfs_dir2.h
@@ -23,6 +23,11 @@
 struct xfs_inode;
 struct xfs_mount;
 struct xfs_trans;
+struct xfs_dir2_sf_hdr;
+struct xfs_dir2_sf_entry;
+struct xfs_dir2_data_hdr;
+struct xfs_dir2_data_entry;
+struct xfs_dir2_data_unused;
 
 extern struct xfs_name	xfs_name_dotdot;
 
@@ -57,4 +62,45 @@
  */
 extern int xfs_dir2_sf_to_block(struct xfs_da_args *args);
 
+/*
+ * Interface routines used by userspace utilities
+ */
+extern xfs_ino_t xfs_dir2_sf_get_parent_ino(struct xfs_dir2_sf_hdr *sfp);
+extern void xfs_dir2_sf_put_parent_ino(struct xfs_dir2_sf_hdr *sfp,
+		xfs_ino_t ino);
+extern xfs_ino_t xfs_dir3_sfe_get_ino(struct xfs_mount *mp,
+		struct xfs_dir2_sf_hdr *sfp, struct xfs_dir2_sf_entry *sfep);
+extern void xfs_dir3_sfe_put_ino(struct xfs_mount *mp,
+		struct xfs_dir2_sf_hdr *hdr, struct xfs_dir2_sf_entry *sfep,
+		xfs_ino_t ino);
+
+extern int xfs_dir2_isblock(struct xfs_trans *tp, struct xfs_inode *dp, int *r);
+extern int xfs_dir2_isleaf(struct xfs_trans *tp, struct xfs_inode *dp, int *r);
+extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db,
+				struct xfs_buf *bp);
+
+extern void xfs_dir2_data_freescan(struct xfs_mount *mp,
+		struct xfs_dir2_data_hdr *hdr, int *loghead);
+extern void xfs_dir2_data_log_entry(struct xfs_trans *tp, struct xfs_buf *bp,
+		struct xfs_dir2_data_entry *dep);
+extern void xfs_dir2_data_log_header(struct xfs_trans *tp,
+		struct xfs_buf *bp);
+extern void xfs_dir2_data_log_unused(struct xfs_trans *tp, struct xfs_buf *bp,
+		struct xfs_dir2_data_unused *dup);
+extern void xfs_dir2_data_make_free(struct xfs_trans *tp, struct xfs_buf *bp,
+		xfs_dir2_data_aoff_t offset, xfs_dir2_data_aoff_t len,
+		int *needlogp, int *needscanp);
+extern void xfs_dir2_data_use_free(struct xfs_trans *tp, struct xfs_buf *bp,
+		struct xfs_dir2_data_unused *dup, xfs_dir2_data_aoff_t offset,
+		xfs_dir2_data_aoff_t len, int *needlogp, int *needscanp);
+
+extern struct xfs_dir2_data_free *xfs_dir2_data_freefind(
+		struct xfs_dir2_data_hdr *hdr, struct xfs_dir2_data_unused *dup);
+
+extern const struct xfs_buf_ops xfs_dir3_block_buf_ops;
+extern const struct xfs_buf_ops xfs_dir3_leafn_buf_ops;
+extern const struct xfs_buf_ops xfs_dir3_leaf1_buf_ops;
+extern const struct xfs_buf_ops xfs_dir3_free_buf_ops;
+extern const struct xfs_buf_ops xfs_dir3_data_buf_ops;
+
 #endif	/* __XFS_DIR2_H__ */
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index 5e7fbd7..0957aa9 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -31,8 +31,8 @@
 #include "xfs_inode_item.h"
 #include "xfs_bmap.h"
 #include "xfs_buf_item.h"
-#include "xfs_dir2.h"
 #include "xfs_dir2_format.h"
+#include "xfs_dir2.h"
 #include "xfs_dir2_priv.h"
 #include "xfs_error.h"
 #include "xfs_trace.h"
@@ -126,7 +126,7 @@
 	.verify_write = xfs_dir3_block_write_verify,
 };
 
-static int
+int
 xfs_dir3_block_read(
 	struct xfs_trans	*tp,
 	struct xfs_inode	*dp,
@@ -369,7 +369,7 @@
 	if (error)
 		return error;
 
-	len = xfs_dir2_data_entsize(args->namelen);
+	len = xfs_dir3_data_entsize(mp, args->namelen);
 
 	/*
 	 * Set up pointers to parts of the block.
@@ -549,7 +549,8 @@
 	dep->inumber = cpu_to_be64(args->inumber);
 	dep->namelen = args->namelen;
 	memcpy(dep->name, args->name, args->namelen);
-	tagp = xfs_dir2_data_entry_tag_p(dep);
+	xfs_dir3_dirent_put_ftype(mp, dep, args->filetype);
+	tagp = xfs_dir3_data_entry_tag_p(mp, dep);
 	*tagp = cpu_to_be16((char *)dep - (char *)hdr);
 	/*
 	 * Clean up the bestfree array and log the header, tail, and entry.
@@ -565,101 +566,6 @@
 }
 
 /*
- * Readdir for block directories.
- */
-int						/* error */
-xfs_dir2_block_getdents(
-	xfs_inode_t		*dp,		/* incore inode */
-	struct dir_context	*ctx)
-{
-	xfs_dir2_data_hdr_t	*hdr;		/* block header */
-	struct xfs_buf		*bp;		/* buffer for block */
-	xfs_dir2_block_tail_t	*btp;		/* block tail */
-	xfs_dir2_data_entry_t	*dep;		/* block data entry */
-	xfs_dir2_data_unused_t	*dup;		/* block unused entry */
-	char			*endptr;	/* end of the data entries */
-	int			error;		/* error return value */
-	xfs_mount_t		*mp;		/* filesystem mount point */
-	char			*ptr;		/* current data entry */
-	int			wantoff;	/* starting block offset */
-	xfs_off_t		cook;
-
-	mp = dp->i_mount;
-	/*
-	 * If the block number in the offset is out of range, we're done.
-	 */
-	if (xfs_dir2_dataptr_to_db(mp, ctx->pos) > mp->m_dirdatablk)
-		return 0;
-
-	error = xfs_dir3_block_read(NULL, dp, &bp);
-	if (error)
-		return error;
-
-	/*
-	 * Extract the byte offset we start at from the seek pointer.
-	 * We'll skip entries before this.
-	 */
-	wantoff = xfs_dir2_dataptr_to_off(mp, ctx->pos);
-	hdr = bp->b_addr;
-	xfs_dir3_data_check(dp, bp);
-	/*
-	 * Set up values for the loop.
-	 */
-	btp = xfs_dir2_block_tail_p(mp, hdr);
-	ptr = (char *)xfs_dir3_data_entry_p(hdr);
-	endptr = (char *)xfs_dir2_block_leaf_p(btp);
-
-	/*
-	 * Loop over the data portion of the block.
-	 * Each object is a real entry (dep) or an unused one (dup).
-	 */
-	while (ptr < endptr) {
-		dup = (xfs_dir2_data_unused_t *)ptr;
-		/*
-		 * Unused, skip it.
-		 */
-		if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
-			ptr += be16_to_cpu(dup->length);
-			continue;
-		}
-
-		dep = (xfs_dir2_data_entry_t *)ptr;
-
-		/*
-		 * Bump pointer for the next iteration.
-		 */
-		ptr += xfs_dir2_data_entsize(dep->namelen);
-		/*
-		 * The entry is before the desired starting point, skip it.
-		 */
-		if ((char *)dep - (char *)hdr < wantoff)
-			continue;
-
-		cook = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
-					    (char *)dep - (char *)hdr);
-
-		ctx->pos = cook & 0x7fffffff;
-		/*
-		 * If it didn't fit, set the final offset to here & return.
-		 */
-		if (!dir_emit(ctx, (char *)dep->name, dep->namelen,
-			    be64_to_cpu(dep->inumber), DT_UNKNOWN)) {
-			xfs_trans_brelse(NULL, bp);
-			return 0;
-		}
-	}
-
-	/*
-	 * Reached the end of the block.
-	 * Set the offset to a non-existent block 1 and return.
-	 */
-	ctx->pos = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk + 1, 0) &
-			0x7fffffff;
-	xfs_trans_brelse(NULL, bp);
-	return 0;
-}
-
-/*
  * Log leaf entries from the block.
  */
 static void
@@ -736,6 +642,7 @@
 	 * Fill in inode number, CI name if appropriate, release the block.
 	 */
 	args->inumber = be64_to_cpu(dep->inumber);
+	args->filetype = xfs_dir3_dirent_get_ftype(mp, dep);
 	error = xfs_dir_cilookup_result(args, dep->name, dep->namelen);
 	xfs_trans_brelse(args->trans, bp);
 	return XFS_ERROR(error);
@@ -894,7 +801,7 @@
 	needlog = needscan = 0;
 	xfs_dir2_data_make_free(tp, bp,
 		(xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr),
-		xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan);
+		xfs_dir3_data_entsize(mp, dep->namelen), &needlog, &needscan);
 	/*
 	 * Fix up the block tail.
 	 */
@@ -968,6 +875,7 @@
 	 * Change the inode number to the new value.
 	 */
 	dep->inumber = cpu_to_be64(args->inumber);
+	xfs_dir3_dirent_put_ftype(mp, dep, args->filetype);
 	xfs_dir2_data_log_entry(args->trans, bp, dep);
 	xfs_dir3_data_check(dp, bp);
 	return 0;
@@ -1254,7 +1162,8 @@
 	dep->inumber = cpu_to_be64(dp->i_ino);
 	dep->namelen = 1;
 	dep->name[0] = '.';
-	tagp = xfs_dir2_data_entry_tag_p(dep);
+	xfs_dir3_dirent_put_ftype(mp, dep, XFS_DIR3_FT_DIR);
+	tagp = xfs_dir3_data_entry_tag_p(mp, dep);
 	*tagp = cpu_to_be16((char *)dep - (char *)hdr);
 	xfs_dir2_data_log_entry(tp, bp, dep);
 	blp[0].hashval = cpu_to_be32(xfs_dir_hash_dot);
@@ -1267,7 +1176,8 @@
 	dep->inumber = cpu_to_be64(xfs_dir2_sf_get_parent_ino(sfp));
 	dep->namelen = 2;
 	dep->name[0] = dep->name[1] = '.';
-	tagp = xfs_dir2_data_entry_tag_p(dep);
+	xfs_dir3_dirent_put_ftype(mp, dep, XFS_DIR3_FT_DIR);
+	tagp = xfs_dir3_data_entry_tag_p(mp, dep);
 	*tagp = cpu_to_be16((char *)dep - (char *)hdr);
 	xfs_dir2_data_log_entry(tp, bp, dep);
 	blp[1].hashval = cpu_to_be32(xfs_dir_hash_dotdot);
@@ -1312,10 +1222,12 @@
 		 * Copy a real entry.
 		 */
 		dep = (xfs_dir2_data_entry_t *)((char *)hdr + newoffset);
-		dep->inumber = cpu_to_be64(xfs_dir2_sfe_get_ino(sfp, sfep));
+		dep->inumber = cpu_to_be64(xfs_dir3_sfe_get_ino(mp, sfp, sfep));
 		dep->namelen = sfep->namelen;
+		xfs_dir3_dirent_put_ftype(mp, dep,
+					xfs_dir3_sfe_get_ftype(mp, sfp, sfep));
 		memcpy(dep->name, sfep->name, dep->namelen);
-		tagp = xfs_dir2_data_entry_tag_p(dep);
+		tagp = xfs_dir3_data_entry_tag_p(mp, dep);
 		*tagp = cpu_to_be16((char *)dep - (char *)hdr);
 		xfs_dir2_data_log_entry(tp, bp, dep);
 		name.name = sfep->name;
@@ -1328,7 +1240,7 @@
 		if (++i == sfp->count)
 			sfep = NULL;
 		else
-			sfep = xfs_dir2_sf_nextentry(sfp, sfep);
+			sfep = xfs_dir3_sf_nextentry(mp, sfp, sfep);
 	}
 	/* Done with the temporary buffer */
 	kmem_free(sfp);
diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c
index c293023..47e1326 100644
--- a/fs/xfs/xfs_dir2_data.c
+++ b/fs/xfs/xfs_dir2_data.c
@@ -29,14 +29,12 @@
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
 #include "xfs_dir2_format.h"
+#include "xfs_dir2.h"
 #include "xfs_dir2_priv.h"
 #include "xfs_error.h"
 #include "xfs_buf_item.h"
 #include "xfs_cksum.h"
 
-STATIC xfs_dir2_data_free_t *
-xfs_dir2_data_freefind(xfs_dir2_data_hdr_t *hdr, xfs_dir2_data_unused_t *dup);
-
 /*
  * Check the consistency of the data block.
  * The input can also be a block-format directory.
@@ -149,8 +147,10 @@
 		XFS_WANT_CORRUPTED_RETURN(
 			!xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber)));
 		XFS_WANT_CORRUPTED_RETURN(
-			be16_to_cpu(*xfs_dir2_data_entry_tag_p(dep)) ==
+			be16_to_cpu(*xfs_dir3_data_entry_tag_p(mp, dep)) ==
 					       (char *)dep - (char *)hdr);
+		XFS_WANT_CORRUPTED_RETURN(
+			xfs_dir3_dirent_get_ftype(mp, dep) < XFS_DIR3_FT_MAX);
 		count++;
 		lastfree = 0;
 		if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
@@ -168,7 +168,7 @@
 			}
 			XFS_WANT_CORRUPTED_RETURN(i < be32_to_cpu(btp->count));
 		}
-		p += xfs_dir2_data_entsize(dep->namelen);
+		p += xfs_dir3_data_entsize(mp, dep->namelen);
 	}
 	/*
 	 * Need to have seen all the entries and all the bestfree slots.
@@ -325,7 +325,7 @@
  * Given a data block and an unused entry from that block,
  * return the bestfree entry if any that corresponds to it.
  */
-STATIC xfs_dir2_data_free_t *
+xfs_dir2_data_free_t *
 xfs_dir2_data_freefind(
 	xfs_dir2_data_hdr_t	*hdr,		/* data block */
 	xfs_dir2_data_unused_t	*dup)		/* data unused entry */
@@ -333,7 +333,7 @@
 	xfs_dir2_data_free_t	*dfp;		/* bestfree entry */
 	xfs_dir2_data_aoff_t	off;		/* offset value needed */
 	struct xfs_dir2_data_free *bf;
-#if defined(DEBUG) && defined(__KERNEL__)
+#ifdef DEBUG
 	int			matched;	/* matched the value */
 	int			seenzero;	/* saw a 0 bestfree entry */
 #endif
@@ -341,7 +341,7 @@
 	off = (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr);
 	bf = xfs_dir3_data_bestfree_p(hdr);
 
-#if defined(DEBUG) && defined(__KERNEL__)
+#ifdef DEBUG
 	/*
 	 * Validate some consistency in the bestfree table.
 	 * Check order, non-overlapping entries, and if we find the
@@ -538,8 +538,8 @@
 		else {
 			dep = (xfs_dir2_data_entry_t *)p;
 			ASSERT((char *)dep - (char *)hdr ==
-			       be16_to_cpu(*xfs_dir2_data_entry_tag_p(dep)));
-			p += xfs_dir2_data_entsize(dep->namelen);
+			       be16_to_cpu(*xfs_dir3_data_entry_tag_p(mp, dep)));
+			p += xfs_dir3_data_entsize(mp, dep->namelen);
 		}
 	}
 }
@@ -629,7 +629,8 @@
 	struct xfs_buf		*bp,
 	xfs_dir2_data_entry_t	*dep)		/* data entry pointer */
 {
-	xfs_dir2_data_hdr_t	*hdr = bp->b_addr;
+	struct xfs_dir2_data_hdr *hdr = bp->b_addr;
+	struct xfs_mount	*mp = tp->t_mountp;
 
 	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
 	       hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
@@ -637,7 +638,7 @@
 	       hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
 
 	xfs_trans_log_buf(tp, bp, (uint)((char *)dep - (char *)hdr),
-		(uint)((char *)(xfs_dir2_data_entry_tag_p(dep) + 1) -
+		(uint)((char *)(xfs_dir3_data_entry_tag_p(mp, dep) + 1) -
 		       (char *)hdr - 1));
 }
 
diff --git a/fs/xfs/xfs_dir2_format.h b/fs/xfs/xfs_dir2_format.h
index 7826782b..a0961a6 100644
--- a/fs/xfs/xfs_dir2_format.h
+++ b/fs/xfs/xfs_dir2_format.h
@@ -69,6 +69,23 @@
 #define	XFS_DIR3_FREE_MAGIC	0x58444633	/* XDF3: free index blocks */
 
 /*
+ * Dirents in version 3 directories have a file type field. Additions to this
+ * list are an on-disk format change, requiring feature bits. Valid values
+ * are as follows:
+ */
+#define XFS_DIR3_FT_UNKNOWN		0
+#define XFS_DIR3_FT_REG_FILE		1
+#define XFS_DIR3_FT_DIR			2
+#define XFS_DIR3_FT_CHRDEV		3
+#define XFS_DIR3_FT_BLKDEV		4
+#define XFS_DIR3_FT_FIFO		5
+#define XFS_DIR3_FT_SOCK		6
+#define XFS_DIR3_FT_SYMLINK		7
+#define XFS_DIR3_FT_WHT			8
+
+#define XFS_DIR3_FT_MAX			9
+
+/*
  * Byte offset in data block and shortform entry.
  */
 typedef	__uint16_t	xfs_dir2_data_off_t;
@@ -138,6 +155,9 @@
 	xfs_dir2_sf_off_t	offset;		/* saved offset */
 	__u8			name[];		/* name, variable size */
 	/*
+	 * A single byte containing the file type field follows the inode
+	 * number for version 3 directory entries.
+	 *
 	 * A xfs_dir2_ino8_t or xfs_dir2_ino4_t follows here, at a
 	 * variable offset after the name.
 	 */
@@ -162,16 +182,6 @@
 	put_unaligned_be16(off, &sfep->offset.i);
 }
 
-static inline int
-xfs_dir2_sf_entsize(struct xfs_dir2_sf_hdr *hdr, int len)
-{
-	return sizeof(struct xfs_dir2_sf_entry) +	/* namelen + offset */
-		len +					/* name */
-		(hdr->i8count ?				/* ino */
-		 sizeof(xfs_dir2_ino8_t) :
-		 sizeof(xfs_dir2_ino4_t));
-}
-
 static inline struct xfs_dir2_sf_entry *
 xfs_dir2_sf_firstentry(struct xfs_dir2_sf_hdr *hdr)
 {
@@ -179,14 +189,78 @@
 		((char *)hdr + xfs_dir2_sf_hdr_size(hdr->i8count));
 }
 
-static inline struct xfs_dir2_sf_entry *
-xfs_dir2_sf_nextentry(struct xfs_dir2_sf_hdr *hdr,
-		struct xfs_dir2_sf_entry *sfep)
+static inline int
+xfs_dir3_sf_entsize(
+	struct xfs_mount	*mp,
+	struct xfs_dir2_sf_hdr	*hdr,
+	int			len)
 {
-	return (struct xfs_dir2_sf_entry *)
-		((char *)sfep + xfs_dir2_sf_entsize(hdr, sfep->namelen));
+	int count = sizeof(struct xfs_dir2_sf_entry); 	/* namelen + offset */
+
+	count += len;					/* name */
+	count += hdr->i8count ? sizeof(xfs_dir2_ino8_t) :
+				sizeof(xfs_dir2_ino4_t); /* ino # */
+	if (xfs_sb_version_hasftype(&mp->m_sb))
+		count += sizeof(__uint8_t);		/* file type */
+	return count;
 }
 
+static inline struct xfs_dir2_sf_entry *
+xfs_dir3_sf_nextentry(
+	struct xfs_mount	*mp,
+	struct xfs_dir2_sf_hdr	*hdr,
+	struct xfs_dir2_sf_entry *sfep)
+{
+	return (struct xfs_dir2_sf_entry *)
+		((char *)sfep + xfs_dir3_sf_entsize(mp, hdr, sfep->namelen));
+}
+
+/*
+ * in dir3 shortform directories, the file type field is stored at a variable
+ * offset after the inode number. Because it's only a single byte, endian
+ * conversion is not necessary.
+ */
+static inline __uint8_t *
+xfs_dir3_sfe_ftypep(
+	struct xfs_dir2_sf_hdr	*hdr,
+	struct xfs_dir2_sf_entry *sfep)
+{
+	return (__uint8_t *)&sfep->name[sfep->namelen];
+}
+
+static inline __uint8_t
+xfs_dir3_sfe_get_ftype(
+	struct xfs_mount	*mp,
+	struct xfs_dir2_sf_hdr	*hdr,
+	struct xfs_dir2_sf_entry *sfep)
+{
+	__uint8_t	*ftp;
+
+	if (!xfs_sb_version_hasftype(&mp->m_sb))
+		return XFS_DIR3_FT_UNKNOWN;
+
+	ftp = xfs_dir3_sfe_ftypep(hdr, sfep);
+	if (*ftp >= XFS_DIR3_FT_MAX)
+		return XFS_DIR3_FT_UNKNOWN;
+	return *ftp;
+}
+
+static inline void
+xfs_dir3_sfe_put_ftype(
+	struct xfs_mount	*mp,
+	struct xfs_dir2_sf_hdr	*hdr,
+	struct xfs_dir2_sf_entry *sfep,
+	__uint8_t		ftype)
+{
+	__uint8_t	*ftp;
+
+	ASSERT(ftype < XFS_DIR3_FT_MAX);
+
+	if (!xfs_sb_version_hasftype(&mp->m_sb))
+		return;
+	ftp = xfs_dir3_sfe_ftypep(hdr, sfep);
+	*ftp = ftype;
+}
 
 /*
  * Data block structures.
@@ -286,12 +360,18 @@
  * Active entry in a data block.
  *
  * Aligned to 8 bytes.  After the variable length name field there is a
- * 2 byte tag field, which can be accessed using xfs_dir2_data_entry_tag_p.
+ * 2 byte tag field, which can be accessed using xfs_dir3_data_entry_tag_p.
+ *
+ * For dir3 structures, there is file type field between the name and the tag.
+ * This can only be manipulated by helper functions. It is packed hard against
+ * the end of the name so any padding for rounding is between the file type and
+ * the tag.
  */
 typedef struct xfs_dir2_data_entry {
 	__be64			inumber;	/* inode number */
 	__u8			namelen;	/* name length */
 	__u8			name[];		/* name bytes, no null */
+     /* __u8			filetype; */	/* type of inode we point to */
      /*	__be16                  tag; */		/* starting offset of us */
 } xfs_dir2_data_entry_t;
 
@@ -311,20 +391,67 @@
 /*
  * Size of a data entry.
  */
-static inline int xfs_dir2_data_entsize(int n)
+static inline int
+__xfs_dir3_data_entsize(
+	bool	ftype,
+	int	n)
 {
-	return (int)roundup(offsetof(struct xfs_dir2_data_entry, name[0]) + n +
-		 (uint)sizeof(xfs_dir2_data_off_t), XFS_DIR2_DATA_ALIGN);
+	int	size = offsetof(struct xfs_dir2_data_entry, name[0]);
+
+	size += n;
+	size += sizeof(xfs_dir2_data_off_t);
+	if (ftype)
+		size += sizeof(__uint8_t);
+	return roundup(size, XFS_DIR2_DATA_ALIGN);
+}
+static inline int
+xfs_dir3_data_entsize(
+	struct xfs_mount	*mp,
+	int			n)
+{
+	bool ftype = xfs_sb_version_hasftype(&mp->m_sb) ? true : false;
+	return __xfs_dir3_data_entsize(ftype, n);
+}
+
+static inline __uint8_t
+xfs_dir3_dirent_get_ftype(
+	struct xfs_mount	*mp,
+	struct xfs_dir2_data_entry *dep)
+{
+	if (xfs_sb_version_hasftype(&mp->m_sb)) {
+		__uint8_t	type = dep->name[dep->namelen];
+
+		ASSERT(type < XFS_DIR3_FT_MAX);
+		if (type < XFS_DIR3_FT_MAX)
+			return type;
+
+	}
+	return XFS_DIR3_FT_UNKNOWN;
+}
+
+static inline void
+xfs_dir3_dirent_put_ftype(
+	struct xfs_mount	*mp,
+	struct xfs_dir2_data_entry *dep,
+	__uint8_t		type)
+{
+	ASSERT(type < XFS_DIR3_FT_MAX);
+	ASSERT(dep->namelen != 0);
+
+	if (xfs_sb_version_hasftype(&mp->m_sb))
+		dep->name[dep->namelen] = type;
 }
 
 /*
  * Pointer to an entry's tag word.
  */
 static inline __be16 *
-xfs_dir2_data_entry_tag_p(struct xfs_dir2_data_entry *dep)
+xfs_dir3_data_entry_tag_p(
+	struct xfs_mount	*mp,
+	struct xfs_dir2_data_entry *dep)
 {
 	return (__be16 *)((char *)dep +
-		xfs_dir2_data_entsize(dep->namelen) - sizeof(__be16));
+		xfs_dir3_data_entsize(mp, dep->namelen) - sizeof(__be16));
 }
 
 /*
@@ -375,13 +502,17 @@
  * data block header because the sfe embeds the block offset of the entry into
  * it so that it doesn't change when format conversion occurs. Bad Things Happen
  * if we don't follow this rule.
+ *
+ * XXX: there is scope for significant optimisation of the logic here. Right
+ * now we are checking for "dir3 format" over and over again. Ideally we should
+ * only do it once for each operation.
  */
 #define	XFS_DIR3_DATA_DOT_OFFSET(mp)	\
 	xfs_dir3_data_hdr_size(xfs_sb_version_hascrc(&(mp)->m_sb))
 #define	XFS_DIR3_DATA_DOTDOT_OFFSET(mp)	\
-	(XFS_DIR3_DATA_DOT_OFFSET(mp) + xfs_dir2_data_entsize(1))
+	(XFS_DIR3_DATA_DOT_OFFSET(mp) + xfs_dir3_data_entsize(mp, 1))
 #define	XFS_DIR3_DATA_FIRST_OFFSET(mp)		\
-	(XFS_DIR3_DATA_DOTDOT_OFFSET(mp) + xfs_dir2_data_entsize(2))
+	(XFS_DIR3_DATA_DOTDOT_OFFSET(mp) + xfs_dir3_data_entsize(mp, 2))
 
 static inline xfs_dir2_data_aoff_t
 xfs_dir3_data_dot_offset(struct xfs_dir2_data_hdr *hdr)
@@ -392,13 +523,19 @@
 static inline xfs_dir2_data_aoff_t
 xfs_dir3_data_dotdot_offset(struct xfs_dir2_data_hdr *hdr)
 {
-	return xfs_dir3_data_dot_offset(hdr) + xfs_dir2_data_entsize(1);
+	bool dir3 = hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
+		    hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC);
+	return xfs_dir3_data_dot_offset(hdr) +
+		__xfs_dir3_data_entsize(dir3, 1);
 }
 
 static inline xfs_dir2_data_aoff_t
 xfs_dir3_data_first_offset(struct xfs_dir2_data_hdr *hdr)
 {
-	return xfs_dir3_data_dotdot_offset(hdr) + xfs_dir2_data_entsize(2);
+	bool dir3 = hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
+		    hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC);
+	return xfs_dir3_data_dotdot_offset(hdr) +
+		__xfs_dir3_data_entsize(dir3, 2);
 }
 
 /*
@@ -519,6 +656,9 @@
 
 #define XFS_DIR3_LEAF_CRC_OFF  offsetof(struct xfs_dir3_leaf_hdr, info.crc)
 
+extern void xfs_dir3_leaf_hdr_from_disk(struct xfs_dir3_icleaf_hdr *to,
+					struct xfs_dir2_leaf *from);
+
 static inline int
 xfs_dir3_leaf_hdr_size(struct xfs_dir2_leaf *lp)
 {
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index 2aed25c..08984ee 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -31,6 +31,7 @@
 #include "xfs_inode.h"
 #include "xfs_bmap.h"
 #include "xfs_dir2_format.h"
+#include "xfs_dir2.h"
 #include "xfs_dir2_priv.h"
 #include "xfs_error.h"
 #include "xfs_trace.h"
@@ -695,7 +696,7 @@
 	ents = xfs_dir3_leaf_ents_p(leaf);
 	xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
 	bestsp = xfs_dir2_leaf_bests_p(ltp);
-	length = xfs_dir2_data_entsize(args->namelen);
+	length = xfs_dir3_data_entsize(mp, args->namelen);
 
 	/*
 	 * See if there are any entries with the same hash value
@@ -896,7 +897,8 @@
 	dep->inumber = cpu_to_be64(args->inumber);
 	dep->namelen = args->namelen;
 	memcpy(dep->name, args->name, dep->namelen);
-	tagp = xfs_dir2_data_entry_tag_p(dep);
+	xfs_dir3_dirent_put_ftype(mp, dep, args->filetype);
+	tagp = xfs_dir3_data_entry_tag_p(mp, dep);
 	*tagp = cpu_to_be16((char *)dep - (char *)hdr);
 	/*
 	 * Need to scan fix up the bestfree table.
@@ -1083,396 +1085,6 @@
 	*highstalep = highstale;
 }
 
-struct xfs_dir2_leaf_map_info {
-	xfs_extlen_t	map_blocks;	/* number of fsbs in map */
-	xfs_dablk_t	map_off;	/* last mapped file offset */
-	int		map_size;	/* total entries in *map */
-	int		map_valid;	/* valid entries in *map */
-	int		nmap;		/* mappings to ask xfs_bmapi */
-	xfs_dir2_db_t	curdb;		/* db for current block */
-	int		ra_current;	/* number of read-ahead blks */
-	int		ra_index;	/* *map index for read-ahead */
-	int		ra_offset;	/* map entry offset for ra */
-	int		ra_want;	/* readahead count wanted */
-	struct xfs_bmbt_irec map[];	/* map vector for blocks */
-};
-
-STATIC int
-xfs_dir2_leaf_readbuf(
-	struct xfs_inode	*dp,
-	size_t			bufsize,
-	struct xfs_dir2_leaf_map_info *mip,
-	xfs_dir2_off_t		*curoff,
-	struct xfs_buf		**bpp)
-{
-	struct xfs_mount	*mp = dp->i_mount;
-	struct xfs_buf		*bp = *bpp;
-	struct xfs_bmbt_irec	*map = mip->map;
-	struct blk_plug		plug;
-	int			error = 0;
-	int			length;
-	int			i;
-	int			j;
-
-	/*
-	 * If we have a buffer, we need to release it and
-	 * take it out of the mapping.
-	 */
-
-	if (bp) {
-		xfs_trans_brelse(NULL, bp);
-		bp = NULL;
-		mip->map_blocks -= mp->m_dirblkfsbs;
-		/*
-		 * Loop to get rid of the extents for the
-		 * directory block.
-		 */
-		for (i = mp->m_dirblkfsbs; i > 0; ) {
-			j = min_t(int, map->br_blockcount, i);
-			map->br_blockcount -= j;
-			map->br_startblock += j;
-			map->br_startoff += j;
-			/*
-			 * If mapping is done, pitch it from
-			 * the table.
-			 */
-			if (!map->br_blockcount && --mip->map_valid)
-				memmove(&map[0], &map[1],
-					sizeof(map[0]) * mip->map_valid);
-			i -= j;
-		}
-	}
-
-	/*
-	 * Recalculate the readahead blocks wanted.
-	 */
-	mip->ra_want = howmany(bufsize + mp->m_dirblksize,
-			       mp->m_sb.sb_blocksize) - 1;
-	ASSERT(mip->ra_want >= 0);
-
-	/*
-	 * If we don't have as many as we want, and we haven't
-	 * run out of data blocks, get some more mappings.
-	 */
-	if (1 + mip->ra_want > mip->map_blocks &&
-	    mip->map_off < xfs_dir2_byte_to_da(mp, XFS_DIR2_LEAF_OFFSET)) {
-		/*
-		 * Get more bmaps, fill in after the ones
-		 * we already have in the table.
-		 */
-		mip->nmap = mip->map_size - mip->map_valid;
-		error = xfs_bmapi_read(dp, mip->map_off,
-				xfs_dir2_byte_to_da(mp, XFS_DIR2_LEAF_OFFSET) -
-								mip->map_off,
-				&map[mip->map_valid], &mip->nmap, 0);
-
-		/*
-		 * Don't know if we should ignore this or try to return an
-		 * error.  The trouble with returning errors is that readdir
-		 * will just stop without actually passing the error through.
-		 */
-		if (error)
-			goto out;	/* XXX */
-
-		/*
-		 * If we got all the mappings we asked for, set the final map
-		 * offset based on the last bmap value received.  Otherwise,
-		 * we've reached the end.
-		 */
-		if (mip->nmap == mip->map_size - mip->map_valid) {
-			i = mip->map_valid + mip->nmap - 1;
-			mip->map_off = map[i].br_startoff + map[i].br_blockcount;
-		} else
-			mip->map_off = xfs_dir2_byte_to_da(mp,
-							XFS_DIR2_LEAF_OFFSET);
-
-		/*
-		 * Look for holes in the mapping, and eliminate them.  Count up
-		 * the valid blocks.
-		 */
-		for (i = mip->map_valid; i < mip->map_valid + mip->nmap; ) {
-			if (map[i].br_startblock == HOLESTARTBLOCK) {
-				mip->nmap--;
-				length = mip->map_valid + mip->nmap - i;
-				if (length)
-					memmove(&map[i], &map[i + 1],
-						sizeof(map[i]) * length);
-			} else {
-				mip->map_blocks += map[i].br_blockcount;
-				i++;
-			}
-		}
-		mip->map_valid += mip->nmap;
-	}
-
-	/*
-	 * No valid mappings, so no more data blocks.
-	 */
-	if (!mip->map_valid) {
-		*curoff = xfs_dir2_da_to_byte(mp, mip->map_off);
-		goto out;
-	}
-
-	/*
-	 * Read the directory block starting at the first mapping.
-	 */
-	mip->curdb = xfs_dir2_da_to_db(mp, map->br_startoff);
-	error = xfs_dir3_data_read(NULL, dp, map->br_startoff,
-			map->br_blockcount >= mp->m_dirblkfsbs ?
-			    XFS_FSB_TO_DADDR(mp, map->br_startblock) : -1, &bp);
-
-	/*
-	 * Should just skip over the data block instead of giving up.
-	 */
-	if (error)
-		goto out;	/* XXX */
-
-	/*
-	 * Adjust the current amount of read-ahead: we just read a block that
-	 * was previously ra.
-	 */
-	if (mip->ra_current)
-		mip->ra_current -= mp->m_dirblkfsbs;
-
-	/*
-	 * Do we need more readahead?
-	 */
-	blk_start_plug(&plug);
-	for (mip->ra_index = mip->ra_offset = i = 0;
-	     mip->ra_want > mip->ra_current && i < mip->map_blocks;
-	     i += mp->m_dirblkfsbs) {
-		ASSERT(mip->ra_index < mip->map_valid);
-		/*
-		 * Read-ahead a contiguous directory block.
-		 */
-		if (i > mip->ra_current &&
-		    map[mip->ra_index].br_blockcount >= mp->m_dirblkfsbs) {
-			xfs_dir3_data_readahead(NULL, dp,
-				map[mip->ra_index].br_startoff + mip->ra_offset,
-				XFS_FSB_TO_DADDR(mp,
-					map[mip->ra_index].br_startblock +
-							mip->ra_offset));
-			mip->ra_current = i;
-		}
-
-		/*
-		 * Read-ahead a non-contiguous directory block.  This doesn't
-		 * use our mapping, but this is a very rare case.
-		 */
-		else if (i > mip->ra_current) {
-			xfs_dir3_data_readahead(NULL, dp,
-					map[mip->ra_index].br_startoff +
-							mip->ra_offset, -1);
-			mip->ra_current = i;
-		}
-
-		/*
-		 * Advance offset through the mapping table.
-		 */
-		for (j = 0; j < mp->m_dirblkfsbs; j++) {
-			/*
-			 * The rest of this extent but not more than a dir
-			 * block.
-			 */
-			length = min_t(int, mp->m_dirblkfsbs,
-					map[mip->ra_index].br_blockcount -
-							mip->ra_offset);
-			j += length;
-			mip->ra_offset += length;
-
-			/*
-			 * Advance to the next mapping if this one is used up.
-			 */
-			if (mip->ra_offset == map[mip->ra_index].br_blockcount) {
-				mip->ra_offset = 0;
-				mip->ra_index++;
-			}
-		}
-	}
-	blk_finish_plug(&plug);
-
-out:
-	*bpp = bp;
-	return error;
-}
-
-/*
- * Getdents (readdir) for leaf and node directories.
- * This reads the data blocks only, so is the same for both forms.
- */
-int						/* error */
-xfs_dir2_leaf_getdents(
-	xfs_inode_t		*dp,		/* incore directory inode */
-	struct dir_context	*ctx,
-	size_t			bufsize)
-{
-	struct xfs_buf		*bp = NULL;	/* data block buffer */
-	xfs_dir2_data_hdr_t	*hdr;		/* data block header */
-	xfs_dir2_data_entry_t	*dep;		/* data entry */
-	xfs_dir2_data_unused_t	*dup;		/* unused entry */
-	int			error = 0;	/* error return value */
-	int			length;		/* temporary length value */
-	xfs_mount_t		*mp;		/* filesystem mount point */
-	int			byteoff;	/* offset in current block */
-	xfs_dir2_off_t		curoff;		/* current overall offset */
-	xfs_dir2_off_t		newoff;		/* new curoff after new blk */
-	char			*ptr = NULL;	/* pointer to current data */
-	struct xfs_dir2_leaf_map_info *map_info;
-
-	/*
-	 * If the offset is at or past the largest allowed value,
-	 * give up right away.
-	 */
-	if (ctx->pos >= XFS_DIR2_MAX_DATAPTR)
-		return 0;
-
-	mp = dp->i_mount;
-
-	/*
-	 * Set up to bmap a number of blocks based on the caller's
-	 * buffer size, the directory block size, and the filesystem
-	 * block size.
-	 */
-	length = howmany(bufsize + mp->m_dirblksize,
-				     mp->m_sb.sb_blocksize);
-	map_info = kmem_zalloc(offsetof(struct xfs_dir2_leaf_map_info, map) +
-				(length * sizeof(struct xfs_bmbt_irec)),
-			       KM_SLEEP | KM_NOFS);
-	map_info->map_size = length;
-
-	/*
-	 * Inside the loop we keep the main offset value as a byte offset
-	 * in the directory file.
-	 */
-	curoff = xfs_dir2_dataptr_to_byte(mp, ctx->pos);
-
-	/*
-	 * Force this conversion through db so we truncate the offset
-	 * down to get the start of the data block.
-	 */
-	map_info->map_off = xfs_dir2_db_to_da(mp,
-					      xfs_dir2_byte_to_db(mp, curoff));
-
-	/*
-	 * Loop over directory entries until we reach the end offset.
-	 * Get more blocks and readahead as necessary.
-	 */
-	while (curoff < XFS_DIR2_LEAF_OFFSET) {
-		/*
-		 * If we have no buffer, or we're off the end of the
-		 * current buffer, need to get another one.
-		 */
-		if (!bp || ptr >= (char *)bp->b_addr + mp->m_dirblksize) {
-
-			error = xfs_dir2_leaf_readbuf(dp, bufsize, map_info,
-						      &curoff, &bp);
-			if (error || !map_info->map_valid)
-				break;
-
-			/*
-			 * Having done a read, we need to set a new offset.
-			 */
-			newoff = xfs_dir2_db_off_to_byte(mp, map_info->curdb, 0);
-			/*
-			 * Start of the current block.
-			 */
-			if (curoff < newoff)
-				curoff = newoff;
-			/*
-			 * Make sure we're in the right block.
-			 */
-			else if (curoff > newoff)
-				ASSERT(xfs_dir2_byte_to_db(mp, curoff) ==
-				       map_info->curdb);
-			hdr = bp->b_addr;
-			xfs_dir3_data_check(dp, bp);
-			/*
-			 * Find our position in the block.
-			 */
-			ptr = (char *)xfs_dir3_data_entry_p(hdr);
-			byteoff = xfs_dir2_byte_to_off(mp, curoff);
-			/*
-			 * Skip past the header.
-			 */
-			if (byteoff == 0)
-				curoff += xfs_dir3_data_entry_offset(hdr);
-			/*
-			 * Skip past entries until we reach our offset.
-			 */
-			else {
-				while ((char *)ptr - (char *)hdr < byteoff) {
-					dup = (xfs_dir2_data_unused_t *)ptr;
-
-					if (be16_to_cpu(dup->freetag)
-						  == XFS_DIR2_DATA_FREE_TAG) {
-
-						length = be16_to_cpu(dup->length);
-						ptr += length;
-						continue;
-					}
-					dep = (xfs_dir2_data_entry_t *)ptr;
-					length =
-					   xfs_dir2_data_entsize(dep->namelen);
-					ptr += length;
-				}
-				/*
-				 * Now set our real offset.
-				 */
-				curoff =
-					xfs_dir2_db_off_to_byte(mp,
-					    xfs_dir2_byte_to_db(mp, curoff),
-					    (char *)ptr - (char *)hdr);
-				if (ptr >= (char *)hdr + mp->m_dirblksize) {
-					continue;
-				}
-			}
-		}
-		/*
-		 * We have a pointer to an entry.
-		 * Is it a live one?
-		 */
-		dup = (xfs_dir2_data_unused_t *)ptr;
-		/*
-		 * No, it's unused, skip over it.
-		 */
-		if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
-			length = be16_to_cpu(dup->length);
-			ptr += length;
-			curoff += length;
-			continue;
-		}
-
-		dep = (xfs_dir2_data_entry_t *)ptr;
-		length = xfs_dir2_data_entsize(dep->namelen);
-
-		ctx->pos = xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff;
-		if (!dir_emit(ctx, (char *)dep->name, dep->namelen,
-			    be64_to_cpu(dep->inumber), DT_UNKNOWN))
-			break;
-
-		/*
-		 * Advance to next entry in the block.
-		 */
-		ptr += length;
-		curoff += length;
-		/* bufsize may have just been a guess; don't go negative */
-		bufsize = bufsize > length ? bufsize - length : 0;
-	}
-
-	/*
-	 * All done.  Set output offset value to current offset.
-	 */
-	if (curoff > xfs_dir2_dataptr_to_byte(mp, XFS_DIR2_MAX_DATAPTR))
-		ctx->pos = XFS_DIR2_MAX_DATAPTR & 0x7fffffff;
-	else
-		ctx->pos = xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff;
-	kmem_free(map_info);
-	if (bp)
-		xfs_trans_brelse(NULL, bp);
-	return error;
-}
-
-
 /*
  * Log the bests entries indicated from a leaf1 block.
  */
@@ -1614,6 +1226,7 @@
 	 * Return the found inode number & CI name if appropriate
 	 */
 	args->inumber = be64_to_cpu(dep->inumber);
+	args->filetype = xfs_dir3_dirent_get_ftype(dp->i_mount, dep);
 	error = xfs_dir_cilookup_result(args, dep->name, dep->namelen);
 	xfs_trans_brelse(tp, dbp);
 	xfs_trans_brelse(tp, lbp);
@@ -1816,7 +1429,7 @@
 	 */
 	xfs_dir2_data_make_free(tp, dbp,
 		(xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr),
-		xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan);
+		xfs_dir3_data_entsize(mp, dep->namelen), &needlog, &needscan);
 	/*
 	 * We just mark the leaf entry stale by putting a null in it.
 	 */
@@ -1944,6 +1557,7 @@
 	 * Put the new inode number in, log it.
 	 */
 	dep->inumber = cpu_to_be64(args->inumber);
+	xfs_dir3_dirent_put_ftype(dp->i_mount, dep, args->filetype);
 	tp = args->trans;
 	xfs_dir2_data_log_entry(tp, dbp, dep);
 	xfs_dir3_leaf_check(dp->i_mount, lbp);
@@ -1975,10 +1589,6 @@
 	ents = xfs_dir3_leaf_ents_p(leaf);
 	xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
 
-#ifndef __KERNEL__
-	if (!leafhdr.count)
-		return 0;
-#endif
 	/*
 	 * Note, the table cannot be empty, so we have to go through the loop.
 	 * Binary search the leaf entries looking for our hash value.
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index 2226a00..4c3dba7 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -30,6 +30,7 @@
 #include "xfs_inode.h"
 #include "xfs_bmap.h"
 #include "xfs_dir2_format.h"
+#include "xfs_dir2.h"
 #include "xfs_dir2_priv.h"
 #include "xfs_error.h"
 #include "xfs_trace.h"
@@ -312,11 +313,13 @@
 	struct xfs_trans	*tp,
 	struct xfs_buf		*bp)
 {
+#ifdef DEBUG
 	xfs_dir2_free_t		*free;		/* freespace structure */
 
 	free = bp->b_addr;
 	ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC) ||
 	       free->hdr.magic == cpu_to_be32(XFS_DIR3_FREE_MAGIC));
+#endif
 	xfs_trans_log_buf(tp, bp, 0, xfs_dir3_free_hdr_size(tp->t_mountp) - 1);
 }
 
@@ -602,7 +605,7 @@
 		ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC) ||
 		       free->hdr.magic == cpu_to_be32(XFS_DIR3_FREE_MAGIC));
 	}
-	length = xfs_dir2_data_entsize(args->namelen);
+	length = xfs_dir3_data_entsize(mp, args->namelen);
 	/*
 	 * Loop over leaf entries with the right hash value.
 	 */
@@ -813,6 +816,7 @@
 				xfs_trans_brelse(tp, state->extrablk.bp);
 			args->cmpresult = cmp;
 			args->inumber = be64_to_cpu(dep->inumber);
+			args->filetype = xfs_dir3_dirent_get_ftype(mp, dep);
 			*indexp = index;
 			state->extravalid = 1;
 			state->extrablk.bp = curbp;
@@ -1256,7 +1260,7 @@
 	longest = be16_to_cpu(bf[0].length);
 	needlog = needscan = 0;
 	xfs_dir2_data_make_free(tp, dbp, off,
-		xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan);
+		xfs_dir3_data_entsize(mp, dep->namelen), &needlog, &needscan);
 	/*
 	 * Rescan the data block freespaces for bestfree.
 	 * Log the data block header if needed.
@@ -1708,7 +1712,7 @@
 	dp = args->dp;
 	mp = dp->i_mount;
 	tp = args->trans;
-	length = xfs_dir2_data_entsize(args->namelen);
+	length = xfs_dir3_data_entsize(mp, args->namelen);
 	/*
 	 * If we came in with a freespace block that means that lookup
 	 * found an entry with our hash value.  This is the freespace
@@ -2004,7 +2008,8 @@
 	dep->inumber = cpu_to_be64(args->inumber);
 	dep->namelen = args->namelen;
 	memcpy(dep->name, args->name, dep->namelen);
-	tagp = xfs_dir2_data_entry_tag_p(dep);
+	xfs_dir3_dirent_put_ftype(mp, dep, args->filetype);
+	tagp = xfs_dir3_data_entry_tag_p(mp, dep);
 	*tagp = cpu_to_be16((char *)dep - (char *)hdr);
 	xfs_dir2_data_log_entry(tp, dbp, dep);
 	/*
@@ -2224,6 +2229,7 @@
 		 * Fill in the new inode number and log the entry.
 		 */
 		dep->inumber = cpu_to_be64(inum);
+		xfs_dir3_dirent_put_ftype(state->mp, dep, args->filetype);
 		xfs_dir2_data_log_entry(args->trans, state->extrablk.bp, dep);
 		rval = 0;
 	}
diff --git a/fs/xfs/xfs_dir2_priv.h b/fs/xfs/xfs_dir2_priv.h
index 0511cda..1bad84c 100644
--- a/fs/xfs/xfs_dir2_priv.h
+++ b/fs/xfs/xfs_dir2_priv.h
@@ -18,23 +18,26 @@
 #ifndef __XFS_DIR2_PRIV_H__
 #define __XFS_DIR2_PRIV_H__
 
+struct dir_context;
+
 /* xfs_dir2.c */
 extern int xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino);
-extern int xfs_dir2_isblock(struct xfs_trans *tp, struct xfs_inode *dp, int *r);
-extern int xfs_dir2_isleaf(struct xfs_trans *tp, struct xfs_inode *dp, int *r);
 extern int xfs_dir2_grow_inode(struct xfs_da_args *args, int space,
 				xfs_dir2_db_t *dbp);
-extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db,
-				struct xfs_buf *bp);
 extern int xfs_dir_cilookup_result(struct xfs_da_args *args,
 				const unsigned char *name, int len);
 
-/* xfs_dir2_block.c */
-extern const struct xfs_buf_ops xfs_dir3_block_buf_ops;
+#define S_SHIFT 12
+extern const unsigned char xfs_mode_to_ftype[];
 
+extern unsigned char xfs_dir3_get_dtype(struct xfs_mount *mp,
+					__uint8_t filetype);
+
+
+/* xfs_dir2_block.c */
+extern int xfs_dir3_block_read(struct xfs_trans *tp, struct xfs_inode *dp,
+			       struct xfs_buf **bpp);
 extern int xfs_dir2_block_addname(struct xfs_da_args *args);
-extern int xfs_dir2_block_getdents(struct xfs_inode *dp,
-		struct dir_context *ctx);
 extern int xfs_dir2_block_lookup(struct xfs_da_args *args);
 extern int xfs_dir2_block_removename(struct xfs_da_args *args);
 extern int xfs_dir2_block_replace(struct xfs_da_args *args);
@@ -48,9 +51,6 @@
 #define	xfs_dir3_data_check(dp,bp)
 #endif
 
-extern const struct xfs_buf_ops xfs_dir3_data_buf_ops;
-extern const struct xfs_buf_ops xfs_dir3_free_buf_ops;
-
 extern int __xfs_dir3_data_check(struct xfs_inode *dp, struct xfs_buf *bp);
 extern int xfs_dir3_data_read(struct xfs_trans *tp, struct xfs_inode *dp,
 		xfs_dablk_t bno, xfs_daddr_t mapped_bno, struct xfs_buf **bpp);
@@ -60,27 +60,10 @@
 extern struct xfs_dir2_data_free *
 xfs_dir2_data_freeinsert(struct xfs_dir2_data_hdr *hdr,
 		struct xfs_dir2_data_unused *dup, int *loghead);
-extern void xfs_dir2_data_freescan(struct xfs_mount *mp,
-		struct xfs_dir2_data_hdr *hdr, int *loghead);
 extern int xfs_dir3_data_init(struct xfs_da_args *args, xfs_dir2_db_t blkno,
 		struct xfs_buf **bpp);
-extern void xfs_dir2_data_log_entry(struct xfs_trans *tp, struct xfs_buf *bp,
-		struct xfs_dir2_data_entry *dep);
-extern void xfs_dir2_data_log_header(struct xfs_trans *tp,
-		struct xfs_buf *bp);
-extern void xfs_dir2_data_log_unused(struct xfs_trans *tp, struct xfs_buf *bp,
-		struct xfs_dir2_data_unused *dup);
-extern void xfs_dir2_data_make_free(struct xfs_trans *tp, struct xfs_buf *bp,
-		xfs_dir2_data_aoff_t offset, xfs_dir2_data_aoff_t len,
-		int *needlogp, int *needscanp);
-extern void xfs_dir2_data_use_free(struct xfs_trans *tp, struct xfs_buf *bp,
-		struct xfs_dir2_data_unused *dup, xfs_dir2_data_aoff_t offset,
-		xfs_dir2_data_aoff_t len, int *needlogp, int *needscanp);
 
 /* xfs_dir2_leaf.c */
-extern const struct xfs_buf_ops xfs_dir3_leaf1_buf_ops;
-extern const struct xfs_buf_ops xfs_dir3_leafn_buf_ops;
-
 extern int xfs_dir3_leafn_read(struct xfs_trans *tp, struct xfs_inode *dp,
 		xfs_dablk_t fbno, xfs_daddr_t mappedbno, struct xfs_buf **bpp);
 extern int xfs_dir2_block_to_leaf(struct xfs_da_args *args,
@@ -91,8 +74,6 @@
 extern void xfs_dir3_leaf_compact_x1(struct xfs_dir3_icleaf_hdr *leafhdr,
 		struct xfs_dir2_leaf_entry *ents, int *indexp,
 		int *lowstalep, int *highstalep, int *lowlogp, int *highlogp);
-extern int xfs_dir2_leaf_getdents(struct xfs_inode *dp, struct dir_context *ctx,
-		size_t bufsize);
 extern int xfs_dir3_leaf_get_buf(struct xfs_da_args *args, xfs_dir2_db_t bno,
 		struct xfs_buf **bpp, __uint16_t magic);
 extern void xfs_dir3_leaf_log_ents(struct xfs_trans *tp, struct xfs_buf *bp,
@@ -144,18 +125,18 @@
 		xfs_dablk_t fbno, struct xfs_buf **bpp);
 
 /* xfs_dir2_sf.c */
-extern xfs_ino_t xfs_dir2_sf_get_parent_ino(struct xfs_dir2_sf_hdr *sfp);
-extern xfs_ino_t xfs_dir2_sfe_get_ino(struct xfs_dir2_sf_hdr *sfp,
-		struct xfs_dir2_sf_entry *sfep);
 extern int xfs_dir2_block_sfsize(struct xfs_inode *dp,
 		struct xfs_dir2_data_hdr *block, struct xfs_dir2_sf_hdr *sfhp);
 extern int xfs_dir2_block_to_sf(struct xfs_da_args *args, struct xfs_buf *bp,
 		int size, xfs_dir2_sf_hdr_t *sfhp);
 extern int xfs_dir2_sf_addname(struct xfs_da_args *args);
 extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino);
-extern int xfs_dir2_sf_getdents(struct xfs_inode *dp, struct dir_context *ctx);
 extern int xfs_dir2_sf_lookup(struct xfs_da_args *args);
 extern int xfs_dir2_sf_removename(struct xfs_da_args *args);
 extern int xfs_dir2_sf_replace(struct xfs_da_args *args);
 
+/* xfs_dir2_readdir.c */
+extern int xfs_readdir(struct xfs_inode *dp, struct dir_context *ctx,
+		       size_t bufsize);
+
 #endif /* __XFS_DIR2_PRIV_H__ */
diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c
new file mode 100644
index 0000000..8993ec1
--- /dev/null
+++ b/fs/xfs/xfs_dir2_readdir.c
@@ -0,0 +1,695 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * Copyright (c) 2013 Red Hat, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_types.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+#include "xfs_da_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_dir2_format.h"
+#include "xfs_dir2.h"
+#include "xfs_dir2_priv.h"
+#include "xfs_error.h"
+#include "xfs_trace.h"
+#include "xfs_bmap.h"
+
+/*
+ * Directory file type support functions
+ */
+static unsigned char xfs_dir3_filetype_table[] = {
+	DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK,
+	DT_FIFO, DT_SOCK, DT_LNK, DT_WHT,
+};
+
+unsigned char
+xfs_dir3_get_dtype(
+	struct xfs_mount	*mp,
+	__uint8_t		filetype)
+{
+	if (!xfs_sb_version_hasftype(&mp->m_sb))
+		return DT_UNKNOWN;
+
+	if (filetype >= XFS_DIR3_FT_MAX)
+		return DT_UNKNOWN;
+
+	return xfs_dir3_filetype_table[filetype];
+}
+/*
+ * @mode, if set, indicates that the type field needs to be set up.
+ * This uses the transformation from file mode to DT_* as defined in linux/fs.h
+ * for file type specification. This will be propagated into the directory
+ * structure if appropriate for the given operation and filesystem config.
+ */
+const unsigned char xfs_mode_to_ftype[S_IFMT >> S_SHIFT] = {
+	[0]			= XFS_DIR3_FT_UNKNOWN,
+	[S_IFREG >> S_SHIFT]    = XFS_DIR3_FT_REG_FILE,
+	[S_IFDIR >> S_SHIFT]    = XFS_DIR3_FT_DIR,
+	[S_IFCHR >> S_SHIFT]    = XFS_DIR3_FT_CHRDEV,
+	[S_IFBLK >> S_SHIFT]    = XFS_DIR3_FT_BLKDEV,
+	[S_IFIFO >> S_SHIFT]    = XFS_DIR3_FT_FIFO,
+	[S_IFSOCK >> S_SHIFT]   = XFS_DIR3_FT_SOCK,
+	[S_IFLNK >> S_SHIFT]    = XFS_DIR3_FT_SYMLINK,
+};
+
+STATIC int
+xfs_dir2_sf_getdents(
+	xfs_inode_t		*dp,		/* incore directory inode */
+	struct dir_context	*ctx)
+{
+	int			i;		/* shortform entry number */
+	xfs_mount_t		*mp;		/* filesystem mount point */
+	xfs_dir2_dataptr_t	off;		/* current entry's offset */
+	xfs_dir2_sf_entry_t	*sfep;		/* shortform directory entry */
+	xfs_dir2_sf_hdr_t	*sfp;		/* shortform structure */
+	xfs_dir2_dataptr_t	dot_offset;
+	xfs_dir2_dataptr_t	dotdot_offset;
+	xfs_ino_t		ino;
+
+	mp = dp->i_mount;
+
+	ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
+	/*
+	 * Give up if the directory is way too short.
+	 */
+	if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) {
+		ASSERT(XFS_FORCED_SHUTDOWN(mp));
+		return XFS_ERROR(EIO);
+	}
+
+	ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
+	ASSERT(dp->i_df.if_u1.if_data != NULL);
+
+	sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
+
+	ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count));
+
+	/*
+	 * If the block number in the offset is out of range, we're done.
+	 */
+	if (xfs_dir2_dataptr_to_db(mp, ctx->pos) > mp->m_dirdatablk)
+		return 0;
+
+	/*
+	 * Precalculate offsets for . and .. as we will always need them.
+	 *
+	 * XXX(hch): the second argument is sometimes 0 and sometimes
+	 * mp->m_dirdatablk.
+	 */
+	dot_offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
+					     XFS_DIR3_DATA_DOT_OFFSET(mp));
+	dotdot_offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
+						XFS_DIR3_DATA_DOTDOT_OFFSET(mp));
+
+	/*
+	 * Put . entry unless we're starting past it.
+	 */
+	if (ctx->pos <= dot_offset) {
+		ctx->pos = dot_offset & 0x7fffffff;
+		if (!dir_emit(ctx, ".", 1, dp->i_ino, DT_DIR))
+			return 0;
+	}
+
+	/*
+	 * Put .. entry unless we're starting past it.
+	 */
+	if (ctx->pos <= dotdot_offset) {
+		ino = xfs_dir2_sf_get_parent_ino(sfp);
+		ctx->pos = dotdot_offset & 0x7fffffff;
+		if (!dir_emit(ctx, "..", 2, ino, DT_DIR))
+			return 0;
+	}
+
+	/*
+	 * Loop while there are more entries and put'ing works.
+	 */
+	sfep = xfs_dir2_sf_firstentry(sfp);
+	for (i = 0; i < sfp->count; i++) {
+		__uint8_t filetype;
+
+		off = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
+				xfs_dir2_sf_get_offset(sfep));
+
+		if (ctx->pos > off) {
+			sfep = xfs_dir3_sf_nextentry(mp, sfp, sfep);
+			continue;
+		}
+
+		ino = xfs_dir3_sfe_get_ino(mp, sfp, sfep);
+		filetype = xfs_dir3_sfe_get_ftype(mp, sfp, sfep);
+		ctx->pos = off & 0x7fffffff;
+		if (!dir_emit(ctx, (char *)sfep->name, sfep->namelen, ino,
+			    xfs_dir3_get_dtype(mp, filetype)))
+			return 0;
+		sfep = xfs_dir3_sf_nextentry(mp, sfp, sfep);
+	}
+
+	ctx->pos = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk + 1, 0) &
+			0x7fffffff;
+	return 0;
+}
+
+/*
+ * Readdir for block directories.
+ */
+STATIC int
+xfs_dir2_block_getdents(
+	xfs_inode_t		*dp,		/* incore inode */
+	struct dir_context	*ctx)
+{
+	xfs_dir2_data_hdr_t	*hdr;		/* block header */
+	struct xfs_buf		*bp;		/* buffer for block */
+	xfs_dir2_block_tail_t	*btp;		/* block tail */
+	xfs_dir2_data_entry_t	*dep;		/* block data entry */
+	xfs_dir2_data_unused_t	*dup;		/* block unused entry */
+	char			*endptr;	/* end of the data entries */
+	int			error;		/* error return value */
+	xfs_mount_t		*mp;		/* filesystem mount point */
+	char			*ptr;		/* current data entry */
+	int			wantoff;	/* starting block offset */
+	xfs_off_t		cook;
+
+	mp = dp->i_mount;
+	/*
+	 * If the block number in the offset is out of range, we're done.
+	 */
+	if (xfs_dir2_dataptr_to_db(mp, ctx->pos) > mp->m_dirdatablk)
+		return 0;
+
+	error = xfs_dir3_block_read(NULL, dp, &bp);
+	if (error)
+		return error;
+
+	/*
+	 * Extract the byte offset we start at from the seek pointer.
+	 * We'll skip entries before this.
+	 */
+	wantoff = xfs_dir2_dataptr_to_off(mp, ctx->pos);
+	hdr = bp->b_addr;
+	xfs_dir3_data_check(dp, bp);
+	/*
+	 * Set up values for the loop.
+	 */
+	btp = xfs_dir2_block_tail_p(mp, hdr);
+	ptr = (char *)xfs_dir3_data_entry_p(hdr);
+	endptr = (char *)xfs_dir2_block_leaf_p(btp);
+
+	/*
+	 * Loop over the data portion of the block.
+	 * Each object is a real entry (dep) or an unused one (dup).
+	 */
+	while (ptr < endptr) {
+		__uint8_t filetype;
+
+		dup = (xfs_dir2_data_unused_t *)ptr;
+		/*
+		 * Unused, skip it.
+		 */
+		if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
+			ptr += be16_to_cpu(dup->length);
+			continue;
+		}
+
+		dep = (xfs_dir2_data_entry_t *)ptr;
+
+		/*
+		 * Bump pointer for the next iteration.
+		 */
+		ptr += xfs_dir3_data_entsize(mp, dep->namelen);
+		/*
+		 * The entry is before the desired starting point, skip it.
+		 */
+		if ((char *)dep - (char *)hdr < wantoff)
+			continue;
+
+		cook = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
+					    (char *)dep - (char *)hdr);
+
+		ctx->pos = cook & 0x7fffffff;
+		filetype = xfs_dir3_dirent_get_ftype(mp, dep);
+		/*
+		 * If it didn't fit, set the final offset to here & return.
+		 */
+		if (!dir_emit(ctx, (char *)dep->name, dep->namelen,
+			    be64_to_cpu(dep->inumber),
+			    xfs_dir3_get_dtype(mp, filetype))) {
+			xfs_trans_brelse(NULL, bp);
+			return 0;
+		}
+	}
+
+	/*
+	 * Reached the end of the block.
+	 * Set the offset to a non-existent block 1 and return.
+	 */
+	ctx->pos = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk + 1, 0) &
+			0x7fffffff;
+	xfs_trans_brelse(NULL, bp);
+	return 0;
+}
+
+struct xfs_dir2_leaf_map_info {
+	xfs_extlen_t	map_blocks;	/* number of fsbs in map */
+	xfs_dablk_t	map_off;	/* last mapped file offset */
+	int		map_size;	/* total entries in *map */
+	int		map_valid;	/* valid entries in *map */
+	int		nmap;		/* mappings to ask xfs_bmapi */
+	xfs_dir2_db_t	curdb;		/* db for current block */
+	int		ra_current;	/* number of read-ahead blks */
+	int		ra_index;	/* *map index for read-ahead */
+	int		ra_offset;	/* map entry offset for ra */
+	int		ra_want;	/* readahead count wanted */
+	struct xfs_bmbt_irec map[];	/* map vector for blocks */
+};
+
+STATIC int
+xfs_dir2_leaf_readbuf(
+	struct xfs_inode	*dp,
+	size_t			bufsize,
+	struct xfs_dir2_leaf_map_info *mip,
+	xfs_dir2_off_t		*curoff,
+	struct xfs_buf		**bpp)
+{
+	struct xfs_mount	*mp = dp->i_mount;
+	struct xfs_buf		*bp = *bpp;
+	struct xfs_bmbt_irec	*map = mip->map;
+	struct blk_plug		plug;
+	int			error = 0;
+	int			length;
+	int			i;
+	int			j;
+
+	/*
+	 * If we have a buffer, we need to release it and
+	 * take it out of the mapping.
+	 */
+
+	if (bp) {
+		xfs_trans_brelse(NULL, bp);
+		bp = NULL;
+		mip->map_blocks -= mp->m_dirblkfsbs;
+		/*
+		 * Loop to get rid of the extents for the
+		 * directory block.
+		 */
+		for (i = mp->m_dirblkfsbs; i > 0; ) {
+			j = min_t(int, map->br_blockcount, i);
+			map->br_blockcount -= j;
+			map->br_startblock += j;
+			map->br_startoff += j;
+			/*
+			 * If mapping is done, pitch it from
+			 * the table.
+			 */
+			if (!map->br_blockcount && --mip->map_valid)
+				memmove(&map[0], &map[1],
+					sizeof(map[0]) * mip->map_valid);
+			i -= j;
+		}
+	}
+
+	/*
+	 * Recalculate the readahead blocks wanted.
+	 */
+	mip->ra_want = howmany(bufsize + mp->m_dirblksize,
+			       mp->m_sb.sb_blocksize) - 1;
+	ASSERT(mip->ra_want >= 0);
+
+	/*
+	 * If we don't have as many as we want, and we haven't
+	 * run out of data blocks, get some more mappings.
+	 */
+	if (1 + mip->ra_want > mip->map_blocks &&
+	    mip->map_off < xfs_dir2_byte_to_da(mp, XFS_DIR2_LEAF_OFFSET)) {
+		/*
+		 * Get more bmaps, fill in after the ones
+		 * we already have in the table.
+		 */
+		mip->nmap = mip->map_size - mip->map_valid;
+		error = xfs_bmapi_read(dp, mip->map_off,
+				xfs_dir2_byte_to_da(mp, XFS_DIR2_LEAF_OFFSET) -
+								mip->map_off,
+				&map[mip->map_valid], &mip->nmap, 0);
+
+		/*
+		 * Don't know if we should ignore this or try to return an
+		 * error.  The trouble with returning errors is that readdir
+		 * will just stop without actually passing the error through.
+		 */
+		if (error)
+			goto out;	/* XXX */
+
+		/*
+		 * If we got all the mappings we asked for, set the final map
+		 * offset based on the last bmap value received.  Otherwise,
+		 * we've reached the end.
+		 */
+		if (mip->nmap == mip->map_size - mip->map_valid) {
+			i = mip->map_valid + mip->nmap - 1;
+			mip->map_off = map[i].br_startoff + map[i].br_blockcount;
+		} else
+			mip->map_off = xfs_dir2_byte_to_da(mp,
+							XFS_DIR2_LEAF_OFFSET);
+
+		/*
+		 * Look for holes in the mapping, and eliminate them.  Count up
+		 * the valid blocks.
+		 */
+		for (i = mip->map_valid; i < mip->map_valid + mip->nmap; ) {
+			if (map[i].br_startblock == HOLESTARTBLOCK) {
+				mip->nmap--;
+				length = mip->map_valid + mip->nmap - i;
+				if (length)
+					memmove(&map[i], &map[i + 1],
+						sizeof(map[i]) * length);
+			} else {
+				mip->map_blocks += map[i].br_blockcount;
+				i++;
+			}
+		}
+		mip->map_valid += mip->nmap;
+	}
+
+	/*
+	 * No valid mappings, so no more data blocks.
+	 */
+	if (!mip->map_valid) {
+		*curoff = xfs_dir2_da_to_byte(mp, mip->map_off);
+		goto out;
+	}
+
+	/*
+	 * Read the directory block starting at the first mapping.
+	 */
+	mip->curdb = xfs_dir2_da_to_db(mp, map->br_startoff);
+	error = xfs_dir3_data_read(NULL, dp, map->br_startoff,
+			map->br_blockcount >= mp->m_dirblkfsbs ?
+			    XFS_FSB_TO_DADDR(mp, map->br_startblock) : -1, &bp);
+
+	/*
+	 * Should just skip over the data block instead of giving up.
+	 */
+	if (error)
+		goto out;	/* XXX */
+
+	/*
+	 * Adjust the current amount of read-ahead: we just read a block that
+	 * was previously ra.
+	 */
+	if (mip->ra_current)
+		mip->ra_current -= mp->m_dirblkfsbs;
+
+	/*
+	 * Do we need more readahead?
+	 */
+	blk_start_plug(&plug);
+	for (mip->ra_index = mip->ra_offset = i = 0;
+	     mip->ra_want > mip->ra_current && i < mip->map_blocks;
+	     i += mp->m_dirblkfsbs) {
+		ASSERT(mip->ra_index < mip->map_valid);
+		/*
+		 * Read-ahead a contiguous directory block.
+		 */
+		if (i > mip->ra_current &&
+		    map[mip->ra_index].br_blockcount >= mp->m_dirblkfsbs) {
+			xfs_dir3_data_readahead(NULL, dp,
+				map[mip->ra_index].br_startoff + mip->ra_offset,
+				XFS_FSB_TO_DADDR(mp,
+					map[mip->ra_index].br_startblock +
+							mip->ra_offset));
+			mip->ra_current = i;
+		}
+
+		/*
+		 * Read-ahead a non-contiguous directory block.  This doesn't
+		 * use our mapping, but this is a very rare case.
+		 */
+		else if (i > mip->ra_current) {
+			xfs_dir3_data_readahead(NULL, dp,
+					map[mip->ra_index].br_startoff +
+							mip->ra_offset, -1);
+			mip->ra_current = i;
+		}
+
+		/*
+		 * Advance offset through the mapping table.
+		 */
+		for (j = 0; j < mp->m_dirblkfsbs; j++) {
+			/*
+			 * The rest of this extent but not more than a dir
+			 * block.
+			 */
+			length = min_t(int, mp->m_dirblkfsbs,
+					map[mip->ra_index].br_blockcount -
+							mip->ra_offset);
+			j += length;
+			mip->ra_offset += length;
+
+			/*
+			 * Advance to the next mapping if this one is used up.
+			 */
+			if (mip->ra_offset == map[mip->ra_index].br_blockcount) {
+				mip->ra_offset = 0;
+				mip->ra_index++;
+			}
+		}
+	}
+	blk_finish_plug(&plug);
+
+out:
+	*bpp = bp;
+	return error;
+}
+
+/*
+ * Getdents (readdir) for leaf and node directories.
+ * This reads the data blocks only, so is the same for both forms.
+ */
+STATIC int
+xfs_dir2_leaf_getdents(
+	xfs_inode_t		*dp,		/* incore directory inode */
+	struct dir_context	*ctx,
+	size_t			bufsize)
+{
+	struct xfs_buf		*bp = NULL;	/* data block buffer */
+	xfs_dir2_data_hdr_t	*hdr;		/* data block header */
+	xfs_dir2_data_entry_t	*dep;		/* data entry */
+	xfs_dir2_data_unused_t	*dup;		/* unused entry */
+	int			error = 0;	/* error return value */
+	int			length;		/* temporary length value */
+	xfs_mount_t		*mp;		/* filesystem mount point */
+	int			byteoff;	/* offset in current block */
+	xfs_dir2_off_t		curoff;		/* current overall offset */
+	xfs_dir2_off_t		newoff;		/* new curoff after new blk */
+	char			*ptr = NULL;	/* pointer to current data */
+	struct xfs_dir2_leaf_map_info *map_info;
+
+	/*
+	 * If the offset is at or past the largest allowed value,
+	 * give up right away.
+	 */
+	if (ctx->pos >= XFS_DIR2_MAX_DATAPTR)
+		return 0;
+
+	mp = dp->i_mount;
+
+	/*
+	 * Set up to bmap a number of blocks based on the caller's
+	 * buffer size, the directory block size, and the filesystem
+	 * block size.
+	 */
+	length = howmany(bufsize + mp->m_dirblksize,
+				     mp->m_sb.sb_blocksize);
+	map_info = kmem_zalloc(offsetof(struct xfs_dir2_leaf_map_info, map) +
+				(length * sizeof(struct xfs_bmbt_irec)),
+			       KM_SLEEP | KM_NOFS);
+	map_info->map_size = length;
+
+	/*
+	 * Inside the loop we keep the main offset value as a byte offset
+	 * in the directory file.
+	 */
+	curoff = xfs_dir2_dataptr_to_byte(mp, ctx->pos);
+
+	/*
+	 * Force this conversion through db so we truncate the offset
+	 * down to get the start of the data block.
+	 */
+	map_info->map_off = xfs_dir2_db_to_da(mp,
+					      xfs_dir2_byte_to_db(mp, curoff));
+
+	/*
+	 * Loop over directory entries until we reach the end offset.
+	 * Get more blocks and readahead as necessary.
+	 */
+	while (curoff < XFS_DIR2_LEAF_OFFSET) {
+		__uint8_t filetype;
+
+		/*
+		 * If we have no buffer, or we're off the end of the
+		 * current buffer, need to get another one.
+		 */
+		if (!bp || ptr >= (char *)bp->b_addr + mp->m_dirblksize) {
+
+			error = xfs_dir2_leaf_readbuf(dp, bufsize, map_info,
+						      &curoff, &bp);
+			if (error || !map_info->map_valid)
+				break;
+
+			/*
+			 * Having done a read, we need to set a new offset.
+			 */
+			newoff = xfs_dir2_db_off_to_byte(mp, map_info->curdb, 0);
+			/*
+			 * Start of the current block.
+			 */
+			if (curoff < newoff)
+				curoff = newoff;
+			/*
+			 * Make sure we're in the right block.
+			 */
+			else if (curoff > newoff)
+				ASSERT(xfs_dir2_byte_to_db(mp, curoff) ==
+				       map_info->curdb);
+			hdr = bp->b_addr;
+			xfs_dir3_data_check(dp, bp);
+			/*
+			 * Find our position in the block.
+			 */
+			ptr = (char *)xfs_dir3_data_entry_p(hdr);
+			byteoff = xfs_dir2_byte_to_off(mp, curoff);
+			/*
+			 * Skip past the header.
+			 */
+			if (byteoff == 0)
+				curoff += xfs_dir3_data_entry_offset(hdr);
+			/*
+			 * Skip past entries until we reach our offset.
+			 */
+			else {
+				while ((char *)ptr - (char *)hdr < byteoff) {
+					dup = (xfs_dir2_data_unused_t *)ptr;
+
+					if (be16_to_cpu(dup->freetag)
+						  == XFS_DIR2_DATA_FREE_TAG) {
+
+						length = be16_to_cpu(dup->length);
+						ptr += length;
+						continue;
+					}
+					dep = (xfs_dir2_data_entry_t *)ptr;
+					length =
+					   xfs_dir3_data_entsize(mp, dep->namelen);
+					ptr += length;
+				}
+				/*
+				 * Now set our real offset.
+				 */
+				curoff =
+					xfs_dir2_db_off_to_byte(mp,
+					    xfs_dir2_byte_to_db(mp, curoff),
+					    (char *)ptr - (char *)hdr);
+				if (ptr >= (char *)hdr + mp->m_dirblksize) {
+					continue;
+				}
+			}
+		}
+		/*
+		 * We have a pointer to an entry.
+		 * Is it a live one?
+		 */
+		dup = (xfs_dir2_data_unused_t *)ptr;
+		/*
+		 * No, it's unused, skip over it.
+		 */
+		if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
+			length = be16_to_cpu(dup->length);
+			ptr += length;
+			curoff += length;
+			continue;
+		}
+
+		dep = (xfs_dir2_data_entry_t *)ptr;
+		length = xfs_dir3_data_entsize(mp, dep->namelen);
+		filetype = xfs_dir3_dirent_get_ftype(mp, dep);
+
+		ctx->pos = xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff;
+		if (!dir_emit(ctx, (char *)dep->name, dep->namelen,
+			    be64_to_cpu(dep->inumber),
+			    xfs_dir3_get_dtype(mp, filetype)))
+			break;
+
+		/*
+		 * Advance to next entry in the block.
+		 */
+		ptr += length;
+		curoff += length;
+		/* bufsize may have just been a guess; don't go negative */
+		bufsize = bufsize > length ? bufsize - length : 0;
+	}
+
+	/*
+	 * All done.  Set output offset value to current offset.
+	 */
+	if (curoff > xfs_dir2_dataptr_to_byte(mp, XFS_DIR2_MAX_DATAPTR))
+		ctx->pos = XFS_DIR2_MAX_DATAPTR & 0x7fffffff;
+	else
+		ctx->pos = xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff;
+	kmem_free(map_info);
+	if (bp)
+		xfs_trans_brelse(NULL, bp);
+	return error;
+}
+
+/*
+ * Read a directory.
+ */
+int
+xfs_readdir(
+	xfs_inode_t	*dp,
+	struct dir_context *ctx,
+	size_t		bufsize)
+{
+	int		rval;		/* return value */
+	int		v;		/* type-checking value */
+
+	trace_xfs_readdir(dp);
+
+	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
+		return XFS_ERROR(EIO);
+
+	ASSERT(S_ISDIR(dp->i_d.di_mode));
+	XFS_STATS_INC(xs_dir_getdents);
+
+	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
+		rval = xfs_dir2_sf_getdents(dp, ctx);
+	else if ((rval = xfs_dir2_isblock(NULL, dp, &v)))
+		;
+	else if (v)
+		rval = xfs_dir2_block_getdents(dp, ctx);
+	else
+		rval = xfs_dir2_leaf_getdents(dp, ctx, bufsize);
+	return rval;
+}
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c
index 97676a3..bb6e284 100644
--- a/fs/xfs/xfs_dir2_sf.c
+++ b/fs/xfs/xfs_dir2_sf.c
@@ -29,8 +29,8 @@
 #include "xfs_inode.h"
 #include "xfs_inode_item.h"
 #include "xfs_error.h"
-#include "xfs_dir2.h"
 #include "xfs_dir2_format.h"
+#include "xfs_dir2.h"
 #include "xfs_dir2_priv.h"
 #include "xfs_trace.h"
 
@@ -95,7 +95,7 @@
 	return xfs_dir2_sf_get_ino(hdr, &hdr->parent);
 }
 
-static void
+void
 xfs_dir2_sf_put_parent_ino(
 	struct xfs_dir2_sf_hdr	*hdr,
 	xfs_ino_t		ino)
@@ -105,31 +105,38 @@
 
 /*
  * In short-form directory entries the inode numbers are stored at variable
- * offset behind the entry name.  The inode numbers may only be accessed
- * through the helpers below.
+ * offset behind the entry name. If the entry stores a filetype value, then it
+ * sits between the name and the inode number. Hence the inode numbers may only
+ * be accessed through the helpers below.
  */
 static xfs_dir2_inou_t *
-xfs_dir2_sfe_inop(
+xfs_dir3_sfe_inop(
+	struct xfs_mount	*mp,
 	struct xfs_dir2_sf_entry *sfep)
 {
-	return (xfs_dir2_inou_t *)&sfep->name[sfep->namelen];
+	__uint8_t	*ptr = &sfep->name[sfep->namelen];
+	if (xfs_sb_version_hasftype(&mp->m_sb))
+		ptr++;
+	return (xfs_dir2_inou_t *)ptr;
 }
 
 xfs_ino_t
-xfs_dir2_sfe_get_ino(
+xfs_dir3_sfe_get_ino(
+	struct xfs_mount	*mp,
 	struct xfs_dir2_sf_hdr	*hdr,
 	struct xfs_dir2_sf_entry *sfep)
 {
-	return xfs_dir2_sf_get_ino(hdr, xfs_dir2_sfe_inop(sfep));
+	return xfs_dir2_sf_get_ino(hdr, xfs_dir3_sfe_inop(mp, sfep));
 }
 
-static void
-xfs_dir2_sfe_put_ino(
+void
+xfs_dir3_sfe_put_ino(
+	struct xfs_mount	*mp,
 	struct xfs_dir2_sf_hdr	*hdr,
 	struct xfs_dir2_sf_entry *sfep,
 	xfs_ino_t		ino)
 {
-	xfs_dir2_sf_put_ino(hdr, xfs_dir2_sfe_inop(sfep), ino);
+	xfs_dir2_sf_put_ino(hdr, xfs_dir3_sfe_inop(mp, sfep), ino);
 }
 
 /*
@@ -157,9 +164,16 @@
 	int			namelen;	/* total name bytes */
 	xfs_ino_t		parent = 0;	/* parent inode number */
 	int			size=0;		/* total computed size */
+	int			has_ftype;
 
 	mp = dp->i_mount;
 
+	/*
+	 * if there is a filetype field, add the extra byte to the namelen
+	 * for each entry that we see.
+	 */
+	has_ftype = xfs_sb_version_hasftype(&mp->m_sb) ? 1 : 0;
+
 	count = i8count = namelen = 0;
 	btp = xfs_dir2_block_tail_p(mp, hdr);
 	blp = xfs_dir2_block_leaf_p(btp);
@@ -188,9 +202,10 @@
 		if (!isdot)
 			i8count += be64_to_cpu(dep->inumber) > XFS_DIR2_MAX_SHORT_INUM;
 #endif
+		/* take into account the file type field */
 		if (!isdot && !isdotdot) {
 			count++;
-			namelen += dep->namelen;
+			namelen += dep->namelen + has_ftype;
 		} else if (isdotdot)
 			parent = be64_to_cpu(dep->inumber);
 		/*
@@ -316,12 +331,14 @@
 				(xfs_dir2_data_aoff_t)
 				((char *)dep - (char *)hdr));
 			memcpy(sfep->name, dep->name, dep->namelen);
-			xfs_dir2_sfe_put_ino(sfp, sfep,
+			xfs_dir3_sfe_put_ino(mp, sfp, sfep,
 					     be64_to_cpu(dep->inumber));
+			xfs_dir3_sfe_put_ftype(mp, sfp, sfep,
+					xfs_dir3_dirent_get_ftype(mp, dep));
 
-			sfep = xfs_dir2_sf_nextentry(sfp, sfep);
+			sfep = xfs_dir3_sf_nextentry(mp, sfp, sfep);
 		}
-		ptr += xfs_dir2_data_entsize(dep->namelen);
+		ptr += xfs_dir3_data_entsize(mp, dep->namelen);
 	}
 	ASSERT((char *)sfep - (char *)sfp == size);
 	xfs_dir2_sf_check(args);
@@ -372,7 +389,7 @@
 	/*
 	 * Compute entry (and change in) size.
 	 */
-	add_entsize = xfs_dir2_sf_entsize(sfp, args->namelen);
+	add_entsize = xfs_dir3_sf_entsize(dp->i_mount, sfp, args->namelen);
 	incr_isize = add_entsize;
 	objchange = 0;
 #if XFS_BIG_INUMS
@@ -466,8 +483,9 @@
 	/*
 	 * Grow the in-inode space.
 	 */
-	xfs_idata_realloc(dp, xfs_dir2_sf_entsize(sfp, args->namelen),
-		XFS_DATA_FORK);
+	xfs_idata_realloc(dp,
+			  xfs_dir3_sf_entsize(dp->i_mount, sfp, args->namelen),
+			  XFS_DATA_FORK);
 	/*
 	 * Need to set up again due to realloc of the inode data.
 	 */
@@ -479,7 +497,9 @@
 	sfep->namelen = args->namelen;
 	xfs_dir2_sf_put_offset(sfep, offset);
 	memcpy(sfep->name, args->name, sfep->namelen);
-	xfs_dir2_sfe_put_ino(sfp, sfep, args->inumber);
+	xfs_dir3_sfe_put_ino(dp->i_mount, sfp, sfep, args->inumber);
+	xfs_dir3_sfe_put_ftype(dp->i_mount, sfp, sfep, args->filetype);
+
 	/*
 	 * Update the header and inode.
 	 */
@@ -519,11 +539,13 @@
 	xfs_dir2_sf_hdr_t	*oldsfp;	/* original shortform dir */
 	xfs_dir2_sf_entry_t	*sfep;		/* entry in new dir */
 	xfs_dir2_sf_hdr_t	*sfp;		/* new shortform dir */
+	struct xfs_mount	*mp;
 
 	/*
 	 * Copy the old directory to the stack buffer.
 	 */
 	dp = args->dp;
+	mp = dp->i_mount;
 
 	sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
 	old_isize = (int)dp->i_d.di_size;
@@ -535,13 +557,13 @@
 	 * to insert the new entry.
 	 * If it's going to end up at the end then oldsfep will point there.
 	 */
-	for (offset = XFS_DIR3_DATA_FIRST_OFFSET(dp->i_mount),
+	for (offset = XFS_DIR3_DATA_FIRST_OFFSET(mp),
 	      oldsfep = xfs_dir2_sf_firstentry(oldsfp),
-	      add_datasize = xfs_dir2_data_entsize(args->namelen),
+	      add_datasize = xfs_dir3_data_entsize(mp, args->namelen),
 	      eof = (char *)oldsfep == &buf[old_isize];
 	     !eof;
-	     offset = new_offset + xfs_dir2_data_entsize(oldsfep->namelen),
-	      oldsfep = xfs_dir2_sf_nextentry(oldsfp, oldsfep),
+	     offset = new_offset + xfs_dir3_data_entsize(mp, oldsfep->namelen),
+	      oldsfep = xfs_dir3_sf_nextentry(mp, oldsfp, oldsfep),
 	      eof = (char *)oldsfep == &buf[old_isize]) {
 		new_offset = xfs_dir2_sf_get_offset(oldsfep);
 		if (offset + add_datasize <= new_offset)
@@ -570,7 +592,8 @@
 	sfep->namelen = args->namelen;
 	xfs_dir2_sf_put_offset(sfep, offset);
 	memcpy(sfep->name, args->name, sfep->namelen);
-	xfs_dir2_sfe_put_ino(sfp, sfep, args->inumber);
+	xfs_dir3_sfe_put_ino(mp, sfp, sfep, args->inumber);
+	xfs_dir3_sfe_put_ftype(mp, sfp, sfep, args->filetype);
 	sfp->count++;
 #if XFS_BIG_INUMS
 	if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && !objchange)
@@ -580,7 +603,7 @@
 	 * If there's more left to copy, do that.
 	 */
 	if (!eof) {
-		sfep = xfs_dir2_sf_nextentry(sfp, sfep);
+		sfep = xfs_dir3_sf_nextentry(mp, sfp, sfep);
 		memcpy(sfep, oldsfep, old_isize - nbytes);
 	}
 	kmem_free(buf);
@@ -616,7 +639,7 @@
 	mp = dp->i_mount;
 
 	sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
-	size = xfs_dir2_data_entsize(args->namelen);
+	size = xfs_dir3_data_entsize(mp, args->namelen);
 	offset = XFS_DIR3_DATA_FIRST_OFFSET(mp);
 	sfep = xfs_dir2_sf_firstentry(sfp);
 	holefit = 0;
@@ -629,8 +652,8 @@
 		if (!holefit)
 			holefit = offset + size <= xfs_dir2_sf_get_offset(sfep);
 		offset = xfs_dir2_sf_get_offset(sfep) +
-			 xfs_dir2_data_entsize(sfep->namelen);
-		sfep = xfs_dir2_sf_nextentry(sfp, sfep);
+			 xfs_dir3_data_entsize(mp, sfep->namelen);
+		sfep = xfs_dir3_sf_nextentry(mp, sfp, sfep);
 	}
 	/*
 	 * Calculate data bytes used excluding the new entry, if this
@@ -684,31 +707,34 @@
 	int			offset;		/* data offset */
 	xfs_dir2_sf_entry_t	*sfep;		/* shortform dir entry */
 	xfs_dir2_sf_hdr_t	*sfp;		/* shortform structure */
+	struct xfs_mount	*mp;
 
 	dp = args->dp;
+	mp = dp->i_mount;
 
 	sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
-	offset = XFS_DIR3_DATA_FIRST_OFFSET(dp->i_mount);
+	offset = XFS_DIR3_DATA_FIRST_OFFSET(mp);
 	ino = xfs_dir2_sf_get_parent_ino(sfp);
 	i8count = ino > XFS_DIR2_MAX_SHORT_INUM;
 
 	for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);
 	     i < sfp->count;
-	     i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
+	     i++, sfep = xfs_dir3_sf_nextentry(mp, sfp, sfep)) {
 		ASSERT(xfs_dir2_sf_get_offset(sfep) >= offset);
-		ino = xfs_dir2_sfe_get_ino(sfp, sfep);
+		ino = xfs_dir3_sfe_get_ino(mp, sfp, sfep);
 		i8count += ino > XFS_DIR2_MAX_SHORT_INUM;
 		offset =
 			xfs_dir2_sf_get_offset(sfep) +
-			xfs_dir2_data_entsize(sfep->namelen);
+			xfs_dir3_data_entsize(mp, sfep->namelen);
+		ASSERT(xfs_dir3_sfe_get_ftype(mp, sfp, sfep) <
+							XFS_DIR3_FT_MAX);
 	}
 	ASSERT(i8count == sfp->i8count);
 	ASSERT(XFS_BIG_INUMS || i8count == 0);
 	ASSERT((char *)sfep - (char *)sfp == dp->i_d.di_size);
 	ASSERT(offset +
 	       (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) +
-	       (uint)sizeof(xfs_dir2_block_tail_t) <=
-	       dp->i_mount->m_dirblksize);
+	       (uint)sizeof(xfs_dir2_block_tail_t) <= mp->m_dirblksize);
 }
 #endif	/* DEBUG */
 
@@ -765,100 +791,6 @@
 	return 0;
 }
 
-int						/* error */
-xfs_dir2_sf_getdents(
-	xfs_inode_t		*dp,		/* incore directory inode */
-	struct dir_context	*ctx)
-{
-	int			i;		/* shortform entry number */
-	xfs_mount_t		*mp;		/* filesystem mount point */
-	xfs_dir2_dataptr_t	off;		/* current entry's offset */
-	xfs_dir2_sf_entry_t	*sfep;		/* shortform directory entry */
-	xfs_dir2_sf_hdr_t	*sfp;		/* shortform structure */
-	xfs_dir2_dataptr_t	dot_offset;
-	xfs_dir2_dataptr_t	dotdot_offset;
-	xfs_ino_t		ino;
-
-	mp = dp->i_mount;
-
-	ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
-	/*
-	 * Give up if the directory is way too short.
-	 */
-	if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) {
-		ASSERT(XFS_FORCED_SHUTDOWN(mp));
-		return XFS_ERROR(EIO);
-	}
-
-	ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
-	ASSERT(dp->i_df.if_u1.if_data != NULL);
-
-	sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
-
-	ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count));
-
-	/*
-	 * If the block number in the offset is out of range, we're done.
-	 */
-	if (xfs_dir2_dataptr_to_db(mp, ctx->pos) > mp->m_dirdatablk)
-		return 0;
-
-	/*
-	 * Precalculate offsets for . and .. as we will always need them.
-	 *
-	 * XXX(hch): the second argument is sometimes 0 and sometimes
-	 * mp->m_dirdatablk.
-	 */
-	dot_offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
-					     XFS_DIR3_DATA_DOT_OFFSET(mp));
-	dotdot_offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
-						XFS_DIR3_DATA_DOTDOT_OFFSET(mp));
-
-	/*
-	 * Put . entry unless we're starting past it.
-	 */
-	if (ctx->pos <= dot_offset) {
-		ctx->pos = dot_offset & 0x7fffffff;
-		if (!dir_emit(ctx, ".", 1, dp->i_ino, DT_DIR))
-			return 0;
-	}
-
-	/*
-	 * Put .. entry unless we're starting past it.
-	 */
-	if (ctx->pos <= dotdot_offset) {
-		ino = xfs_dir2_sf_get_parent_ino(sfp);
-		ctx->pos = dotdot_offset & 0x7fffffff;
-		if (!dir_emit(ctx, "..", 2, ino, DT_DIR))
-			return 0;
-	}
-
-	/*
-	 * Loop while there are more entries and put'ing works.
-	 */
-	sfep = xfs_dir2_sf_firstentry(sfp);
-	for (i = 0; i < sfp->count; i++) {
-		off = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
-				xfs_dir2_sf_get_offset(sfep));
-
-		if (ctx->pos > off) {
-			sfep = xfs_dir2_sf_nextentry(sfp, sfep);
-			continue;
-		}
-
-		ino = xfs_dir2_sfe_get_ino(sfp, sfep);
-		ctx->pos = off & 0x7fffffff;
-		if (!dir_emit(ctx, (char *)sfep->name, sfep->namelen,
-			    ino, DT_UNKNOWN))
-			return 0;
-		sfep = xfs_dir2_sf_nextentry(sfp, sfep);
-	}
-
-	ctx->pos = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk + 1, 0) &
-			0x7fffffff;
-	return 0;
-}
-
 /*
  * Lookup an entry in a shortform directory.
  * Returns EEXIST if found, ENOENT if not found.
@@ -898,6 +830,7 @@
 	if (args->namelen == 1 && args->name[0] == '.') {
 		args->inumber = dp->i_ino;
 		args->cmpresult = XFS_CMP_EXACT;
+		args->filetype = XFS_DIR3_FT_DIR;
 		return XFS_ERROR(EEXIST);
 	}
 	/*
@@ -907,6 +840,7 @@
 	    args->name[0] == '.' && args->name[1] == '.') {
 		args->inumber = xfs_dir2_sf_get_parent_ino(sfp);
 		args->cmpresult = XFS_CMP_EXACT;
+		args->filetype = XFS_DIR3_FT_DIR;
 		return XFS_ERROR(EEXIST);
 	}
 	/*
@@ -914,7 +848,7 @@
 	 */
 	ci_sfep = NULL;
 	for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count;
-				i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
+	     i++, sfep = xfs_dir3_sf_nextentry(dp->i_mount, sfp, sfep)) {
 		/*
 		 * Compare name and if it's an exact match, return the inode
 		 * number. If it's the first case-insensitive match, store the
@@ -924,7 +858,10 @@
 								sfep->namelen);
 		if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {
 			args->cmpresult = cmp;
-			args->inumber = xfs_dir2_sfe_get_ino(sfp, sfep);
+			args->inumber = xfs_dir3_sfe_get_ino(dp->i_mount,
+							     sfp, sfep);
+			args->filetype = xfs_dir3_sfe_get_ftype(dp->i_mount,
+								sfp, sfep);
 			if (cmp == XFS_CMP_EXACT)
 				return XFS_ERROR(EEXIST);
 			ci_sfep = sfep;
@@ -980,10 +917,10 @@
 	 * Find the one we're deleting.
 	 */
 	for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count;
-				i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
+	     i++, sfep = xfs_dir3_sf_nextentry(dp->i_mount, sfp, sfep)) {
 		if (xfs_da_compname(args, sfep->name, sfep->namelen) ==
 								XFS_CMP_EXACT) {
-			ASSERT(xfs_dir2_sfe_get_ino(sfp, sfep) ==
+			ASSERT(xfs_dir3_sfe_get_ino(dp->i_mount, sfp, sfep) ==
 			       args->inumber);
 			break;
 		}
@@ -997,7 +934,7 @@
 	 * Calculate sizes.
 	 */
 	byteoff = (int)((char *)sfep - (char *)sfp);
-	entsize = xfs_dir2_sf_entsize(sfp, args->namelen);
+	entsize = xfs_dir3_sf_entsize(dp->i_mount, sfp, args->namelen);
 	newsize = oldsize - entsize;
 	/*
 	 * Copy the part if any after the removed entry, sliding it down.
@@ -1113,16 +1050,19 @@
 	 * Normal entry, look for the name.
 	 */
 	else {
-		for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);
-				i < sfp->count;
-				i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
+		for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count;
+		     i++, sfep = xfs_dir3_sf_nextentry(dp->i_mount, sfp, sfep)) {
 			if (xfs_da_compname(args, sfep->name, sfep->namelen) ==
 								XFS_CMP_EXACT) {
 #if XFS_BIG_INUMS || defined(DEBUG)
-				ino = xfs_dir2_sfe_get_ino(sfp, sfep);
+				ino = xfs_dir3_sfe_get_ino(dp->i_mount,
+							   sfp, sfep);
 				ASSERT(args->inumber != ino);
 #endif
-				xfs_dir2_sfe_put_ino(sfp, sfep, args->inumber);
+				xfs_dir3_sfe_put_ino(dp->i_mount, sfp, sfep,
+						     args->inumber);
+				xfs_dir3_sfe_put_ftype(dp->i_mount, sfp, sfep,
+						       args->filetype);
 				break;
 			}
 		}
@@ -1189,10 +1129,12 @@
 	int			oldsize;	/* old inode size */
 	xfs_dir2_sf_entry_t	*sfep;		/* new sf entry */
 	xfs_dir2_sf_hdr_t	*sfp;		/* new sf directory */
+	struct xfs_mount	*mp;
 
 	trace_xfs_dir2_sf_toino4(args);
 
 	dp = args->dp;
+	mp = dp->i_mount;
 
 	/*
 	 * Copy the old directory to the buffer.
@@ -1230,13 +1172,15 @@
 	for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp),
 		    oldsfep = xfs_dir2_sf_firstentry(oldsfp);
 	     i < sfp->count;
-	     i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep),
-		  oldsfep = xfs_dir2_sf_nextentry(oldsfp, oldsfep)) {
+	     i++, sfep = xfs_dir3_sf_nextentry(mp, sfp, sfep),
+		  oldsfep = xfs_dir3_sf_nextentry(mp, oldsfp, oldsfep)) {
 		sfep->namelen = oldsfep->namelen;
 		sfep->offset = oldsfep->offset;
 		memcpy(sfep->name, oldsfep->name, sfep->namelen);
-		xfs_dir2_sfe_put_ino(sfp, sfep,
-			xfs_dir2_sfe_get_ino(oldsfp, oldsfep));
+		xfs_dir3_sfe_put_ino(mp, sfp, sfep,
+			xfs_dir3_sfe_get_ino(mp, oldsfp, oldsfep));
+		xfs_dir3_sfe_put_ftype(mp, sfp, sfep,
+			xfs_dir3_sfe_get_ftype(mp, oldsfp, oldsfep));
 	}
 	/*
 	 * Clean up the inode.
@@ -1264,10 +1208,12 @@
 	int			oldsize;	/* old inode size */
 	xfs_dir2_sf_entry_t	*sfep;		/* new sf entry */
 	xfs_dir2_sf_hdr_t	*sfp;		/* new sf directory */
+	struct xfs_mount	*mp;
 
 	trace_xfs_dir2_sf_toino8(args);
 
 	dp = args->dp;
+	mp = dp->i_mount;
 
 	/*
 	 * Copy the old directory to the buffer.
@@ -1305,13 +1251,15 @@
 	for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp),
 		    oldsfep = xfs_dir2_sf_firstentry(oldsfp);
 	     i < sfp->count;
-	     i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep),
-		  oldsfep = xfs_dir2_sf_nextentry(oldsfp, oldsfep)) {
+	     i++, sfep = xfs_dir3_sf_nextentry(mp, sfp, sfep),
+		  oldsfep = xfs_dir3_sf_nextentry(mp, oldsfp, oldsfep)) {
 		sfep->namelen = oldsfep->namelen;
 		sfep->offset = oldsfep->offset;
 		memcpy(sfep->name, oldsfep->name, sfep->namelen);
-		xfs_dir2_sfe_put_ino(sfp, sfep,
-			xfs_dir2_sfe_get_ino(oldsfp, oldsfep));
+		xfs_dir3_sfe_put_ino(mp, sfp, sfep,
+			xfs_dir3_sfe_get_ino(mp, oldsfp, oldsfep));
+		xfs_dir3_sfe_put_ftype(mp, sfp, sfep,
+			xfs_dir3_sfe_get_ftype(mp, oldsfp, oldsfep));
 	}
 	/*
 	 * Clean up the inode.
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index 69cf4fc..45560ee 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -16,12 +16,13 @@
  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #include "xfs.h"
-#include "xfs_sb.h"
+#include "xfs_format.h"
 #include "xfs_log.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
 #include "xfs_ag.h"
 #include "xfs_mount.h"
 #include "xfs_quota.h"
-#include "xfs_trans.h"
 #include "xfs_alloc_btree.h"
 #include "xfs_bmap_btree.h"
 #include "xfs_ialloc_btree.h"
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 0adf27e..251c666 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -17,6 +17,7 @@
  */
 #include "xfs.h"
 #include "xfs_fs.h"
+#include "xfs_format.h"
 #include "xfs_bit.h"
 #include "xfs_log.h"
 #include "xfs_trans.h"
@@ -28,6 +29,7 @@
 #include "xfs_bmap_btree.h"
 #include "xfs_inode.h"
 #include "xfs_bmap.h"
+#include "xfs_bmap_util.h"
 #include "xfs_rtalloc.h"
 #include "xfs_error.h"
 #include "xfs_itable.h"
@@ -710,10 +712,8 @@
 
 	if (flags & XFS_QMOPT_DQALLOC) {
 		tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC);
-		error = xfs_trans_reserve(tp, XFS_QM_DQALLOC_SPACE_RES(mp),
-					  XFS_QM_DQALLOC_LOG_RES(mp), 0,
-					  XFS_TRANS_PERM_LOG_RES,
-					  XFS_WRITE_LOG_COUNT);
+		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_attrsetm,
+					  XFS_QM_DQALLOC_SPACE_RES(mp), 0);
 		if (error)
 			goto error1;
 		cancelflags = XFS_TRANS_RELEASE_LOG_RES;
diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c
index 57aa4b0..60c6e1f 100644
--- a/fs/xfs/xfs_dquot_item.c
+++ b/fs/xfs/xfs_dquot_item.c
@@ -17,6 +17,7 @@
  */
 #include "xfs.h"
 #include "xfs_fs.h"
+#include "xfs_format.h"
 #include "xfs_log.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
@@ -43,14 +44,15 @@
 /*
  * returns the number of iovecs needed to log the given dquot item.
  */
-STATIC uint
+STATIC void
 xfs_qm_dquot_logitem_size(
-	struct xfs_log_item	*lip)
+	struct xfs_log_item	*lip,
+	int			*nvecs,
+	int			*nbytes)
 {
-	/*
-	 * we need only two iovecs, one for the format, one for the real thing
-	 */
-	return 2;
+	*nvecs += 2;
+	*nbytes += sizeof(struct xfs_dq_logformat) +
+		   sizeof(struct xfs_disk_dquot);
 }
 
 /*
@@ -285,11 +287,14 @@
  * We only need 1 iovec for an quotaoff item.  It just logs the
  * quotaoff_log_format structure.
  */
-STATIC uint
+STATIC void
 xfs_qm_qoff_logitem_size(
-	struct xfs_log_item	*lip)
+	struct xfs_log_item	*lip,
+	int			*nvecs,
+	int			*nbytes)
 {
-	return 1;
+	*nvecs += 1;
+	*nbytes += sizeof(struct xfs_qoff_logitem);
 }
 
 /*
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index 35d3f5b..1123d93f 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -26,7 +26,6 @@
 #include "xfs_bmap_btree.h"
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
-#include "xfs_utils.h"
 #include "xfs_error.h"
 
 #ifdef DEBUG
diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c
index c585bc6..066df42 100644
--- a/fs/xfs/xfs_export.c
+++ b/fs/xfs/xfs_export.c
@@ -21,10 +21,11 @@
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
-#include "xfs_dir2.h"
 #include "xfs_mount.h"
+#include "xfs_da_btree.h"
+#include "xfs_dir2_format.h"
+#include "xfs_dir2.h"
 #include "xfs_export.h"
-#include "xfs_vnodeops.h"
 #include "xfs_bmap_btree.h"
 #include "xfs_inode.h"
 #include "xfs_inode_item.h"
diff --git a/fs/xfs/xfs_extent_busy.c b/fs/xfs/xfs_extent_busy.c
index 85e9f87a..86f559f 100644
--- a/fs/xfs/xfs_extent_busy.c
+++ b/fs/xfs/xfs_extent_busy.c
@@ -147,7 +147,7 @@
  * extent.  If the overlap covers the beginning, the end, or all of the busy
  * extent, the overlapping portion can be made unbusy and used for the
  * allocation.  We can't split a busy extent because we can't modify a
- * transaction/CIL context busy list, but we can update an entries block
+ * transaction/CIL context busy list, but we can update an entry's block
  * number or length.
  *
  * Returns true if the extent can safely be reused, or false if the search
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 452920a..dc53e8f 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -73,11 +73,22 @@
  * We only need 1 iovec for an efi item.  It just logs the efi_log_format
  * structure.
  */
-STATIC uint
-xfs_efi_item_size(
-	struct xfs_log_item	*lip)
+static inline int
+xfs_efi_item_sizeof(
+	struct xfs_efi_log_item *efip)
 {
-	return 1;
+	return sizeof(struct xfs_efi_log_format) +
+	       (efip->efi_format.efi_nextents - 1) * sizeof(xfs_extent_t);
+}
+
+STATIC void
+xfs_efi_item_size(
+	struct xfs_log_item	*lip,
+	int			*nvecs,
+	int			*nbytes)
+{
+	*nvecs += 1;
+	*nbytes += xfs_efi_item_sizeof(EFI_ITEM(lip));
 }
 
 /*
@@ -93,21 +104,17 @@
 	struct xfs_log_iovec	*log_vector)
 {
 	struct xfs_efi_log_item	*efip = EFI_ITEM(lip);
-	uint			size;
 
 	ASSERT(atomic_read(&efip->efi_next_extent) ==
 				efip->efi_format.efi_nextents);
 
 	efip->efi_format.efi_type = XFS_LI_EFI;
-
-	size = sizeof(xfs_efi_log_format_t);
-	size += (efip->efi_format.efi_nextents - 1) * sizeof(xfs_extent_t);
 	efip->efi_format.efi_size = 1;
 
 	log_vector->i_addr = &efip->efi_format;
-	log_vector->i_len = size;
+	log_vector->i_len = xfs_efi_item_sizeof(efip);
 	log_vector->i_type = XLOG_REG_TYPE_EFI_FORMAT;
-	ASSERT(size >= sizeof(xfs_efi_log_format_t));
+	ASSERT(log_vector->i_len >= sizeof(xfs_efi_log_format_t));
 }
 
 
@@ -333,11 +340,22 @@
  * We only need 1 iovec for an efd item.  It just logs the efd_log_format
  * structure.
  */
-STATIC uint
-xfs_efd_item_size(
-	struct xfs_log_item	*lip)
+static inline int
+xfs_efd_item_sizeof(
+	struct xfs_efd_log_item *efdp)
 {
-	return 1;
+	return sizeof(xfs_efd_log_format_t) +
+	       (efdp->efd_format.efd_nextents - 1) * sizeof(xfs_extent_t);
+}
+
+STATIC void
+xfs_efd_item_size(
+	struct xfs_log_item	*lip,
+	int			*nvecs,
+	int			*nbytes)
+{
+	*nvecs += 1;
+	*nbytes += xfs_efd_item_sizeof(EFD_ITEM(lip));
 }
 
 /*
@@ -353,20 +371,16 @@
 	struct xfs_log_iovec	*log_vector)
 {
 	struct xfs_efd_log_item	*efdp = EFD_ITEM(lip);
-	uint			size;
 
 	ASSERT(efdp->efd_next_extent == efdp->efd_format.efd_nextents);
 
 	efdp->efd_format.efd_type = XFS_LI_EFD;
-
-	size = sizeof(xfs_efd_log_format_t);
-	size += (efdp->efd_format.efd_nextents - 1) * sizeof(xfs_extent_t);
 	efdp->efd_format.efd_size = 1;
 
 	log_vector->i_addr = &efdp->efd_format;
-	log_vector->i_len = size;
+	log_vector->i_len = xfs_efd_item_sizeof(efdp);
 	log_vector->i_type = XLOG_REG_TYPE_EFD_FORMAT;
-	ASSERT(size >= sizeof(xfs_efd_log_format_t));
+	ASSERT(log_vector->i_len >= sizeof(xfs_efd_log_format_t));
 }
 
 /*
diff --git a/fs/xfs/xfs_extfree_item.h b/fs/xfs/xfs_extfree_item.h
index 4322224..0ffbce3 100644
--- a/fs/xfs/xfs_extfree_item.h
+++ b/fs/xfs/xfs_extfree_item.h
@@ -18,93 +18,11 @@
 #ifndef	__XFS_EXTFREE_ITEM_H__
 #define	__XFS_EXTFREE_ITEM_H__
 
+/* kernel only EFI/EFD definitions */
+
 struct xfs_mount;
 struct kmem_zone;
 
-typedef struct xfs_extent {
-	xfs_dfsbno_t	ext_start;
-	xfs_extlen_t	ext_len;
-} xfs_extent_t;
-
-/*
- * Since an xfs_extent_t has types (start:64, len: 32)
- * there are different alignments on 32 bit and 64 bit kernels.
- * So we provide the different variants for use by a
- * conversion routine.
- */
-
-typedef struct xfs_extent_32 {
-	__uint64_t	ext_start;
-	__uint32_t	ext_len;
-} __attribute__((packed)) xfs_extent_32_t;
-
-typedef struct xfs_extent_64 {
-	__uint64_t	ext_start;
-	__uint32_t	ext_len;
-	__uint32_t	ext_pad;
-} xfs_extent_64_t;
-
-/*
- * This is the structure used to lay out an efi log item in the
- * log.  The efi_extents field is a variable size array whose
- * size is given by efi_nextents.
- */
-typedef struct xfs_efi_log_format {
-	__uint16_t		efi_type;	/* efi log item type */
-	__uint16_t		efi_size;	/* size of this item */
-	__uint32_t		efi_nextents;	/* # extents to free */
-	__uint64_t		efi_id;		/* efi identifier */
-	xfs_extent_t		efi_extents[1];	/* array of extents to free */
-} xfs_efi_log_format_t;
-
-typedef struct xfs_efi_log_format_32 {
-	__uint16_t		efi_type;	/* efi log item type */
-	__uint16_t		efi_size;	/* size of this item */
-	__uint32_t		efi_nextents;	/* # extents to free */
-	__uint64_t		efi_id;		/* efi identifier */
-	xfs_extent_32_t		efi_extents[1];	/* array of extents to free */
-} __attribute__((packed)) xfs_efi_log_format_32_t;
-
-typedef struct xfs_efi_log_format_64 {
-	__uint16_t		efi_type;	/* efi log item type */
-	__uint16_t		efi_size;	/* size of this item */
-	__uint32_t		efi_nextents;	/* # extents to free */
-	__uint64_t		efi_id;		/* efi identifier */
-	xfs_extent_64_t		efi_extents[1];	/* array of extents to free */
-} xfs_efi_log_format_64_t;
-
-/*
- * This is the structure used to lay out an efd log item in the
- * log.  The efd_extents array is a variable size array whose
- * size is given by efd_nextents;
- */
-typedef struct xfs_efd_log_format {
-	__uint16_t		efd_type;	/* efd log item type */
-	__uint16_t		efd_size;	/* size of this item */
-	__uint32_t		efd_nextents;	/* # of extents freed */
-	__uint64_t		efd_efi_id;	/* id of corresponding efi */
-	xfs_extent_t		efd_extents[1];	/* array of extents freed */
-} xfs_efd_log_format_t;
-
-typedef struct xfs_efd_log_format_32 {
-	__uint16_t		efd_type;	/* efd log item type */
-	__uint16_t		efd_size;	/* size of this item */
-	__uint32_t		efd_nextents;	/* # of extents freed */
-	__uint64_t		efd_efi_id;	/* id of corresponding efi */
-	xfs_extent_32_t		efd_extents[1];	/* array of extents freed */
-} __attribute__((packed)) xfs_efd_log_format_32_t;
-
-typedef struct xfs_efd_log_format_64 {
-	__uint16_t		efd_type;	/* efd log item type */
-	__uint16_t		efd_size;	/* size of this item */
-	__uint32_t		efd_nextents;	/* # of extents freed */
-	__uint64_t		efd_efi_id;	/* id of corresponding efi */
-	xfs_extent_64_t		efd_extents[1];	/* array of extents freed */
-} xfs_efd_log_format_64_t;
-
-
-#ifdef __KERNEL__
-
 /*
  * Max number of extents in fast allocation path.
  */
@@ -160,6 +78,4 @@
 					    xfs_efi_log_format_t *dst_efi_fmt);
 void			xfs_efi_item_free(xfs_efi_log_item_t *);
 
-#endif	/* __KERNEL__ */
-
 #endif	/* __XFS_EXTFREE_ITEM_H__ */
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index de3dc98..4c749ab 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -28,10 +28,11 @@
 #include "xfs_inode.h"
 #include "xfs_inode_item.h"
 #include "xfs_bmap.h"
+#include "xfs_bmap_util.h"
 #include "xfs_error.h"
-#include "xfs_vnodeops.h"
 #include "xfs_da_btree.h"
 #include "xfs_dir2_format.h"
+#include "xfs_dir2.h"
 #include "xfs_dir2_priv.h"
 #include "xfs_ioctl.h"
 #include "xfs_trace.h"
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index 5170306..ce78e65 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -16,18 +16,18 @@
  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #include "xfs.h"
+#include "xfs_log.h"
 #include "xfs_bmap_btree.h"
 #include "xfs_inum.h"
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
 #include "xfs_ag.h"
-#include "xfs_log.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_mount.h"
 #include "xfs_bmap.h"
+#include "xfs_bmap_util.h"
 #include "xfs_alloc.h"
-#include "xfs_utils.h"
 #include "xfs_mru_cache.h"
 #include "xfs_filestream.h"
 #include "xfs_trace.h"
@@ -668,8 +668,8 @@
  */
 int
 xfs_filestream_new_ag(
-	xfs_bmalloca_t	*ap,
-	xfs_agnumber_t	*agp)
+	struct xfs_bmalloca	*ap,
+	xfs_agnumber_t		*agp)
 {
 	int		flags, err;
 	xfs_inode_t	*ip, *pip = NULL;
diff --git a/fs/xfs/xfs_filestream.h b/fs/xfs/xfs_filestream.h
index 09dd9af..6d61dbe 100644
--- a/fs/xfs/xfs_filestream.h
+++ b/fs/xfs/xfs_filestream.h
@@ -18,8 +18,6 @@
 #ifndef __XFS_FILESTREAM_H__
 #define __XFS_FILESTREAM_H__
 
-#ifdef __KERNEL__
-
 struct xfs_mount;
 struct xfs_inode;
 struct xfs_perag;
@@ -69,6 +67,4 @@
 		(ip->i_d.di_flags & XFS_DIFLAG_FILESTREAM);
 }
 
-#endif /* __KERNEL__ */
-
 #endif /* __XFS_FILESTREAM_H__ */
diff --git a/fs/xfs/xfs_format.h b/fs/xfs/xfs_format.h
new file mode 100644
index 0000000..35c08ff
--- /dev/null
+++ b/fs/xfs/xfs_format.h
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_FORMAT_H__
+#define __XFS_FORMAT_H__
+
+/*
+ * XFS On Disk Format Definitions
+ *
+ * This header file defines all the on-disk format definitions for 
+ * general XFS objects. Directory and attribute related objects are defined in
+ * xfs_da_format.h, which log and log item formats are defined in
+ * xfs_log_format.h. Everything else goes here.
+ */
+
+struct xfs_mount;
+struct xfs_trans;
+struct xfs_inode;
+struct xfs_buf;
+struct xfs_ifork;
+
+/*
+ * RealTime Device format definitions
+ */
+
+/* Min and max rt extent sizes, specified in bytes */
+#define	XFS_MAX_RTEXTSIZE	(1024 * 1024 * 1024)	/* 1GB */
+#define	XFS_DFL_RTEXTSIZE	(64 * 1024)	        /* 64kB */
+#define	XFS_MIN_RTEXTSIZE	(4 * 1024)		/* 4kB */
+
+#define	XFS_BLOCKSIZE(mp)	((mp)->m_sb.sb_blocksize)
+#define	XFS_BLOCKMASK(mp)	((mp)->m_blockmask)
+#define	XFS_BLOCKWSIZE(mp)	((mp)->m_blockwsize)
+#define	XFS_BLOCKWMASK(mp)	((mp)->m_blockwmask)
+
+/*
+ * RT Summary and bit manipulation macros.
+ */
+#define	XFS_SUMOFFS(mp,ls,bb)	((int)((ls) * (mp)->m_sb.sb_rbmblocks + (bb)))
+#define	XFS_SUMOFFSTOBLOCK(mp,s)	\
+	(((s) * (uint)sizeof(xfs_suminfo_t)) >> (mp)->m_sb.sb_blocklog)
+#define	XFS_SUMPTR(mp,bp,so)	\
+	((xfs_suminfo_t *)((bp)->b_addr + \
+		(((so) * (uint)sizeof(xfs_suminfo_t)) & XFS_BLOCKMASK(mp))))
+
+#define	XFS_BITTOBLOCK(mp,bi)	((bi) >> (mp)->m_blkbit_log)
+#define	XFS_BLOCKTOBIT(mp,bb)	((bb) << (mp)->m_blkbit_log)
+#define	XFS_BITTOWORD(mp,bi)	\
+	((int)(((bi) >> XFS_NBWORDLOG) & XFS_BLOCKWMASK(mp)))
+
+#define	XFS_RTMIN(a,b)	((a) < (b) ? (a) : (b))
+#define	XFS_RTMAX(a,b)	((a) > (b) ? (a) : (b))
+
+#define	XFS_RTLOBIT(w)	xfs_lowbit32(w)
+#define	XFS_RTHIBIT(w)	xfs_highbit32(w)
+
+#if XFS_BIG_BLKNOS
+#define	XFS_RTBLOCKLOG(b)	xfs_highbit64(b)
+#else
+#define	XFS_RTBLOCKLOG(b)	xfs_highbit32(b)
+#endif
+
+/*
+ * Dquot and dquot block format definitions
+ */
+#define XFS_DQUOT_MAGIC		0x4451		/* 'DQ' */
+#define XFS_DQUOT_VERSION	(u_int8_t)0x01	/* latest version number */
+
+/*
+ * This is the main portion of the on-disk representation of quota
+ * information for a user. This is the q_core of the xfs_dquot_t that
+ * is kept in kernel memory. We pad this with some more expansion room
+ * to construct the on disk structure.
+ */
+typedef struct	xfs_disk_dquot {
+	__be16		d_magic;	/* dquot magic = XFS_DQUOT_MAGIC */
+	__u8		d_version;	/* dquot version */
+	__u8		d_flags;	/* XFS_DQ_USER/PROJ/GROUP */
+	__be32		d_id;		/* user,project,group id */
+	__be64		d_blk_hardlimit;/* absolute limit on disk blks */
+	__be64		d_blk_softlimit;/* preferred limit on disk blks */
+	__be64		d_ino_hardlimit;/* maximum # allocated inodes */
+	__be64		d_ino_softlimit;/* preferred inode limit */
+	__be64		d_bcount;	/* disk blocks owned by the user */
+	__be64		d_icount;	/* inodes owned by the user */
+	__be32		d_itimer;	/* zero if within inode limits if not,
+					   this is when we refuse service */
+	__be32		d_btimer;	/* similar to above; for disk blocks */
+	__be16		d_iwarns;	/* warnings issued wrt num inodes */
+	__be16		d_bwarns;	/* warnings issued wrt disk blocks */
+	__be32		d_pad0;		/* 64 bit align */
+	__be64		d_rtb_hardlimit;/* absolute limit on realtime blks */
+	__be64		d_rtb_softlimit;/* preferred limit on RT disk blks */
+	__be64		d_rtbcount;	/* realtime blocks owned */
+	__be32		d_rtbtimer;	/* similar to above; for RT disk blocks */
+	__be16		d_rtbwarns;	/* warnings issued wrt RT disk blocks */
+	__be16		d_pad;
+} xfs_disk_dquot_t;
+
+/*
+ * This is what goes on disk. This is separated from the xfs_disk_dquot because
+ * carrying the unnecessary padding would be a waste of memory.
+ */
+typedef struct xfs_dqblk {
+	xfs_disk_dquot_t  dd_diskdq;	/* portion that lives incore as well */
+	char		  dd_fill[4];	/* filling for posterity */
+
+	/*
+	 * These two are only present on filesystems with the CRC bits set.
+	 */
+	__be32		  dd_crc;	/* checksum */
+	__be64		  dd_lsn;	/* last modification in log */
+	uuid_t		  dd_uuid;	/* location information */
+} xfs_dqblk_t;
+
+#define XFS_DQUOT_CRC_OFF	offsetof(struct xfs_dqblk, dd_crc)
+
+/*
+ * Remote symlink format and access functions.
+ */
+#define XFS_SYMLINK_MAGIC	0x58534c4d	/* XSLM */
+
+struct xfs_dsymlink_hdr {
+	__be32	sl_magic;
+	__be32	sl_offset;
+	__be32	sl_bytes;
+	__be32	sl_crc;
+	uuid_t	sl_uuid;
+	__be64	sl_owner;
+	__be64	sl_blkno;
+	__be64	sl_lsn;
+};
+
+/*
+ * The maximum pathlen is 1024 bytes. Since the minimum file system
+ * blocksize is 512 bytes, we can get a max of 3 extents back from
+ * bmapi when crc headers are taken into account.
+ */
+#define XFS_SYMLINK_MAPS 3
+
+#define XFS_SYMLINK_BUF_SPACE(mp, bufsize)	\
+	((bufsize) - (xfs_sb_version_hascrc(&(mp)->m_sb) ? \
+			sizeof(struct xfs_dsymlink_hdr) : 0))
+
+int xfs_symlink_blocks(struct xfs_mount *mp, int pathlen);
+int xfs_symlink_hdr_set(struct xfs_mount *mp, xfs_ino_t ino, uint32_t offset,
+			uint32_t size, struct xfs_buf *bp);
+bool xfs_symlink_hdr_ok(struct xfs_mount *mp, xfs_ino_t ino, uint32_t offset,
+			uint32_t size, struct xfs_buf *bp);
+void xfs_symlink_local_to_remote(struct xfs_trans *tp, struct xfs_buf *bp,
+				 struct xfs_inode *ip, struct xfs_ifork *ifp);
+
+extern const struct xfs_buf_ops xfs_symlink_buf_ops;
+
+#endif /* __XFS_FORMAT_H__ */
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index d046955..1edb5cc 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -240,7 +240,9 @@
 
 
 /*
- * Minimum and maximum sizes need for growth checks
+ * Minimum and maximum sizes need for growth checks.
+ *
+ * Block counts are in units of filesystem blocks, not basic blocks.
  */
 #define XFS_MIN_AG_BLOCKS	64
 #define XFS_MIN_LOG_BLOCKS	512ULL
@@ -311,6 +313,17 @@
 } xfs_bstat_t;
 
 /*
+ * Project quota id helpers (previously projid was 16bit only
+ * and using two 16bit values to hold new 32bit projid was choosen
+ * to retain compatibility with "old" filesystems).
+ */
+static inline __uint32_t
+bstat_get_projid(struct xfs_bstat *bs)
+{
+	return (__uint32_t)bs->bs_projid_hi << 16 | bs->bs_projid_lo;
+}
+
+/*
  * The user-level BulkStat Request interface structure.
  */
 typedef struct xfs_fsop_bulkreq {
@@ -344,7 +357,7 @@
  * Speculative preallocation trimming.
  */
 #define XFS_EOFBLOCKS_VERSION		1
-struct xfs_eofblocks {
+struct xfs_fs_eofblocks {
 	__u32		eof_version;
 	__u32		eof_flags;
 	uid_t		eof_uid;
@@ -450,6 +463,21 @@
 				 + (handle).ha_fid.fid_len)
 
 /*
+ * Structure passed to XFS_IOC_SWAPEXT
+ */
+typedef struct xfs_swapext
+{
+	__int64_t	sx_version;	/* version */
+#define XFS_SX_VERSION		0
+	__int64_t	sx_fdtarget;	/* fd of target file */
+	__int64_t	sx_fdtmp;	/* fd of tmp file */
+	xfs_off_t	sx_offset;	/* offset into file */
+	xfs_off_t	sx_length;	/* leng from offset */
+	char		sx_pad[16];	/* pad space, unused */
+	xfs_bstat_t	sx_stat;	/* stat of target b4 copy */
+} xfs_swapext_t;
+
+/*
  * Flags for going down operation
  */
 #define XFS_FSOP_GOING_FLAGS_DEFAULT		0x0	/* going down */
@@ -511,8 +539,14 @@
 #define XFS_IOC_ERROR_INJECTION	     _IOW ('X', 116, struct xfs_error_injection)
 #define XFS_IOC_ERROR_CLEARALL	     _IOW ('X', 117, struct xfs_error_injection)
 /*	XFS_IOC_ATTRCTL_BY_HANDLE -- deprecated 118	 */
+
 /*	XFS_IOC_FREEZE		  -- FIFREEZE   119	 */
 /*	XFS_IOC_THAW		  -- FITHAW     120	 */
+#ifndef FIFREEZE
+#define XFS_IOC_FREEZE		     _IOWR('X', 119, int)
+#define XFS_IOC_THAW		     _IOWR('X', 120, int)
+#endif
+
 #define XFS_IOC_FSSETDM_BY_HANDLE    _IOW ('X', 121, struct xfs_fsop_setdm_handlereq)
 #define XFS_IOC_ATTRLIST_BY_HANDLE   _IOW ('X', 122, struct xfs_fsop_attrlist_handlereq)
 #define XFS_IOC_ATTRMULTI_BY_HANDLE  _IOW ('X', 123, struct xfs_fsop_attrmulti_handlereq)
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 614eb0c..e64ee52 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -203,8 +203,9 @@
 
 	tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFS);
 	tp->t_flags |= XFS_TRANS_RESERVE;
-	if ((error = xfs_trans_reserve(tp, XFS_GROWFS_SPACE_RES(mp),
-			XFS_GROWDATA_LOG_RES(mp), 0, 0, 0))) {
+	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growdata,
+				  XFS_GROWFS_SPACE_RES(mp), 0);
+	if (error) {
 		xfs_trans_cancel(tp, 0);
 		return error;
 	}
@@ -739,8 +740,7 @@
 	int		error;
 
 	tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1, KM_SLEEP);
-	error = xfs_trans_reserve(tp, 0, XFS_SB_LOG_RES(mp), 0, 0,
-				  XFS_DEFAULT_LOG_COUNT);
+	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_sb, 0, 0);
 	if (error) {
 		xfs_trans_cancel(tp, 0);
 		return error;
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 7a0c17d..ccf2fb1 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -39,6 +39,7 @@
 #include "xfs_cksum.h"
 #include "xfs_buf_item.h"
 #include "xfs_icreate_item.h"
+#include "xfs_icache.h"
 
 
 /*
@@ -506,7 +507,7 @@
 
 /*
  * Select an allocation group to look for a free inode in, based on the parent
- * inode and then mode.  Return the allocation group buffer.
+ * inode and the mode.  Return the allocation group buffer.
  */
 STATIC xfs_agnumber_t
 xfs_ialloc_ag_select(
@@ -728,7 +729,7 @@
 		error = xfs_inobt_get_rec(cur, &rec, &j);
 		if (error)
 			goto error0;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+		XFS_WANT_CORRUPTED_GOTO(j == 1, error0);
 
 		if (rec.ir_freecount > 0) {
 			/*
@@ -1341,7 +1342,7 @@
 	xfs_agblock_t	cluster_agbno;	/* first block in inode cluster */
 	int		error;	/* error code */
 	int		offset;	/* index of inode in its buffer */
-	int		offset_agbno;	/* blks from chunk start to inode */
+	xfs_agblock_t	offset_agbno;	/* blks from chunk start to inode */
 
 	ASSERT(ino != NULLFSINO);
 
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 3f90e1c..16219b9 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -17,6 +17,7 @@
  */
 #include "xfs.h"
 #include "xfs_fs.h"
+#include "xfs_format.h"
 #include "xfs_types.h"
 #include "xfs_log.h"
 #include "xfs_log_priv.h"
@@ -31,12 +32,12 @@
 #include "xfs_dinode.h"
 #include "xfs_error.h"
 #include "xfs_filestream.h"
-#include "xfs_vnodeops.h"
 #include "xfs_inode_item.h"
 #include "xfs_quota.h"
 #include "xfs_trace.h"
 #include "xfs_fsops.h"
 #include "xfs_icache.h"
+#include "xfs_bmap_util.h"
 
 #include <linux/kthread.h>
 #include <linux/freezer.h>
@@ -619,7 +620,7 @@
 
 /*
  * Background scanning to trim post-EOF preallocated space. This is queued
- * based on the 'background_prealloc_discard_period' tunable (5m by default).
+ * based on the 'speculative_prealloc_lifetime' tunable (5m by default).
  */
 STATIC void
 xfs_queue_eofblocks(
@@ -1203,15 +1204,15 @@
 	struct xfs_inode	*ip,
 	struct xfs_eofblocks	*eofb)
 {
-	if (eofb->eof_flags & XFS_EOF_FLAGS_UID &&
-	    ip->i_d.di_uid != eofb->eof_uid)
+	if ((eofb->eof_flags & XFS_EOF_FLAGS_UID) &&
+	    !uid_eq(VFS_I(ip)->i_uid, eofb->eof_uid))
 		return 0;
 
-	if (eofb->eof_flags & XFS_EOF_FLAGS_GID &&
-	    ip->i_d.di_gid != eofb->eof_gid)
+	if ((eofb->eof_flags & XFS_EOF_FLAGS_GID) &&
+	    !gid_eq(VFS_I(ip)->i_gid, eofb->eof_gid))
 		return 0;
 
-	if (eofb->eof_flags & XFS_EOF_FLAGS_PRID &&
+	if ((eofb->eof_flags & XFS_EOF_FLAGS_PRID) &&
 	    xfs_get_projid(ip) != eofb->eof_prid)
 		return 0;
 
diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h
index a01afbb..8a89f7d 100644
--- a/fs/xfs/xfs_icache.h
+++ b/fs/xfs/xfs_icache.h
@@ -21,9 +21,24 @@
 struct xfs_mount;
 struct xfs_perag;
 
+struct xfs_eofblocks {
+	__u32		eof_flags;
+	kuid_t		eof_uid;
+	kgid_t		eof_gid;
+	prid_t		eof_prid;
+	__u64		eof_min_file_size;
+};
+
 #define SYNC_WAIT		0x0001	/* wait for i/o to complete */
 #define SYNC_TRYLOCK		0x0002  /* only try to lock inodes */
 
+/*
+ * Flags for xfs_iget()
+ */
+#define XFS_IGET_CREATE		0x1
+#define XFS_IGET_UNTRUSTED	0x2
+#define XFS_IGET_DONTCACHE	0x4
+
 int xfs_iget(struct xfs_mount *mp, struct xfs_trans *tp, xfs_ino_t ino,
 	     uint flags, uint lock_flags, xfs_inode_t **ipp);
 
@@ -49,4 +64,39 @@
 		int flags, void *args),
 	int flags, void *args, int tag);
 
+static inline int
+xfs_fs_eofblocks_from_user(
+	struct xfs_fs_eofblocks		*src,
+	struct xfs_eofblocks		*dst)
+{
+	if (src->eof_version != XFS_EOFBLOCKS_VERSION)
+		return EINVAL;
+
+	if (src->eof_flags & ~XFS_EOF_FLAGS_VALID)
+		return EINVAL;
+
+	if (memchr_inv(&src->pad32, 0, sizeof(src->pad32)) ||
+	    memchr_inv(src->pad64, 0, sizeof(src->pad64)))
+		return EINVAL;
+
+	dst->eof_flags = src->eof_flags;
+	dst->eof_prid = src->eof_prid;
+	dst->eof_min_file_size = src->eof_min_file_size;
+
+	dst->eof_uid = INVALID_UID;
+	if (src->eof_flags & XFS_EOF_FLAGS_UID) {
+		dst->eof_uid = make_kuid(current_user_ns(), src->eof_uid);
+		if (!uid_valid(dst->eof_uid))
+			return EINVAL;
+	}
+
+	dst->eof_gid = INVALID_GID;
+	if (src->eof_flags & XFS_EOF_FLAGS_GID) {
+		dst->eof_gid = make_kgid(current_user_ns(), src->eof_gid);
+		if (!gid_valid(dst->eof_gid))
+			return EINVAL;
+	}
+	return 0;
+}
+
 #endif
diff --git a/fs/xfs/xfs_icreate_item.c b/fs/xfs/xfs_icreate_item.c
index 7716a4e..5a5a593 100644
--- a/fs/xfs/xfs_icreate_item.c
+++ b/fs/xfs/xfs_icreate_item.c
@@ -20,23 +20,11 @@
 #include "xfs_types.h"
 #include "xfs_bit.h"
 #include "xfs_log.h"
-#include "xfs_inum.h"
 #include "xfs_trans.h"
-#include "xfs_buf_item.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
-#include "xfs_dir2.h"
 #include "xfs_mount.h"
 #include "xfs_trans_priv.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_attr_sf.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_inode_item.h"
-#include "xfs_btree.h"
-#include "xfs_ialloc.h"
 #include "xfs_error.h"
 #include "xfs_icreate_item.h"
 
@@ -52,11 +40,14 @@
  *
  * We only need one iovec for the icreate log structure.
  */
-STATIC uint
+STATIC void
 xfs_icreate_item_size(
-	struct xfs_log_item	*lip)
+	struct xfs_log_item	*lip,
+	int			*nvecs,
+	int			*nbytes)
 {
-	return 1;
+	*nvecs += 1;
+	*nbytes += sizeof(struct xfs_icreate_log);
 }
 
 /*
diff --git a/fs/xfs/xfs_icreate_item.h b/fs/xfs/xfs_icreate_item.h
index 88ba8aa..59e89f8 100644
--- a/fs/xfs/xfs_icreate_item.h
+++ b/fs/xfs/xfs_icreate_item.h
@@ -18,24 +18,6 @@
 #ifndef XFS_ICREATE_ITEM_H
 #define XFS_ICREATE_ITEM_H	1
 
-/*
- * on disk log item structure
- *
- * Log recovery assumes the first two entries are the type and size and they fit
- * in 32 bits. Also in host order (ugh) so they have to be 32 bit aligned so
- * decoding can be done correctly.
- */
-struct xfs_icreate_log {
-	__uint16_t	icl_type;	/* type of log format structure */
-	__uint16_t	icl_size;	/* size of log format structure */
-	__be32		icl_ag;		/* ag being allocated in */
-	__be32		icl_agbno;	/* start block of inode range */
-	__be32		icl_count;	/* number of inodes to initialise */
-	__be32		icl_isize;	/* size of inodes */
-	__be32		icl_length;	/* length of extent to initialise */
-	__be32		icl_gen;	/* inode generation number to use */
-};
-
 /* in memory log item structure */
 struct xfs_icreate_item {
 	struct xfs_log_item	ic_item;
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index bb262c2..e3d7538 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -19,18 +19,23 @@
 
 #include "xfs.h"
 #include "xfs_fs.h"
-#include "xfs_types.h"
+#include "xfs_format.h"
 #include "xfs_log.h"
 #include "xfs_inum.h"
 #include "xfs_trans.h"
+#include "xfs_trans_space.h"
 #include "xfs_trans_priv.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
 #include "xfs_mount.h"
+#include "xfs_da_btree.h"
+#include "xfs_dir2_format.h"
+#include "xfs_dir2.h"
 #include "xfs_bmap_btree.h"
 #include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
 #include "xfs_attr_sf.h"
+#include "xfs_attr.h"
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
 #include "xfs_buf_item.h"
@@ -39,16 +44,15 @@
 #include "xfs_alloc.h"
 #include "xfs_ialloc.h"
 #include "xfs_bmap.h"
+#include "xfs_bmap_util.h"
 #include "xfs_error.h"
-#include "xfs_utils.h"
 #include "xfs_quota.h"
 #include "xfs_filestream.h"
-#include "xfs_vnodeops.h"
 #include "xfs_cksum.h"
 #include "xfs_trace.h"
 #include "xfs_icache.h"
+#include "xfs_symlink.h"
 
-kmem_zone_t *xfs_ifork_zone;
 kmem_zone_t *xfs_inode_zone;
 
 /*
@@ -58,9 +62,6 @@
 #define	XFS_ITRUNC_MAX_EXTENTS	2
 
 STATIC int xfs_iflush_int(xfs_inode_t *, xfs_buf_t *);
-STATIC int xfs_iformat_local(xfs_inode_t *, xfs_dinode_t *, int, int);
-STATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int);
-STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int);
 
 /*
  * helper function to extract extent size hint from inode
@@ -310,6 +311,188 @@
 }
 #endif
 
+#ifdef DEBUG
+int xfs_locked_n;
+int xfs_small_retries;
+int xfs_middle_retries;
+int xfs_lots_retries;
+int xfs_lock_delays;
+#endif
+
+/*
+ * Bump the subclass so xfs_lock_inodes() acquires each lock with
+ * a different value
+ */
+static inline int
+xfs_lock_inumorder(int lock_mode, int subclass)
+{
+	if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))
+		lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT;
+	if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))
+		lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT;
+
+	return lock_mode;
+}
+
+/*
+ * The following routine will lock n inodes in exclusive mode.
+ * We assume the caller calls us with the inodes in i_ino order.
+ *
+ * We need to detect deadlock where an inode that we lock
+ * is in the AIL and we start waiting for another inode that is locked
+ * by a thread in a long running transaction (such as truncate). This can
+ * result in deadlock since the long running trans might need to wait
+ * for the inode we just locked in order to push the tail and free space
+ * in the log.
+ */
+void
+xfs_lock_inodes(
+	xfs_inode_t	**ips,
+	int		inodes,
+	uint		lock_mode)
+{
+	int		attempts = 0, i, j, try_lock;
+	xfs_log_item_t	*lp;
+
+	ASSERT(ips && (inodes >= 2)); /* we need at least two */
+
+	try_lock = 0;
+	i = 0;
+
+again:
+	for (; i < inodes; i++) {
+		ASSERT(ips[i]);
+
+		if (i && (ips[i] == ips[i-1]))	/* Already locked */
+			continue;
+
+		/*
+		 * If try_lock is not set yet, make sure all locked inodes
+		 * are not in the AIL.
+		 * If any are, set try_lock to be used later.
+		 */
+
+		if (!try_lock) {
+			for (j = (i - 1); j >= 0 && !try_lock; j--) {
+				lp = (xfs_log_item_t *)ips[j]->i_itemp;
+				if (lp && (lp->li_flags & XFS_LI_IN_AIL)) {
+					try_lock++;
+				}
+			}
+		}
+
+		/*
+		 * If any of the previous locks we have locked is in the AIL,
+		 * we must TRY to get the second and subsequent locks. If
+		 * we can't get any, we must release all we have
+		 * and try again.
+		 */
+
+		if (try_lock) {
+			/* try_lock must be 0 if i is 0. */
+			/*
+			 * try_lock means we have an inode locked
+			 * that is in the AIL.
+			 */
+			ASSERT(i != 0);
+			if (!xfs_ilock_nowait(ips[i], xfs_lock_inumorder(lock_mode, i))) {
+				attempts++;
+
+				/*
+				 * Unlock all previous guys and try again.
+				 * xfs_iunlock will try to push the tail
+				 * if the inode is in the AIL.
+				 */
+
+				for(j = i - 1; j >= 0; j--) {
+
+					/*
+					 * Check to see if we've already
+					 * unlocked this one.
+					 * Not the first one going back,
+					 * and the inode ptr is the same.
+					 */
+					if ((j != (i - 1)) && ips[j] ==
+								ips[j+1])
+						continue;
+
+					xfs_iunlock(ips[j], lock_mode);
+				}
+
+				if ((attempts % 5) == 0) {
+					delay(1); /* Don't just spin the CPU */
+#ifdef DEBUG
+					xfs_lock_delays++;
+#endif
+				}
+				i = 0;
+				try_lock = 0;
+				goto again;
+			}
+		} else {
+			xfs_ilock(ips[i], xfs_lock_inumorder(lock_mode, i));
+		}
+	}
+
+#ifdef DEBUG
+	if (attempts) {
+		if (attempts < 5) xfs_small_retries++;
+		else if (attempts < 100) xfs_middle_retries++;
+		else xfs_lots_retries++;
+	} else {
+		xfs_locked_n++;
+	}
+#endif
+}
+
+/*
+ * xfs_lock_two_inodes() can only be used to lock one type of lock
+ * at a time - the iolock or the ilock, but not both at once. If
+ * we lock both at once, lockdep will report false positives saying
+ * we have violated locking orders.
+ */
+void
+xfs_lock_two_inodes(
+	xfs_inode_t		*ip0,
+	xfs_inode_t		*ip1,
+	uint			lock_mode)
+{
+	xfs_inode_t		*temp;
+	int			attempts = 0;
+	xfs_log_item_t		*lp;
+
+	if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))
+		ASSERT((lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) == 0);
+	ASSERT(ip0->i_ino != ip1->i_ino);
+
+	if (ip0->i_ino > ip1->i_ino) {
+		temp = ip0;
+		ip0 = ip1;
+		ip1 = temp;
+	}
+
+ again:
+	xfs_ilock(ip0, xfs_lock_inumorder(lock_mode, 0));
+
+	/*
+	 * If the first lock we have locked is in the AIL, we must TRY to get
+	 * the second lock. If we can't get it, we must release the first one
+	 * and try again.
+	 */
+	lp = (xfs_log_item_t *)ip0->i_itemp;
+	if (lp && (lp->li_flags & XFS_LI_IN_AIL)) {
+		if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(lock_mode, 1))) {
+			xfs_iunlock(ip0, lock_mode);
+			if ((++attempts % 5) == 0)
+				delay(1); /* Don't just spin the CPU */
+			goto again;
+		}
+	} else {
+		xfs_ilock(ip1, xfs_lock_inumorder(lock_mode, 1));
+	}
+}
+
+
 void
 __xfs_iflock(
 	struct xfs_inode	*ip)
@@ -326,609 +509,6 @@
 	finish_wait(wq, &wait.wait);
 }
 
-#ifdef DEBUG
-/*
- * Make sure that the extents in the given memory buffer
- * are valid.
- */
-STATIC void
-xfs_validate_extents(
-	xfs_ifork_t		*ifp,
-	int			nrecs,
-	xfs_exntfmt_t		fmt)
-{
-	xfs_bmbt_irec_t		irec;
-	xfs_bmbt_rec_host_t	rec;
-	int			i;
-
-	for (i = 0; i < nrecs; i++) {
-		xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
-		rec.l0 = get_unaligned(&ep->l0);
-		rec.l1 = get_unaligned(&ep->l1);
-		xfs_bmbt_get_all(&rec, &irec);
-		if (fmt == XFS_EXTFMT_NOSTATE)
-			ASSERT(irec.br_state == XFS_EXT_NORM);
-	}
-}
-#else /* DEBUG */
-#define xfs_validate_extents(ifp, nrecs, fmt)
-#endif /* DEBUG */
-
-/*
- * Check that none of the inode's in the buffer have a next
- * unlinked field of 0.
- */
-#if defined(DEBUG)
-void
-xfs_inobp_check(
-	xfs_mount_t	*mp,
-	xfs_buf_t	*bp)
-{
-	int		i;
-	int		j;
-	xfs_dinode_t	*dip;
-
-	j = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog;
-
-	for (i = 0; i < j; i++) {
-		dip = (xfs_dinode_t *)xfs_buf_offset(bp,
-					i * mp->m_sb.sb_inodesize);
-		if (!dip->di_next_unlinked)  {
-			xfs_alert(mp,
-	"Detected bogus zero next_unlinked field in incore inode buffer 0x%p.",
-				bp);
-			ASSERT(dip->di_next_unlinked);
-		}
-	}
-}
-#endif
-
-static void
-xfs_inode_buf_verify(
-	struct xfs_buf	*bp)
-{
-	struct xfs_mount *mp = bp->b_target->bt_mount;
-	int		i;
-	int		ni;
-
-	/*
-	 * Validate the magic number and version of every inode in the buffer
-	 */
-	ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock;
-	for (i = 0; i < ni; i++) {
-		int		di_ok;
-		xfs_dinode_t	*dip;
-
-		dip = (struct xfs_dinode *)xfs_buf_offset(bp,
-					(i << mp->m_sb.sb_inodelog));
-		di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
-			    XFS_DINODE_GOOD_VERSION(dip->di_version);
-		if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
-						XFS_ERRTAG_ITOBP_INOTOBP,
-						XFS_RANDOM_ITOBP_INOTOBP))) {
-			xfs_buf_ioerror(bp, EFSCORRUPTED);
-			XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_HIGH,
-					     mp, dip);
-#ifdef DEBUG
-			xfs_emerg(mp,
-				"bad inode magic/vsn daddr %lld #%d (magic=%x)",
-				(unsigned long long)bp->b_bn, i,
-				be16_to_cpu(dip->di_magic));
-			ASSERT(0);
-#endif
-		}
-	}
-	xfs_inobp_check(mp, bp);
-}
-
-
-static void
-xfs_inode_buf_read_verify(
-	struct xfs_buf	*bp)
-{
-	xfs_inode_buf_verify(bp);
-}
-
-static void
-xfs_inode_buf_write_verify(
-	struct xfs_buf	*bp)
-{
-	xfs_inode_buf_verify(bp);
-}
-
-const struct xfs_buf_ops xfs_inode_buf_ops = {
-	.verify_read = xfs_inode_buf_read_verify,
-	.verify_write = xfs_inode_buf_write_verify,
-};
-
-
-/*
- * This routine is called to map an inode to the buffer containing the on-disk
- * version of the inode.  It returns a pointer to the buffer containing the
- * on-disk inode in the bpp parameter, and in the dipp parameter it returns a
- * pointer to the on-disk inode within that buffer.
- *
- * If a non-zero error is returned, then the contents of bpp and dipp are
- * undefined.
- */
-int
-xfs_imap_to_bp(
-	struct xfs_mount	*mp,
-	struct xfs_trans	*tp,
-	struct xfs_imap		*imap,
-	struct xfs_dinode       **dipp,
-	struct xfs_buf		**bpp,
-	uint			buf_flags,
-	uint			iget_flags)
-{
-	struct xfs_buf		*bp;
-	int			error;
-
-	buf_flags |= XBF_UNMAPPED;
-	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno,
-				   (int)imap->im_len, buf_flags, &bp,
-				   &xfs_inode_buf_ops);
-	if (error) {
-		if (error == EAGAIN) {
-			ASSERT(buf_flags & XBF_TRYLOCK);
-			return error;
-		}
-
-		if (error == EFSCORRUPTED &&
-		    (iget_flags & XFS_IGET_UNTRUSTED))
-			return XFS_ERROR(EINVAL);
-
-		xfs_warn(mp, "%s: xfs_trans_read_buf() returned error %d.",
-			__func__, error);
-		return error;
-	}
-
-	*bpp = bp;
-	*dipp = (struct xfs_dinode *)xfs_buf_offset(bp, imap->im_boffset);
-	return 0;
-}
-
-/*
- * Move inode type and inode format specific information from the
- * on-disk inode to the in-core inode.  For fifos, devs, and sockets
- * this means set if_rdev to the proper value.  For files, directories,
- * and symlinks this means to bring in the in-line data or extent
- * pointers.  For a file in B-tree format, only the root is immediately
- * brought in-core.  The rest will be in-lined in if_extents when it
- * is first referenced (see xfs_iread_extents()).
- */
-STATIC int
-xfs_iformat(
-	xfs_inode_t		*ip,
-	xfs_dinode_t		*dip)
-{
-	xfs_attr_shortform_t	*atp;
-	int			size;
-	int			error = 0;
-	xfs_fsize_t             di_size;
-
-	if (unlikely(be32_to_cpu(dip->di_nextents) +
-		     be16_to_cpu(dip->di_anextents) >
-		     be64_to_cpu(dip->di_nblocks))) {
-		xfs_warn(ip->i_mount,
-			"corrupt dinode %Lu, extent total = %d, nblocks = %Lu.",
-			(unsigned long long)ip->i_ino,
-			(int)(be32_to_cpu(dip->di_nextents) +
-			      be16_to_cpu(dip->di_anextents)),
-			(unsigned long long)
-				be64_to_cpu(dip->di_nblocks));
-		XFS_CORRUPTION_ERROR("xfs_iformat(1)", XFS_ERRLEVEL_LOW,
-				     ip->i_mount, dip);
-		return XFS_ERROR(EFSCORRUPTED);
-	}
-
-	if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) {
-		xfs_warn(ip->i_mount, "corrupt dinode %Lu, forkoff = 0x%x.",
-			(unsigned long long)ip->i_ino,
-			dip->di_forkoff);
-		XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW,
-				     ip->i_mount, dip);
-		return XFS_ERROR(EFSCORRUPTED);
-	}
-
-	if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) &&
-		     !ip->i_mount->m_rtdev_targp)) {
-		xfs_warn(ip->i_mount,
-			"corrupt dinode %Lu, has realtime flag set.",
-			ip->i_ino);
-		XFS_CORRUPTION_ERROR("xfs_iformat(realtime)",
-				     XFS_ERRLEVEL_LOW, ip->i_mount, dip);
-		return XFS_ERROR(EFSCORRUPTED);
-	}
-
-	switch (ip->i_d.di_mode & S_IFMT) {
-	case S_IFIFO:
-	case S_IFCHR:
-	case S_IFBLK:
-	case S_IFSOCK:
-		if (unlikely(dip->di_format != XFS_DINODE_FMT_DEV)) {
-			XFS_CORRUPTION_ERROR("xfs_iformat(3)", XFS_ERRLEVEL_LOW,
-					      ip->i_mount, dip);
-			return XFS_ERROR(EFSCORRUPTED);
-		}
-		ip->i_d.di_size = 0;
-		ip->i_df.if_u2.if_rdev = xfs_dinode_get_rdev(dip);
-		break;
-
-	case S_IFREG:
-	case S_IFLNK:
-	case S_IFDIR:
-		switch (dip->di_format) {
-		case XFS_DINODE_FMT_LOCAL:
-			/*
-			 * no local regular files yet
-			 */
-			if (unlikely(S_ISREG(be16_to_cpu(dip->di_mode)))) {
-				xfs_warn(ip->i_mount,
-			"corrupt inode %Lu (local format for regular file).",
-					(unsigned long long) ip->i_ino);
-				XFS_CORRUPTION_ERROR("xfs_iformat(4)",
-						     XFS_ERRLEVEL_LOW,
-						     ip->i_mount, dip);
-				return XFS_ERROR(EFSCORRUPTED);
-			}
-
-			di_size = be64_to_cpu(dip->di_size);
-			if (unlikely(di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) {
-				xfs_warn(ip->i_mount,
-			"corrupt inode %Lu (bad size %Ld for local inode).",
-					(unsigned long long) ip->i_ino,
-					(long long) di_size);
-				XFS_CORRUPTION_ERROR("xfs_iformat(5)",
-						     XFS_ERRLEVEL_LOW,
-						     ip->i_mount, dip);
-				return XFS_ERROR(EFSCORRUPTED);
-			}
-
-			size = (int)di_size;
-			error = xfs_iformat_local(ip, dip, XFS_DATA_FORK, size);
-			break;
-		case XFS_DINODE_FMT_EXTENTS:
-			error = xfs_iformat_extents(ip, dip, XFS_DATA_FORK);
-			break;
-		case XFS_DINODE_FMT_BTREE:
-			error = xfs_iformat_btree(ip, dip, XFS_DATA_FORK);
-			break;
-		default:
-			XFS_ERROR_REPORT("xfs_iformat(6)", XFS_ERRLEVEL_LOW,
-					 ip->i_mount);
-			return XFS_ERROR(EFSCORRUPTED);
-		}
-		break;
-
-	default:
-		XFS_ERROR_REPORT("xfs_iformat(7)", XFS_ERRLEVEL_LOW, ip->i_mount);
-		return XFS_ERROR(EFSCORRUPTED);
-	}
-	if (error) {
-		return error;
-	}
-	if (!XFS_DFORK_Q(dip))
-		return 0;
-
-	ASSERT(ip->i_afp == NULL);
-	ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP | KM_NOFS);
-
-	switch (dip->di_aformat) {
-	case XFS_DINODE_FMT_LOCAL:
-		atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip);
-		size = be16_to_cpu(atp->hdr.totsize);
-
-		if (unlikely(size < sizeof(struct xfs_attr_sf_hdr))) {
-			xfs_warn(ip->i_mount,
-				"corrupt inode %Lu (bad attr fork size %Ld).",
-				(unsigned long long) ip->i_ino,
-				(long long) size);
-			XFS_CORRUPTION_ERROR("xfs_iformat(8)",
-					     XFS_ERRLEVEL_LOW,
-					     ip->i_mount, dip);
-			return XFS_ERROR(EFSCORRUPTED);
-		}
-
-		error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size);
-		break;
-	case XFS_DINODE_FMT_EXTENTS:
-		error = xfs_iformat_extents(ip, dip, XFS_ATTR_FORK);
-		break;
-	case XFS_DINODE_FMT_BTREE:
-		error = xfs_iformat_btree(ip, dip, XFS_ATTR_FORK);
-		break;
-	default:
-		error = XFS_ERROR(EFSCORRUPTED);
-		break;
-	}
-	if (error) {
-		kmem_zone_free(xfs_ifork_zone, ip->i_afp);
-		ip->i_afp = NULL;
-		xfs_idestroy_fork(ip, XFS_DATA_FORK);
-	}
-	return error;
-}
-
-/*
- * The file is in-lined in the on-disk inode.
- * If it fits into if_inline_data, then copy
- * it there, otherwise allocate a buffer for it
- * and copy the data there.  Either way, set
- * if_data to point at the data.
- * If we allocate a buffer for the data, make
- * sure that its size is a multiple of 4 and
- * record the real size in i_real_bytes.
- */
-STATIC int
-xfs_iformat_local(
-	xfs_inode_t	*ip,
-	xfs_dinode_t	*dip,
-	int		whichfork,
-	int		size)
-{
-	xfs_ifork_t	*ifp;
-	int		real_size;
-
-	/*
-	 * If the size is unreasonable, then something
-	 * is wrong and we just bail out rather than crash in
-	 * kmem_alloc() or memcpy() below.
-	 */
-	if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
-		xfs_warn(ip->i_mount,
-	"corrupt inode %Lu (bad size %d for local fork, size = %d).",
-			(unsigned long long) ip->i_ino, size,
-			XFS_DFORK_SIZE(dip, ip->i_mount, whichfork));
-		XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW,
-				     ip->i_mount, dip);
-		return XFS_ERROR(EFSCORRUPTED);
-	}
-	ifp = XFS_IFORK_PTR(ip, whichfork);
-	real_size = 0;
-	if (size == 0)
-		ifp->if_u1.if_data = NULL;
-	else if (size <= sizeof(ifp->if_u2.if_inline_data))
-		ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
-	else {
-		real_size = roundup(size, 4);
-		ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP | KM_NOFS);
-	}
-	ifp->if_bytes = size;
-	ifp->if_real_bytes = real_size;
-	if (size)
-		memcpy(ifp->if_u1.if_data, XFS_DFORK_PTR(dip, whichfork), size);
-	ifp->if_flags &= ~XFS_IFEXTENTS;
-	ifp->if_flags |= XFS_IFINLINE;
-	return 0;
-}
-
-/*
- * The file consists of a set of extents all
- * of which fit into the on-disk inode.
- * If there are few enough extents to fit into
- * the if_inline_ext, then copy them there.
- * Otherwise allocate a buffer for them and copy
- * them into it.  Either way, set if_extents
- * to point at the extents.
- */
-STATIC int
-xfs_iformat_extents(
-	xfs_inode_t	*ip,
-	xfs_dinode_t	*dip,
-	int		whichfork)
-{
-	xfs_bmbt_rec_t	*dp;
-	xfs_ifork_t	*ifp;
-	int		nex;
-	int		size;
-	int		i;
-
-	ifp = XFS_IFORK_PTR(ip, whichfork);
-	nex = XFS_DFORK_NEXTENTS(dip, whichfork);
-	size = nex * (uint)sizeof(xfs_bmbt_rec_t);
-
-	/*
-	 * If the number of extents is unreasonable, then something
-	 * is wrong and we just bail out rather than crash in
-	 * kmem_alloc() or memcpy() below.
-	 */
-	if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
-		xfs_warn(ip->i_mount, "corrupt inode %Lu ((a)extents = %d).",
-			(unsigned long long) ip->i_ino, nex);
-		XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW,
-				     ip->i_mount, dip);
-		return XFS_ERROR(EFSCORRUPTED);
-	}
-
-	ifp->if_real_bytes = 0;
-	if (nex == 0)
-		ifp->if_u1.if_extents = NULL;
-	else if (nex <= XFS_INLINE_EXTS)
-		ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
-	else
-		xfs_iext_add(ifp, 0, nex);
-
-	ifp->if_bytes = size;
-	if (size) {
-		dp = (xfs_bmbt_rec_t *) XFS_DFORK_PTR(dip, whichfork);
-		xfs_validate_extents(ifp, nex, XFS_EXTFMT_INODE(ip));
-		for (i = 0; i < nex; i++, dp++) {
-			xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
-			ep->l0 = get_unaligned_be64(&dp->l0);
-			ep->l1 = get_unaligned_be64(&dp->l1);
-		}
-		XFS_BMAP_TRACE_EXLIST(ip, nex, whichfork);
-		if (whichfork != XFS_DATA_FORK ||
-			XFS_EXTFMT_INODE(ip) == XFS_EXTFMT_NOSTATE)
-				if (unlikely(xfs_check_nostate_extents(
-				    ifp, 0, nex))) {
-					XFS_ERROR_REPORT("xfs_iformat_extents(2)",
-							 XFS_ERRLEVEL_LOW,
-							 ip->i_mount);
-					return XFS_ERROR(EFSCORRUPTED);
-				}
-	}
-	ifp->if_flags |= XFS_IFEXTENTS;
-	return 0;
-}
-
-/*
- * The file has too many extents to fit into
- * the inode, so they are in B-tree format.
- * Allocate a buffer for the root of the B-tree
- * and copy the root into it.  The i_extents
- * field will remain NULL until all of the
- * extents are read in (when they are needed).
- */
-STATIC int
-xfs_iformat_btree(
-	xfs_inode_t		*ip,
-	xfs_dinode_t		*dip,
-	int			whichfork)
-{
-	struct xfs_mount	*mp = ip->i_mount;
-	xfs_bmdr_block_t	*dfp;
-	xfs_ifork_t		*ifp;
-	/* REFERENCED */
-	int			nrecs;
-	int			size;
-
-	ifp = XFS_IFORK_PTR(ip, whichfork);
-	dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork);
-	size = XFS_BMAP_BROOT_SPACE(mp, dfp);
-	nrecs = be16_to_cpu(dfp->bb_numrecs);
-
-	/*
-	 * blow out if -- fork has less extents than can fit in
-	 * fork (fork shouldn't be a btree format), root btree
-	 * block has more records than can fit into the fork,
-	 * or the number of extents is greater than the number of
-	 * blocks.
-	 */
-	if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <=
-					XFS_IFORK_MAXEXT(ip, whichfork) ||
-		     XFS_BMDR_SPACE_CALC(nrecs) >
-					XFS_DFORK_SIZE(dip, mp, whichfork) ||
-		     XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) {
-		xfs_warn(mp, "corrupt inode %Lu (btree).",
-					(unsigned long long) ip->i_ino);
-		XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW,
-					 mp, dip);
-		return XFS_ERROR(EFSCORRUPTED);
-	}
-
-	ifp->if_broot_bytes = size;
-	ifp->if_broot = kmem_alloc(size, KM_SLEEP | KM_NOFS);
-	ASSERT(ifp->if_broot != NULL);
-	/*
-	 * Copy and convert from the on-disk structure
-	 * to the in-memory structure.
-	 */
-	xfs_bmdr_to_bmbt(ip, dfp, XFS_DFORK_SIZE(dip, ip->i_mount, whichfork),
-			 ifp->if_broot, size);
-	ifp->if_flags &= ~XFS_IFEXTENTS;
-	ifp->if_flags |= XFS_IFBROOT;
-
-	return 0;
-}
-
-STATIC void
-xfs_dinode_from_disk(
-	xfs_icdinode_t		*to,
-	xfs_dinode_t		*from)
-{
-	to->di_magic = be16_to_cpu(from->di_magic);
-	to->di_mode = be16_to_cpu(from->di_mode);
-	to->di_version = from ->di_version;
-	to->di_format = from->di_format;
-	to->di_onlink = be16_to_cpu(from->di_onlink);
-	to->di_uid = be32_to_cpu(from->di_uid);
-	to->di_gid = be32_to_cpu(from->di_gid);
-	to->di_nlink = be32_to_cpu(from->di_nlink);
-	to->di_projid_lo = be16_to_cpu(from->di_projid_lo);
-	to->di_projid_hi = be16_to_cpu(from->di_projid_hi);
-	memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
-	to->di_flushiter = be16_to_cpu(from->di_flushiter);
-	to->di_atime.t_sec = be32_to_cpu(from->di_atime.t_sec);
-	to->di_atime.t_nsec = be32_to_cpu(from->di_atime.t_nsec);
-	to->di_mtime.t_sec = be32_to_cpu(from->di_mtime.t_sec);
-	to->di_mtime.t_nsec = be32_to_cpu(from->di_mtime.t_nsec);
-	to->di_ctime.t_sec = be32_to_cpu(from->di_ctime.t_sec);
-	to->di_ctime.t_nsec = be32_to_cpu(from->di_ctime.t_nsec);
-	to->di_size = be64_to_cpu(from->di_size);
-	to->di_nblocks = be64_to_cpu(from->di_nblocks);
-	to->di_extsize = be32_to_cpu(from->di_extsize);
-	to->di_nextents = be32_to_cpu(from->di_nextents);
-	to->di_anextents = be16_to_cpu(from->di_anextents);
-	to->di_forkoff = from->di_forkoff;
-	to->di_aformat	= from->di_aformat;
-	to->di_dmevmask	= be32_to_cpu(from->di_dmevmask);
-	to->di_dmstate	= be16_to_cpu(from->di_dmstate);
-	to->di_flags	= be16_to_cpu(from->di_flags);
-	to->di_gen	= be32_to_cpu(from->di_gen);
-
-	if (to->di_version == 3) {
-		to->di_changecount = be64_to_cpu(from->di_changecount);
-		to->di_crtime.t_sec = be32_to_cpu(from->di_crtime.t_sec);
-		to->di_crtime.t_nsec = be32_to_cpu(from->di_crtime.t_nsec);
-		to->di_flags2 = be64_to_cpu(from->di_flags2);
-		to->di_ino = be64_to_cpu(from->di_ino);
-		to->di_lsn = be64_to_cpu(from->di_lsn);
-		memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
-		uuid_copy(&to->di_uuid, &from->di_uuid);
-	}
-}
-
-void
-xfs_dinode_to_disk(
-	xfs_dinode_t		*to,
-	xfs_icdinode_t		*from)
-{
-	to->di_magic = cpu_to_be16(from->di_magic);
-	to->di_mode = cpu_to_be16(from->di_mode);
-	to->di_version = from ->di_version;
-	to->di_format = from->di_format;
-	to->di_onlink = cpu_to_be16(from->di_onlink);
-	to->di_uid = cpu_to_be32(from->di_uid);
-	to->di_gid = cpu_to_be32(from->di_gid);
-	to->di_nlink = cpu_to_be32(from->di_nlink);
-	to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
-	to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
-	memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
-	to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec);
-	to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec);
-	to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec);
-	to->di_mtime.t_nsec = cpu_to_be32(from->di_mtime.t_nsec);
-	to->di_ctime.t_sec = cpu_to_be32(from->di_ctime.t_sec);
-	to->di_ctime.t_nsec = cpu_to_be32(from->di_ctime.t_nsec);
-	to->di_size = cpu_to_be64(from->di_size);
-	to->di_nblocks = cpu_to_be64(from->di_nblocks);
-	to->di_extsize = cpu_to_be32(from->di_extsize);
-	to->di_nextents = cpu_to_be32(from->di_nextents);
-	to->di_anextents = cpu_to_be16(from->di_anextents);
-	to->di_forkoff = from->di_forkoff;
-	to->di_aformat = from->di_aformat;
-	to->di_dmevmask = cpu_to_be32(from->di_dmevmask);
-	to->di_dmstate = cpu_to_be16(from->di_dmstate);
-	to->di_flags = cpu_to_be16(from->di_flags);
-	to->di_gen = cpu_to_be32(from->di_gen);
-
-	if (from->di_version == 3) {
-		to->di_changecount = cpu_to_be64(from->di_changecount);
-		to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec);
-		to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec);
-		to->di_flags2 = cpu_to_be64(from->di_flags2);
-		to->di_ino = cpu_to_be64(from->di_ino);
-		to->di_lsn = cpu_to_be64(from->di_lsn);
-		memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
-		uuid_copy(&to->di_uuid, &from->di_uuid);
-		to->di_flushiter = 0;
-	} else {
-		to->di_flushiter = cpu_to_be16(from->di_flushiter);
-	}
-}
-
 STATIC uint
 _xfs_dic2xflags(
 	__uint16_t		di_flags)
@@ -987,232 +567,47 @@
 				(XFS_DFORK_Q(dip) ? XFS_XFLAG_HASATTR : 0);
 }
 
-static bool
-xfs_dinode_verify(
-	struct xfs_mount	*mp,
-	struct xfs_inode	*ip,
-	struct xfs_dinode	*dip)
-{
-	if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))
-		return false;
-
-	/* only version 3 or greater inodes are extensively verified here */
-	if (dip->di_version < 3)
-		return true;
-
-	if (!xfs_sb_version_hascrc(&mp->m_sb))
-		return false;
-	if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
-			      offsetof(struct xfs_dinode, di_crc)))
-		return false;
-	if (be64_to_cpu(dip->di_ino) != ip->i_ino)
-		return false;
-	if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_uuid))
-		return false;
-	return true;
-}
-
-void
-xfs_dinode_calc_crc(
-	struct xfs_mount	*mp,
-	struct xfs_dinode	*dip)
-{
-	__uint32_t		crc;
-
-	if (dip->di_version < 3)
-		return;
-
-	ASSERT(xfs_sb_version_hascrc(&mp->m_sb));
-	crc = xfs_start_cksum((char *)dip, mp->m_sb.sb_inodesize,
-			      offsetof(struct xfs_dinode, di_crc));
-	dip->di_crc = xfs_end_cksum(crc);
-}
-
 /*
- * Read the disk inode attributes into the in-core inode structure.
- *
- * For version 5 superblocks, if we are initialising a new inode and we are not
- * utilising the XFS_MOUNT_IKEEP inode cluster mode, we can simple build the new
- * inode core with a random generation number. If we are keeping inodes around,
- * we need to read the inode cluster to get the existing generation number off
- * disk. Further, if we are using version 4 superblocks (i.e. v1/v2 inode
- * format) then log recovery is dependent on the di_flushiter field being
- * initialised from the current on-disk value and hence we must also read the
- * inode off disk.
+ * Lookups up an inode from "name". If ci_name is not NULL, then a CI match
+ * is allowed, otherwise it has to be an exact match. If a CI match is found,
+ * ci_name->name will point to a the actual name (caller must free) or
+ * will be set to NULL if an exact match is found.
  */
 int
-xfs_iread(
-	xfs_mount_t	*mp,
-	xfs_trans_t	*tp,
-	xfs_inode_t	*ip,
-	uint		iget_flags)
+xfs_lookup(
+	xfs_inode_t		*dp,
+	struct xfs_name		*name,
+	xfs_inode_t		**ipp,
+	struct xfs_name		*ci_name)
 {
-	xfs_buf_t	*bp;
-	xfs_dinode_t	*dip;
-	int		error;
+	xfs_ino_t		inum;
+	int			error;
+	uint			lock_mode;
 
-	/*
-	 * Fill in the location information in the in-core inode.
-	 */
-	error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, iget_flags);
+	trace_xfs_lookup(dp, name);
+
+	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
+		return XFS_ERROR(EIO);
+
+	lock_mode = xfs_ilock_map_shared(dp);
+	error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name);
+	xfs_iunlock_map_shared(dp, lock_mode);
+
 	if (error)
-		return error;
+		goto out;
 
-	/* shortcut IO on inode allocation if possible */
-	if ((iget_flags & XFS_IGET_CREATE) &&
-	    xfs_sb_version_hascrc(&mp->m_sb) &&
-	    !(mp->m_flags & XFS_MOUNT_IKEEP)) {
-		/* initialise the on-disk inode core */
-		memset(&ip->i_d, 0, sizeof(ip->i_d));
-		ip->i_d.di_magic = XFS_DINODE_MAGIC;
-		ip->i_d.di_gen = prandom_u32();
-		if (xfs_sb_version_hascrc(&mp->m_sb)) {
-			ip->i_d.di_version = 3;
-			ip->i_d.di_ino = ip->i_ino;
-			uuid_copy(&ip->i_d.di_uuid, &mp->m_sb.sb_uuid);
-		} else
-			ip->i_d.di_version = 2;
-		return 0;
-	}
-
-	/*
-	 * Get pointers to the on-disk inode and the buffer containing it.
-	 */
-	error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &bp, 0, iget_flags);
+	error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp);
 	if (error)
-		return error;
+		goto out_free_name;
 
-	/* even unallocated inodes are verified */
-	if (!xfs_dinode_verify(mp, ip, dip)) {
-		xfs_alert(mp, "%s: validation failed for inode %lld failed",
-				__func__, ip->i_ino);
-
-		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, dip);
-		error = XFS_ERROR(EFSCORRUPTED);
-		goto out_brelse;
-	}
-
-	/*
-	 * If the on-disk inode is already linked to a directory
-	 * entry, copy all of the inode into the in-core inode.
-	 * xfs_iformat() handles copying in the inode format
-	 * specific information.
-	 * Otherwise, just get the truly permanent information.
-	 */
-	if (dip->di_mode) {
-		xfs_dinode_from_disk(&ip->i_d, dip);
-		error = xfs_iformat(ip, dip);
-		if (error)  {
-#ifdef DEBUG
-			xfs_alert(mp, "%s: xfs_iformat() returned error %d",
-				__func__, error);
-#endif /* DEBUG */
-			goto out_brelse;
-		}
-	} else {
-		/*
-		 * Partial initialisation of the in-core inode. Just the bits
-		 * that xfs_ialloc won't overwrite or relies on being correct.
-		 */
-		ip->i_d.di_magic = be16_to_cpu(dip->di_magic);
-		ip->i_d.di_version = dip->di_version;
-		ip->i_d.di_gen = be32_to_cpu(dip->di_gen);
-		ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter);
-
-		if (dip->di_version == 3) {
-			ip->i_d.di_ino = be64_to_cpu(dip->di_ino);
-			uuid_copy(&ip->i_d.di_uuid, &dip->di_uuid);
-		}
-
-		/*
-		 * Make sure to pull in the mode here as well in
-		 * case the inode is released without being used.
-		 * This ensures that xfs_inactive() will see that
-		 * the inode is already free and not try to mess
-		 * with the uninitialized part of it.
-		 */
-		ip->i_d.di_mode = 0;
-	}
-
-	/*
-	 * The inode format changed when we moved the link count and
-	 * made it 32 bits long.  If this is an old format inode,
-	 * convert it in memory to look like a new one.  If it gets
-	 * flushed to disk we will convert back before flushing or
-	 * logging it.  We zero out the new projid field and the old link
-	 * count field.  We'll handle clearing the pad field (the remains
-	 * of the old uuid field) when we actually convert the inode to
-	 * the new format. We don't change the version number so that we
-	 * can distinguish this from a real new format inode.
-	 */
-	if (ip->i_d.di_version == 1) {
-		ip->i_d.di_nlink = ip->i_d.di_onlink;
-		ip->i_d.di_onlink = 0;
-		xfs_set_projid(ip, 0);
-	}
-
-	ip->i_delayed_blks = 0;
-
-	/*
-	 * Mark the buffer containing the inode as something to keep
-	 * around for a while.  This helps to keep recently accessed
-	 * meta-data in-core longer.
-	 */
-	xfs_buf_set_ref(bp, XFS_INO_REF);
-
-	/*
-	 * Use xfs_trans_brelse() to release the buffer containing the on-disk
-	 * inode, because it was acquired with xfs_trans_read_buf() in
-	 * xfs_imap_to_bp() above.  If tp is NULL, this is just a normal
-	 * brelse().  If we're within a transaction, then xfs_trans_brelse()
-	 * will only release the buffer if it is not dirty within the
-	 * transaction.  It will be OK to release the buffer in this case,
-	 * because inodes on disk are never destroyed and we will be locking the
-	 * new in-core inode before putting it in the cache where other
-	 * processes can find it.  Thus we don't have to worry about the inode
-	 * being changed just because we released the buffer.
-	 */
- out_brelse:
-	xfs_trans_brelse(tp, bp);
-	return error;
-}
-
-/*
- * Read in extents from a btree-format inode.
- * Allocate and fill in if_extents.  Real work is done in xfs_bmap.c.
- */
-int
-xfs_iread_extents(
-	xfs_trans_t	*tp,
-	xfs_inode_t	*ip,
-	int		whichfork)
-{
-	int		error;
-	xfs_ifork_t	*ifp;
-	xfs_extnum_t	nextents;
-
-	if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) {
-		XFS_ERROR_REPORT("xfs_iread_extents", XFS_ERRLEVEL_LOW,
-				 ip->i_mount);
-		return XFS_ERROR(EFSCORRUPTED);
-	}
-	nextents = XFS_IFORK_NEXTENTS(ip, whichfork);
-	ifp = XFS_IFORK_PTR(ip, whichfork);
-
-	/*
-	 * We know that the size is valid (it's checked in iformat_btree)
-	 */
-	ifp->if_bytes = ifp->if_real_bytes = 0;
-	ifp->if_flags |= XFS_IFEXTENTS;
-	xfs_iext_add(ifp, 0, nextents);
-	error = xfs_bmap_read_extents(tp, ip, whichfork);
-	if (error) {
-		xfs_iext_destroy(ifp);
-		ifp->if_flags &= ~XFS_IFEXTENTS;
-		return error;
-	}
-	xfs_validate_extents(ifp, nextents, XFS_EXTFMT_INODE(ip));
 	return 0;
+
+out_free_name:
+	if (ci_name)
+		kmem_free(ci_name->name);
+out:
+	*ipp = NULL;
+	return error;
 }
 
 /*
@@ -1295,8 +690,8 @@
 	ip->i_d.di_onlink = 0;
 	ip->i_d.di_nlink = nlink;
 	ASSERT(ip->i_d.di_nlink == nlink);
-	ip->i_d.di_uid = current_fsuid();
-	ip->i_d.di_gid = current_fsgid();
+	ip->i_d.di_uid = xfs_kuid_to_uid(current_fsuid());
+	ip->i_d.di_gid = xfs_kgid_to_gid(current_fsgid());
 	xfs_set_projid(ip, prid);
 	memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
 
@@ -1335,7 +730,7 @@
 	 */
 	if ((irix_sgid_inherit) &&
 	    (ip->i_d.di_mode & S_ISGID) &&
-	    (!in_group_p((gid_t)ip->i_d.di_gid))) {
+	    (!in_group_p(xfs_gid_to_kgid(ip->i_d.di_gid)))) {
 		ip->i_d.di_mode &= ~S_ISGID;
 	}
 
@@ -1467,6 +862,583 @@
 }
 
 /*
+ * Allocates a new inode from disk and return a pointer to the
+ * incore copy. This routine will internally commit the current
+ * transaction and allocate a new one if the Space Manager needed
+ * to do an allocation to replenish the inode free-list.
+ *
+ * This routine is designed to be called from xfs_create and
+ * xfs_create_dir.
+ *
+ */
+int
+xfs_dir_ialloc(
+	xfs_trans_t	**tpp,		/* input: current transaction;
+					   output: may be a new transaction. */
+	xfs_inode_t	*dp,		/* directory within whose allocate
+					   the inode. */
+	umode_t		mode,
+	xfs_nlink_t	nlink,
+	xfs_dev_t	rdev,
+	prid_t		prid,		/* project id */
+	int		okalloc,	/* ok to allocate new space */
+	xfs_inode_t	**ipp,		/* pointer to inode; it will be
+					   locked. */
+	int		*committed)
+
+{
+	xfs_trans_t	*tp;
+	xfs_trans_t	*ntp;
+	xfs_inode_t	*ip;
+	xfs_buf_t	*ialloc_context = NULL;
+	int		code;
+	void		*dqinfo;
+	uint		tflags;
+
+	tp = *tpp;
+	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
+
+	/*
+	 * xfs_ialloc will return a pointer to an incore inode if
+	 * the Space Manager has an available inode on the free
+	 * list. Otherwise, it will do an allocation and replenish
+	 * the freelist.  Since we can only do one allocation per
+	 * transaction without deadlocks, we will need to commit the
+	 * current transaction and start a new one.  We will then
+	 * need to call xfs_ialloc again to get the inode.
+	 *
+	 * If xfs_ialloc did an allocation to replenish the freelist,
+	 * it returns the bp containing the head of the freelist as
+	 * ialloc_context. We will hold a lock on it across the
+	 * transaction commit so that no other process can steal
+	 * the inode(s) that we've just allocated.
+	 */
+	code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, okalloc,
+			  &ialloc_context, &ip);
+
+	/*
+	 * Return an error if we were unable to allocate a new inode.
+	 * This should only happen if we run out of space on disk or
+	 * encounter a disk error.
+	 */
+	if (code) {
+		*ipp = NULL;
+		return code;
+	}
+	if (!ialloc_context && !ip) {
+		*ipp = NULL;
+		return XFS_ERROR(ENOSPC);
+	}
+
+	/*
+	 * If the AGI buffer is non-NULL, then we were unable to get an
+	 * inode in one operation.  We need to commit the current
+	 * transaction and call xfs_ialloc() again.  It is guaranteed
+	 * to succeed the second time.
+	 */
+	if (ialloc_context) {
+		struct xfs_trans_res tres;
+
+		/*
+		 * Normally, xfs_trans_commit releases all the locks.
+		 * We call bhold to hang on to the ialloc_context across
+		 * the commit.  Holding this buffer prevents any other
+		 * processes from doing any allocations in this
+		 * allocation group.
+		 */
+		xfs_trans_bhold(tp, ialloc_context);
+		/*
+		 * Save the log reservation so we can use
+		 * them in the next transaction.
+		 */
+		tres.tr_logres = xfs_trans_get_log_res(tp);
+		tres.tr_logcount = xfs_trans_get_log_count(tp);
+
+		/*
+		 * We want the quota changes to be associated with the next
+		 * transaction, NOT this one. So, detach the dqinfo from this
+		 * and attach it to the next transaction.
+		 */
+		dqinfo = NULL;
+		tflags = 0;
+		if (tp->t_dqinfo) {
+			dqinfo = (void *)tp->t_dqinfo;
+			tp->t_dqinfo = NULL;
+			tflags = tp->t_flags & XFS_TRANS_DQ_DIRTY;
+			tp->t_flags &= ~(XFS_TRANS_DQ_DIRTY);
+		}
+
+		ntp = xfs_trans_dup(tp);
+		code = xfs_trans_commit(tp, 0);
+		tp = ntp;
+		if (committed != NULL) {
+			*committed = 1;
+		}
+		/*
+		 * If we get an error during the commit processing,
+		 * release the buffer that is still held and return
+		 * to the caller.
+		 */
+		if (code) {
+			xfs_buf_relse(ialloc_context);
+			if (dqinfo) {
+				tp->t_dqinfo = dqinfo;
+				xfs_trans_free_dqinfo(tp);
+			}
+			*tpp = ntp;
+			*ipp = NULL;
+			return code;
+		}
+
+		/*
+		 * transaction commit worked ok so we can drop the extra ticket
+		 * reference that we gained in xfs_trans_dup()
+		 */
+		xfs_log_ticket_put(tp->t_ticket);
+		tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
+		code = xfs_trans_reserve(tp, &tres, 0, 0);
+
+		/*
+		 * Re-attach the quota info that we detached from prev trx.
+		 */
+		if (dqinfo) {
+			tp->t_dqinfo = dqinfo;
+			tp->t_flags |= tflags;
+		}
+
+		if (code) {
+			xfs_buf_relse(ialloc_context);
+			*tpp = ntp;
+			*ipp = NULL;
+			return code;
+		}
+		xfs_trans_bjoin(tp, ialloc_context);
+
+		/*
+		 * Call ialloc again. Since we've locked out all
+		 * other allocations in this allocation group,
+		 * this call should always succeed.
+		 */
+		code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid,
+				  okalloc, &ialloc_context, &ip);
+
+		/*
+		 * If we get an error at this point, return to the caller
+		 * so that the current transaction can be aborted.
+		 */
+		if (code) {
+			*tpp = tp;
+			*ipp = NULL;
+			return code;
+		}
+		ASSERT(!ialloc_context && ip);
+
+	} else {
+		if (committed != NULL)
+			*committed = 0;
+	}
+
+	*ipp = ip;
+	*tpp = tp;
+
+	return 0;
+}
+
+/*
+ * Decrement the link count on an inode & log the change.
+ * If this causes the link count to go to zero, initiate the
+ * logging activity required to truncate a file.
+ */
+int				/* error */
+xfs_droplink(
+	xfs_trans_t *tp,
+	xfs_inode_t *ip)
+{
+	int	error;
+
+	xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
+
+	ASSERT (ip->i_d.di_nlink > 0);
+	ip->i_d.di_nlink--;
+	drop_nlink(VFS_I(ip));
+	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+
+	error = 0;
+	if (ip->i_d.di_nlink == 0) {
+		/*
+		 * We're dropping the last link to this file.
+		 * Move the on-disk inode to the AGI unlinked list.
+		 * From xfs_inactive() we will pull the inode from
+		 * the list and free it.
+		 */
+		error = xfs_iunlink(tp, ip);
+	}
+	return error;
+}
+
+/*
+ * This gets called when the inode's version needs to be changed from 1 to 2.
+ * Currently this happens when the nlink field overflows the old 16-bit value
+ * or when chproj is called to change the project for the first time.
+ * As a side effect the superblock version will also get rev'd
+ * to contain the NLINK bit.
+ */
+void
+xfs_bump_ino_vers2(
+	xfs_trans_t	*tp,
+	xfs_inode_t	*ip)
+{
+	xfs_mount_t	*mp;
+
+	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+	ASSERT(ip->i_d.di_version == 1);
+
+	ip->i_d.di_version = 2;
+	ip->i_d.di_onlink = 0;
+	memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
+	mp = tp->t_mountp;
+	if (!xfs_sb_version_hasnlink(&mp->m_sb)) {
+		spin_lock(&mp->m_sb_lock);
+		if (!xfs_sb_version_hasnlink(&mp->m_sb)) {
+			xfs_sb_version_addnlink(&mp->m_sb);
+			spin_unlock(&mp->m_sb_lock);
+			xfs_mod_sb(tp, XFS_SB_VERSIONNUM);
+		} else {
+			spin_unlock(&mp->m_sb_lock);
+		}
+	}
+	/* Caller must log the inode */
+}
+
+/*
+ * Increment the link count on an inode & log the change.
+ */
+int
+xfs_bumplink(
+	xfs_trans_t *tp,
+	xfs_inode_t *ip)
+{
+	xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
+
+	ASSERT(ip->i_d.di_nlink > 0);
+	ip->i_d.di_nlink++;
+	inc_nlink(VFS_I(ip));
+	if ((ip->i_d.di_version == 1) &&
+	    (ip->i_d.di_nlink > XFS_MAXLINK_1)) {
+		/*
+		 * The inode has increased its number of links beyond
+		 * what can fit in an old format inode.  It now needs
+		 * to be converted to a version 2 inode with a 32 bit
+		 * link count.  If this is the first inode in the file
+		 * system to do this, then we need to bump the superblock
+		 * version number as well.
+		 */
+		xfs_bump_ino_vers2(tp, ip);
+	}
+
+	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+	return 0;
+}
+
+int
+xfs_create(
+	xfs_inode_t		*dp,
+	struct xfs_name		*name,
+	umode_t			mode,
+	xfs_dev_t		rdev,
+	xfs_inode_t		**ipp)
+{
+	int			is_dir = S_ISDIR(mode);
+	struct xfs_mount	*mp = dp->i_mount;
+	struct xfs_inode	*ip = NULL;
+	struct xfs_trans	*tp = NULL;
+	int			error;
+	xfs_bmap_free_t		free_list;
+	xfs_fsblock_t		first_block;
+	bool                    unlock_dp_on_error = false;
+	uint			cancel_flags;
+	int			committed;
+	prid_t			prid;
+	struct xfs_dquot	*udqp = NULL;
+	struct xfs_dquot	*gdqp = NULL;
+	struct xfs_dquot	*pdqp = NULL;
+	struct xfs_trans_res	tres;
+	uint			resblks;
+
+	trace_xfs_create(dp, name);
+
+	if (XFS_FORCED_SHUTDOWN(mp))
+		return XFS_ERROR(EIO);
+
+	if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
+		prid = xfs_get_projid(dp);
+	else
+		prid = XFS_PROJID_DEFAULT;
+
+	/*
+	 * Make sure that we have allocated dquot(s) on disk.
+	 */
+	error = xfs_qm_vop_dqalloc(dp, xfs_kuid_to_uid(current_fsuid()),
+					xfs_kgid_to_gid(current_fsgid()), prid,
+					XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
+					&udqp, &gdqp, &pdqp);
+	if (error)
+		return error;
+
+	if (is_dir) {
+		rdev = 0;
+		resblks = XFS_MKDIR_SPACE_RES(mp, name->len);
+		tres.tr_logres = M_RES(mp)->tr_mkdir.tr_logres;
+		tres.tr_logcount = XFS_MKDIR_LOG_COUNT;
+		tp = xfs_trans_alloc(mp, XFS_TRANS_MKDIR);
+	} else {
+		resblks = XFS_CREATE_SPACE_RES(mp, name->len);
+		tres.tr_logres = M_RES(mp)->tr_create.tr_logres;
+		tres.tr_logcount = XFS_CREATE_LOG_COUNT;
+		tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE);
+	}
+
+	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
+
+	/*
+	 * Initially assume that the file does not exist and
+	 * reserve the resources for that case.  If that is not
+	 * the case we'll drop the one we have and get a more
+	 * appropriate transaction later.
+	 */
+	tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
+	error = xfs_trans_reserve(tp, &tres, resblks, 0);
+	if (error == ENOSPC) {
+		/* flush outstanding delalloc blocks and retry */
+		xfs_flush_inodes(mp);
+		error = xfs_trans_reserve(tp, &tres, resblks, 0);
+	}
+	if (error == ENOSPC) {
+		/* No space at all so try a "no-allocation" reservation */
+		resblks = 0;
+		error = xfs_trans_reserve(tp, &tres, 0, 0);
+	}
+	if (error) {
+		cancel_flags = 0;
+		goto out_trans_cancel;
+	}
+
+	xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
+	unlock_dp_on_error = true;
+
+	xfs_bmap_init(&free_list, &first_block);
+
+	/*
+	 * Reserve disk quota and the inode.
+	 */
+	error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp,
+						pdqp, resblks, 1, 0);
+	if (error)
+		goto out_trans_cancel;
+
+	error = xfs_dir_canenter(tp, dp, name, resblks);
+	if (error)
+		goto out_trans_cancel;
+
+	/*
+	 * A newly created regular or special file just has one directory
+	 * entry pointing to them, but a directory also the "." entry
+	 * pointing to itself.
+	 */
+	error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev,
+			       prid, resblks > 0, &ip, &committed);
+	if (error) {
+		if (error == ENOSPC)
+			goto out_trans_cancel;
+		goto out_trans_abort;
+	}
+
+	/*
+	 * Now we join the directory inode to the transaction.  We do not do it
+	 * earlier because xfs_dir_ialloc might commit the previous transaction
+	 * (and release all the locks).  An error from here on will result in
+	 * the transaction cancel unlocking dp so don't do it explicitly in the
+	 * error path.
+	 */
+	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
+	unlock_dp_on_error = false;
+
+	error = xfs_dir_createname(tp, dp, name, ip->i_ino,
+					&first_block, &free_list, resblks ?
+					resblks - XFS_IALLOC_SPACE_RES(mp) : 0);
+	if (error) {
+		ASSERT(error != ENOSPC);
+		goto out_trans_abort;
+	}
+	xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+	xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
+
+	if (is_dir) {
+		error = xfs_dir_init(tp, ip, dp);
+		if (error)
+			goto out_bmap_cancel;
+
+		error = xfs_bumplink(tp, dp);
+		if (error)
+			goto out_bmap_cancel;
+	}
+
+	/*
+	 * If this is a synchronous mount, make sure that the
+	 * create transaction goes to disk before returning to
+	 * the user.
+	 */
+	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
+		xfs_trans_set_sync(tp);
+
+	/*
+	 * Attach the dquot(s) to the inodes and modify them incore.
+	 * These ids of the inode couldn't have changed since the new
+	 * inode has been locked ever since it was created.
+	 */
+	xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp);
+
+	error = xfs_bmap_finish(&tp, &free_list, &committed);
+	if (error)
+		goto out_bmap_cancel;
+
+	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+	if (error)
+		goto out_release_inode;
+
+	xfs_qm_dqrele(udqp);
+	xfs_qm_dqrele(gdqp);
+	xfs_qm_dqrele(pdqp);
+
+	*ipp = ip;
+	return 0;
+
+ out_bmap_cancel:
+	xfs_bmap_cancel(&free_list);
+ out_trans_abort:
+	cancel_flags |= XFS_TRANS_ABORT;
+ out_trans_cancel:
+	xfs_trans_cancel(tp, cancel_flags);
+ out_release_inode:
+	/*
+	 * Wait until after the current transaction is aborted to
+	 * release the inode.  This prevents recursive transactions
+	 * and deadlocks from xfs_inactive.
+	 */
+	if (ip)
+		IRELE(ip);
+
+	xfs_qm_dqrele(udqp);
+	xfs_qm_dqrele(gdqp);
+	xfs_qm_dqrele(pdqp);
+
+	if (unlock_dp_on_error)
+		xfs_iunlock(dp, XFS_ILOCK_EXCL);
+	return error;
+}
+
+int
+xfs_link(
+	xfs_inode_t		*tdp,
+	xfs_inode_t		*sip,
+	struct xfs_name		*target_name)
+{
+	xfs_mount_t		*mp = tdp->i_mount;
+	xfs_trans_t		*tp;
+	int			error;
+	xfs_bmap_free_t         free_list;
+	xfs_fsblock_t           first_block;
+	int			cancel_flags;
+	int			committed;
+	int			resblks;
+
+	trace_xfs_link(tdp, target_name);
+
+	ASSERT(!S_ISDIR(sip->i_d.di_mode));
+
+	if (XFS_FORCED_SHUTDOWN(mp))
+		return XFS_ERROR(EIO);
+
+	error = xfs_qm_dqattach(sip, 0);
+	if (error)
+		goto std_return;
+
+	error = xfs_qm_dqattach(tdp, 0);
+	if (error)
+		goto std_return;
+
+	tp = xfs_trans_alloc(mp, XFS_TRANS_LINK);
+	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
+	resblks = XFS_LINK_SPACE_RES(mp, target_name->len);
+	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_link, resblks, 0);
+	if (error == ENOSPC) {
+		resblks = 0;
+		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_link, 0, 0);
+	}
+	if (error) {
+		cancel_flags = 0;
+		goto error_return;
+	}
+
+	xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL);
+
+	xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL);
+	xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL);
+
+	/*
+	 * If we are using project inheritance, we only allow hard link
+	 * creation in our tree when the project IDs are the same; else
+	 * the tree quota mechanism could be circumvented.
+	 */
+	if (unlikely((tdp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
+		     (xfs_get_projid(tdp) != xfs_get_projid(sip)))) {
+		error = XFS_ERROR(EXDEV);
+		goto error_return;
+	}
+
+	error = xfs_dir_canenter(tp, tdp, target_name, resblks);
+	if (error)
+		goto error_return;
+
+	xfs_bmap_init(&free_list, &first_block);
+
+	error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino,
+					&first_block, &free_list, resblks);
+	if (error)
+		goto abort_return;
+	xfs_trans_ichgtime(tp, tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+	xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE);
+
+	error = xfs_bumplink(tp, sip);
+	if (error)
+		goto abort_return;
+
+	/*
+	 * If this is a synchronous mount, make sure that the
+	 * link transaction goes to disk before returning to
+	 * the user.
+	 */
+	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
+		xfs_trans_set_sync(tp);
+	}
+
+	error = xfs_bmap_finish (&tp, &free_list, &committed);
+	if (error) {
+		xfs_bmap_cancel(&free_list);
+		goto abort_return;
+	}
+
+	return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+
+ abort_return:
+	cancel_flags |= XFS_TRANS_ABORT;
+ error_return:
+	xfs_trans_cancel(tp, cancel_flags);
+ std_return:
+	return error;
+}
+
+/*
  * Free up the underlying blocks past new_size.  The new size must be smaller
  * than the current size.  This routine can be used both for the attribute and
  * data fork, and does not modify the inode size, which is left to the caller.
@@ -1576,10 +1548,7 @@
 		 * reference that we gained in xfs_trans_dup()
 		 */
 		xfs_log_ticket_put(tp->t_ticket);
-		error = xfs_trans_reserve(tp, 0,
-					XFS_ITRUNCATE_LOG_RES(mp), 0,
-					XFS_TRANS_PERM_LOG_RES,
-					XFS_ITRUNCATE_LOG_COUNT);
+		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
 		if (error)
 			goto out;
 	}
@@ -1605,6 +1574,271 @@
 	goto out;
 }
 
+int
+xfs_release(
+	xfs_inode_t	*ip)
+{
+	xfs_mount_t	*mp = ip->i_mount;
+	int		error;
+
+	if (!S_ISREG(ip->i_d.di_mode) || (ip->i_d.di_mode == 0))
+		return 0;
+
+	/* If this is a read-only mount, don't do this (would generate I/O) */
+	if (mp->m_flags & XFS_MOUNT_RDONLY)
+		return 0;
+
+	if (!XFS_FORCED_SHUTDOWN(mp)) {
+		int truncated;
+
+		/*
+		 * If we are using filestreams, and we have an unlinked
+		 * file that we are processing the last close on, then nothing
+		 * will be able to reopen and write to this file. Purge this
+		 * inode from the filestreams cache so that it doesn't delay
+		 * teardown of the inode.
+		 */
+		if ((ip->i_d.di_nlink == 0) && xfs_inode_is_filestream(ip))
+			xfs_filestream_deassociate(ip);
+
+		/*
+		 * If we previously truncated this file and removed old data
+		 * in the process, we want to initiate "early" writeout on
+		 * the last close.  This is an attempt to combat the notorious
+		 * NULL files problem which is particularly noticeable from a
+		 * truncate down, buffered (re-)write (delalloc), followed by
+		 * a crash.  What we are effectively doing here is
+		 * significantly reducing the time window where we'd otherwise
+		 * be exposed to that problem.
+		 */
+		truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED);
+		if (truncated) {
+			xfs_iflags_clear(ip, XFS_IDIRTY_RELEASE);
+			if (VN_DIRTY(VFS_I(ip)) && ip->i_delayed_blks > 0) {
+				error = -filemap_flush(VFS_I(ip)->i_mapping);
+				if (error)
+					return error;
+			}
+		}
+	}
+
+	if (ip->i_d.di_nlink == 0)
+		return 0;
+
+	if (xfs_can_free_eofblocks(ip, false)) {
+
+		/*
+		 * If we can't get the iolock just skip truncating the blocks
+		 * past EOF because we could deadlock with the mmap_sem
+		 * otherwise.  We'll get another chance to drop them once the
+		 * last reference to the inode is dropped, so we'll never leak
+		 * blocks permanently.
+		 *
+		 * Further, check if the inode is being opened, written and
+		 * closed frequently and we have delayed allocation blocks
+		 * outstanding (e.g. streaming writes from the NFS server),
+		 * truncating the blocks past EOF will cause fragmentation to
+		 * occur.
+		 *
+		 * In this case don't do the truncation, either, but we have to
+		 * be careful how we detect this case. Blocks beyond EOF show
+		 * up as i_delayed_blks even when the inode is clean, so we
+		 * need to truncate them away first before checking for a dirty
+		 * release. Hence on the first dirty close we will still remove
+		 * the speculative allocation, but after that we will leave it
+		 * in place.
+		 */
+		if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE))
+			return 0;
+
+		error = xfs_free_eofblocks(mp, ip, true);
+		if (error && error != EAGAIN)
+			return error;
+
+		/* delalloc blocks after truncation means it really is dirty */
+		if (ip->i_delayed_blks)
+			xfs_iflags_set(ip, XFS_IDIRTY_RELEASE);
+	}
+	return 0;
+}
+
+/*
+ * xfs_inactive
+ *
+ * This is called when the vnode reference count for the vnode
+ * goes to zero.  If the file has been unlinked, then it must
+ * now be truncated.  Also, we clear all of the read-ahead state
+ * kept for the inode here since the file is now closed.
+ */
+int
+xfs_inactive(
+	xfs_inode_t	*ip)
+{
+	xfs_bmap_free_t		free_list;
+	xfs_fsblock_t		first_block;
+	int			committed;
+	struct xfs_trans	*tp;
+	struct xfs_mount	*mp;
+	struct xfs_trans_res	*resp;
+	int			error;
+	int			truncate = 0;
+
+	/*
+	 * If the inode is already free, then there can be nothing
+	 * to clean up here.
+	 */
+	if (ip->i_d.di_mode == 0 || is_bad_inode(VFS_I(ip))) {
+		ASSERT(ip->i_df.if_real_bytes == 0);
+		ASSERT(ip->i_df.if_broot_bytes == 0);
+		return VN_INACTIVE_CACHE;
+	}
+
+	mp = ip->i_mount;
+
+	error = 0;
+
+	/* If this is a read-only mount, don't do this (would generate I/O) */
+	if (mp->m_flags & XFS_MOUNT_RDONLY)
+		goto out;
+
+	if (ip->i_d.di_nlink != 0) {
+		/*
+		 * force is true because we are evicting an inode from the
+		 * cache. Post-eof blocks must be freed, lest we end up with
+		 * broken free space accounting.
+		 */
+		if (xfs_can_free_eofblocks(ip, true)) {
+			error = xfs_free_eofblocks(mp, ip, false);
+			if (error)
+				return VN_INACTIVE_CACHE;
+		}
+		goto out;
+	}
+
+	if (S_ISREG(ip->i_d.di_mode) &&
+	    (ip->i_d.di_size != 0 || XFS_ISIZE(ip) != 0 ||
+	     ip->i_d.di_nextents > 0 || ip->i_delayed_blks > 0))
+		truncate = 1;
+
+	error = xfs_qm_dqattach(ip, 0);
+	if (error)
+		return VN_INACTIVE_CACHE;
+
+	tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
+	resp = (truncate || S_ISLNK(ip->i_d.di_mode)) ?
+		&M_RES(mp)->tr_itruncate : &M_RES(mp)->tr_ifree;
+
+	error = xfs_trans_reserve(tp, resp, 0, 0);
+	if (error) {
+		ASSERT(XFS_FORCED_SHUTDOWN(mp));
+		xfs_trans_cancel(tp, 0);
+		return VN_INACTIVE_CACHE;
+	}
+
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	xfs_trans_ijoin(tp, ip, 0);
+
+	if (S_ISLNK(ip->i_d.di_mode)) {
+		error = xfs_inactive_symlink(ip, &tp);
+		if (error)
+			goto out_cancel;
+	} else if (truncate) {
+		ip->i_d.di_size = 0;
+		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+
+		error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, 0);
+		if (error)
+			goto out_cancel;
+
+		ASSERT(ip->i_d.di_nextents == 0);
+	}
+
+	/*
+	 * If there are attributes associated with the file then blow them away
+	 * now.  The code calls a routine that recursively deconstructs the
+	 * attribute fork.  We need to just commit the current transaction
+	 * because we can't use it for xfs_attr_inactive().
+	 */
+	if (ip->i_d.di_anextents > 0) {
+		ASSERT(ip->i_d.di_forkoff != 0);
+
+		error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+		if (error)
+			goto out_unlock;
+
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+		error = xfs_attr_inactive(ip);
+		if (error)
+			goto out;
+
+		tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
+		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ifree, 0, 0);
+		if (error) {
+			xfs_trans_cancel(tp, 0);
+			goto out;
+		}
+
+		xfs_ilock(ip, XFS_ILOCK_EXCL);
+		xfs_trans_ijoin(tp, ip, 0);
+	}
+
+	if (ip->i_afp)
+		xfs_idestroy_fork(ip, XFS_ATTR_FORK);
+
+	ASSERT(ip->i_d.di_anextents == 0);
+
+	/*
+	 * Free the inode.
+	 */
+	xfs_bmap_init(&free_list, &first_block);
+	error = xfs_ifree(tp, ip, &free_list);
+	if (error) {
+		/*
+		 * If we fail to free the inode, shut down.  The cancel
+		 * might do that, we need to make sure.  Otherwise the
+		 * inode might be lost for a long time or forever.
+		 */
+		if (!XFS_FORCED_SHUTDOWN(mp)) {
+			xfs_notice(mp, "%s: xfs_ifree returned error %d",
+				__func__, error);
+			xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
+		}
+		xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
+	} else {
+		/*
+		 * Credit the quota account(s). The inode is gone.
+		 */
+		xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_ICOUNT, -1);
+
+		/*
+		 * Just ignore errors at this point.  There is nothing we can
+		 * do except to try to keep going. Make sure it's not a silent
+		 * error.
+		 */
+		error = xfs_bmap_finish(&tp,  &free_list, &committed);
+		if (error)
+			xfs_notice(mp, "%s: xfs_bmap_finish returned error %d",
+				__func__, error);
+		error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+		if (error)
+			xfs_notice(mp, "%s: xfs_trans_commit returned error %d",
+				__func__, error);
+	}
+
+	/*
+	 * Release the dquots held by inode, if any.
+	 */
+	xfs_qm_dqdetach(ip);
+out_unlock:
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+out:
+	return VN_INACTIVE_CACHE;
+out_cancel:
+	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
+	goto out_unlock;
+}
+
 /*
  * This is called when the inode's link count goes to 0.
  * We place the on-disk inode on a list in the AGI.  It
@@ -1861,7 +2095,7 @@
 }
 
 /*
- * A big issue when freeing the inode cluster is is that we _cannot_ skip any
+ * A big issue when freeing the inode cluster is that we _cannot_ skip any
  * inodes that are in memory - they all must be marked stale and attached to
  * the cluster buffer.
  */
@@ -2094,272 +2328,6 @@
 }
 
 /*
- * Reallocate the space for if_broot based on the number of records
- * being added or deleted as indicated in rec_diff.  Move the records
- * and pointers in if_broot to fit the new size.  When shrinking this
- * will eliminate holes between the records and pointers created by
- * the caller.  When growing this will create holes to be filled in
- * by the caller.
- *
- * The caller must not request to add more records than would fit in
- * the on-disk inode root.  If the if_broot is currently NULL, then
- * if we adding records one will be allocated.  The caller must also
- * not request that the number of records go below zero, although
- * it can go to zero.
- *
- * ip -- the inode whose if_broot area is changing
- * ext_diff -- the change in the number of records, positive or negative,
- *	 requested for the if_broot array.
- */
-void
-xfs_iroot_realloc(
-	xfs_inode_t		*ip,
-	int			rec_diff,
-	int			whichfork)
-{
-	struct xfs_mount	*mp = ip->i_mount;
-	int			cur_max;
-	xfs_ifork_t		*ifp;
-	struct xfs_btree_block	*new_broot;
-	int			new_max;
-	size_t			new_size;
-	char			*np;
-	char			*op;
-
-	/*
-	 * Handle the degenerate case quietly.
-	 */
-	if (rec_diff == 0) {
-		return;
-	}
-
-	ifp = XFS_IFORK_PTR(ip, whichfork);
-	if (rec_diff > 0) {
-		/*
-		 * If there wasn't any memory allocated before, just
-		 * allocate it now and get out.
-		 */
-		if (ifp->if_broot_bytes == 0) {
-			new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, rec_diff);
-			ifp->if_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS);
-			ifp->if_broot_bytes = (int)new_size;
-			return;
-		}
-
-		/*
-		 * If there is already an existing if_broot, then we need
-		 * to realloc() it and shift the pointers to their new
-		 * location.  The records don't change location because
-		 * they are kept butted up against the btree block header.
-		 */
-		cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
-		new_max = cur_max + rec_diff;
-		new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max);
-		ifp->if_broot = kmem_realloc(ifp->if_broot, new_size,
-				XFS_BMAP_BROOT_SPACE_CALC(mp, cur_max),
-				KM_SLEEP | KM_NOFS);
-		op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
-						     ifp->if_broot_bytes);
-		np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
-						     (int)new_size);
-		ifp->if_broot_bytes = (int)new_size;
-		ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <=
-			XFS_IFORK_SIZE(ip, whichfork));
-		memmove(np, op, cur_max * (uint)sizeof(xfs_dfsbno_t));
-		return;
-	}
-
-	/*
-	 * rec_diff is less than 0.  In this case, we are shrinking the
-	 * if_broot buffer.  It must already exist.  If we go to zero
-	 * records, just get rid of the root and clear the status bit.
-	 */
-	ASSERT((ifp->if_broot != NULL) && (ifp->if_broot_bytes > 0));
-	cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
-	new_max = cur_max + rec_diff;
-	ASSERT(new_max >= 0);
-	if (new_max > 0)
-		new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max);
-	else
-		new_size = 0;
-	if (new_size > 0) {
-		new_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS);
-		/*
-		 * First copy over the btree block header.
-		 */
-		memcpy(new_broot, ifp->if_broot,
-			XFS_BMBT_BLOCK_LEN(ip->i_mount));
-	} else {
-		new_broot = NULL;
-		ifp->if_flags &= ~XFS_IFBROOT;
-	}
-
-	/*
-	 * Only copy the records and pointers if there are any.
-	 */
-	if (new_max > 0) {
-		/*
-		 * First copy the records.
-		 */
-		op = (char *)XFS_BMBT_REC_ADDR(mp, ifp->if_broot, 1);
-		np = (char *)XFS_BMBT_REC_ADDR(mp, new_broot, 1);
-		memcpy(np, op, new_max * (uint)sizeof(xfs_bmbt_rec_t));
-
-		/*
-		 * Then copy the pointers.
-		 */
-		op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
-						     ifp->if_broot_bytes);
-		np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, new_broot, 1,
-						     (int)new_size);
-		memcpy(np, op, new_max * (uint)sizeof(xfs_dfsbno_t));
-	}
-	kmem_free(ifp->if_broot);
-	ifp->if_broot = new_broot;
-	ifp->if_broot_bytes = (int)new_size;
-	if (ifp->if_broot)
-		ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <=
-			XFS_IFORK_SIZE(ip, whichfork));
-	return;
-}
-
-
-/*
- * This is called when the amount of space needed for if_data
- * is increased or decreased.  The change in size is indicated by
- * the number of bytes that need to be added or deleted in the
- * byte_diff parameter.
- *
- * If the amount of space needed has decreased below the size of the
- * inline buffer, then switch to using the inline buffer.  Otherwise,
- * use kmem_realloc() or kmem_alloc() to adjust the size of the buffer
- * to what is needed.
- *
- * ip -- the inode whose if_data area is changing
- * byte_diff -- the change in the number of bytes, positive or negative,
- *	 requested for the if_data array.
- */
-void
-xfs_idata_realloc(
-	xfs_inode_t	*ip,
-	int		byte_diff,
-	int		whichfork)
-{
-	xfs_ifork_t	*ifp;
-	int		new_size;
-	int		real_size;
-
-	if (byte_diff == 0) {
-		return;
-	}
-
-	ifp = XFS_IFORK_PTR(ip, whichfork);
-	new_size = (int)ifp->if_bytes + byte_diff;
-	ASSERT(new_size >= 0);
-
-	if (new_size == 0) {
-		if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
-			kmem_free(ifp->if_u1.if_data);
-		}
-		ifp->if_u1.if_data = NULL;
-		real_size = 0;
-	} else if (new_size <= sizeof(ifp->if_u2.if_inline_data)) {
-		/*
-		 * If the valid extents/data can fit in if_inline_ext/data,
-		 * copy them from the malloc'd vector and free it.
-		 */
-		if (ifp->if_u1.if_data == NULL) {
-			ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
-		} else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
-			ASSERT(ifp->if_real_bytes != 0);
-			memcpy(ifp->if_u2.if_inline_data, ifp->if_u1.if_data,
-			      new_size);
-			kmem_free(ifp->if_u1.if_data);
-			ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
-		}
-		real_size = 0;
-	} else {
-		/*
-		 * Stuck with malloc/realloc.
-		 * For inline data, the underlying buffer must be
-		 * a multiple of 4 bytes in size so that it can be
-		 * logged and stay on word boundaries.  We enforce
-		 * that here.
-		 */
-		real_size = roundup(new_size, 4);
-		if (ifp->if_u1.if_data == NULL) {
-			ASSERT(ifp->if_real_bytes == 0);
-			ifp->if_u1.if_data = kmem_alloc(real_size,
-							KM_SLEEP | KM_NOFS);
-		} else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
-			/*
-			 * Only do the realloc if the underlying size
-			 * is really changing.
-			 */
-			if (ifp->if_real_bytes != real_size) {
-				ifp->if_u1.if_data =
-					kmem_realloc(ifp->if_u1.if_data,
-							real_size,
-							ifp->if_real_bytes,
-							KM_SLEEP | KM_NOFS);
-			}
-		} else {
-			ASSERT(ifp->if_real_bytes == 0);
-			ifp->if_u1.if_data = kmem_alloc(real_size,
-							KM_SLEEP | KM_NOFS);
-			memcpy(ifp->if_u1.if_data, ifp->if_u2.if_inline_data,
-				ifp->if_bytes);
-		}
-	}
-	ifp->if_real_bytes = real_size;
-	ifp->if_bytes = new_size;
-	ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
-}
-
-void
-xfs_idestroy_fork(
-	xfs_inode_t	*ip,
-	int		whichfork)
-{
-	xfs_ifork_t	*ifp;
-
-	ifp = XFS_IFORK_PTR(ip, whichfork);
-	if (ifp->if_broot != NULL) {
-		kmem_free(ifp->if_broot);
-		ifp->if_broot = NULL;
-	}
-
-	/*
-	 * If the format is local, then we can't have an extents
-	 * array so just look for an inline data array.  If we're
-	 * not local then we may or may not have an extents list,
-	 * so check and free it up if we do.
-	 */
-	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
-		if ((ifp->if_u1.if_data != ifp->if_u2.if_inline_data) &&
-		    (ifp->if_u1.if_data != NULL)) {
-			ASSERT(ifp->if_real_bytes != 0);
-			kmem_free(ifp->if_u1.if_data);
-			ifp->if_u1.if_data = NULL;
-			ifp->if_real_bytes = 0;
-		}
-	} else if ((ifp->if_flags & XFS_IFEXTENTS) &&
-		   ((ifp->if_flags & XFS_IFEXTIREC) ||
-		    ((ifp->if_u1.if_extents != NULL) &&
-		     (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext)))) {
-		ASSERT(ifp->if_real_bytes != 0);
-		xfs_iext_destroy(ifp);
-	}
-	ASSERT(ifp->if_u1.if_extents == NULL ||
-	       ifp->if_u1.if_extents == ifp->if_u2.if_inline_ext);
-	ASSERT(ifp->if_real_bytes == 0);
-	if (whichfork == XFS_ATTR_FORK) {
-		kmem_zone_free(xfs_ifork_zone, ip->i_afp);
-		ip->i_afp = NULL;
-	}
-}
-
-/*
  * This is called to unpin an inode.  The caller must have the inode locked
  * in at least shared mode so that the buffer cannot be subsequently pinned
  * once someone is waiting for it to be unpinned.
@@ -2402,164 +2370,473 @@
 		__xfs_iunpin_wait(ip);
 }
 
-/*
- * xfs_iextents_copy()
- *
- * This is called to copy the REAL extents (as opposed to the delayed
- * allocation extents) from the inode into the given buffer.  It
- * returns the number of bytes copied into the buffer.
- *
- * If there are no delayed allocation extents, then we can just
- * memcpy() the extents into the buffer.  Otherwise, we need to
- * examine each extent in turn and skip those which are delayed.
- */
 int
-xfs_iextents_copy(
-	xfs_inode_t		*ip,
-	xfs_bmbt_rec_t		*dp,
-	int			whichfork)
+xfs_remove(
+	xfs_inode_t             *dp,
+	struct xfs_name		*name,
+	xfs_inode_t		*ip)
 {
-	int			copied;
-	int			i;
-	xfs_ifork_t		*ifp;
-	int			nrecs;
-	xfs_fsblock_t		start_block;
+	xfs_mount_t		*mp = dp->i_mount;
+	xfs_trans_t             *tp = NULL;
+	int			is_dir = S_ISDIR(ip->i_d.di_mode);
+	int                     error = 0;
+	xfs_bmap_free_t         free_list;
+	xfs_fsblock_t           first_block;
+	int			cancel_flags;
+	int			committed;
+	int			link_zero;
+	uint			resblks;
+	uint			log_count;
 
-	ifp = XFS_IFORK_PTR(ip, whichfork);
-	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
-	ASSERT(ifp->if_bytes > 0);
+	trace_xfs_remove(dp, name);
 
-	nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
-	XFS_BMAP_TRACE_EXLIST(ip, nrecs, whichfork);
-	ASSERT(nrecs > 0);
+	if (XFS_FORCED_SHUTDOWN(mp))
+		return XFS_ERROR(EIO);
+
+	error = xfs_qm_dqattach(dp, 0);
+	if (error)
+		goto std_return;
+
+	error = xfs_qm_dqattach(ip, 0);
+	if (error)
+		goto std_return;
+
+	if (is_dir) {
+		tp = xfs_trans_alloc(mp, XFS_TRANS_RMDIR);
+		log_count = XFS_DEFAULT_LOG_COUNT;
+	} else {
+		tp = xfs_trans_alloc(mp, XFS_TRANS_REMOVE);
+		log_count = XFS_REMOVE_LOG_COUNT;
+	}
+	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
 
 	/*
-	 * There are some delayed allocation extents in the
-	 * inode, so copy the extents one at a time and skip
-	 * the delayed ones.  There must be at least one
-	 * non-delayed extent.
+	 * We try to get the real space reservation first,
+	 * allowing for directory btree deletion(s) implying
+	 * possible bmap insert(s).  If we can't get the space
+	 * reservation then we use 0 instead, and avoid the bmap
+	 * btree insert(s) in the directory code by, if the bmap
+	 * insert tries to happen, instead trimming the LAST
+	 * block from the directory.
 	 */
-	copied = 0;
-	for (i = 0; i < nrecs; i++) {
-		xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
-		start_block = xfs_bmbt_get_startblock(ep);
-		if (isnullstartblock(start_block)) {
-			/*
-			 * It's a delayed allocation extent, so skip it.
-			 */
-			continue;
-		}
-
-		/* Translate to on disk format */
-		put_unaligned(cpu_to_be64(ep->l0), &dp->l0);
-		put_unaligned(cpu_to_be64(ep->l1), &dp->l1);
-		dp++;
-		copied++;
+	resblks = XFS_REMOVE_SPACE_RES(mp);
+	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_remove, resblks, 0);
+	if (error == ENOSPC) {
+		resblks = 0;
+		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_remove, 0, 0);
 	}
-	ASSERT(copied != 0);
-	xfs_validate_extents(ifp, copied, XFS_EXTFMT_INODE(ip));
+	if (error) {
+		ASSERT(error != ENOSPC);
+		cancel_flags = 0;
+		goto out_trans_cancel;
+	}
 
-	return (copied * (uint)sizeof(xfs_bmbt_rec_t));
+	xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL);
+
+	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
+	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+
+	/*
+	 * If we're removing a directory perform some additional validation.
+	 */
+	if (is_dir) {
+		ASSERT(ip->i_d.di_nlink >= 2);
+		if (ip->i_d.di_nlink != 2) {
+			error = XFS_ERROR(ENOTEMPTY);
+			goto out_trans_cancel;
+		}
+		if (!xfs_dir_isempty(ip)) {
+			error = XFS_ERROR(ENOTEMPTY);
+			goto out_trans_cancel;
+		}
+	}
+
+	xfs_bmap_init(&free_list, &first_block);
+	error = xfs_dir_removename(tp, dp, name, ip->i_ino,
+					&first_block, &free_list, resblks);
+	if (error) {
+		ASSERT(error != ENOENT);
+		goto out_bmap_cancel;
+	}
+	xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+
+	if (is_dir) {
+		/*
+		 * Drop the link from ip's "..".
+		 */
+		error = xfs_droplink(tp, dp);
+		if (error)
+			goto out_bmap_cancel;
+
+		/*
+		 * Drop the "." link from ip to self.
+		 */
+		error = xfs_droplink(tp, ip);
+		if (error)
+			goto out_bmap_cancel;
+	} else {
+		/*
+		 * When removing a non-directory we need to log the parent
+		 * inode here.  For a directory this is done implicitly
+		 * by the xfs_droplink call for the ".." entry.
+		 */
+		xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
+	}
+
+	/*
+	 * Drop the link from dp to ip.
+	 */
+	error = xfs_droplink(tp, ip);
+	if (error)
+		goto out_bmap_cancel;
+
+	/*
+	 * Determine if this is the last link while
+	 * we are in the transaction.
+	 */
+	link_zero = (ip->i_d.di_nlink == 0);
+
+	/*
+	 * If this is a synchronous mount, make sure that the
+	 * remove transaction goes to disk before returning to
+	 * the user.
+	 */
+	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
+		xfs_trans_set_sync(tp);
+
+	error = xfs_bmap_finish(&tp, &free_list, &committed);
+	if (error)
+		goto out_bmap_cancel;
+
+	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+	if (error)
+		goto std_return;
+
+	/*
+	 * If we are using filestreams, kill the stream association.
+	 * If the file is still open it may get a new one but that
+	 * will get killed on last close in xfs_close() so we don't
+	 * have to worry about that.
+	 */
+	if (!is_dir && link_zero && xfs_inode_is_filestream(ip))
+		xfs_filestream_deassociate(ip);
+
+	return 0;
+
+ out_bmap_cancel:
+	xfs_bmap_cancel(&free_list);
+	cancel_flags |= XFS_TRANS_ABORT;
+ out_trans_cancel:
+	xfs_trans_cancel(tp, cancel_flags);
+ std_return:
+	return error;
 }
 
 /*
- * Each of the following cases stores data into the same region
- * of the on-disk inode, so only one of them can be valid at
- * any given time. While it is possible to have conflicting formats
- * and log flags, e.g. having XFS_ILOG_?DATA set when the fork is
- * in EXTENTS format, this can only happen when the fork has
- * changed formats after being modified but before being flushed.
- * In these cases, the format always takes precedence, because the
- * format indicates the current state of the fork.
+ * Enter all inodes for a rename transaction into a sorted array.
  */
-/*ARGSUSED*/
 STATIC void
-xfs_iflush_fork(
-	xfs_inode_t		*ip,
-	xfs_dinode_t		*dip,
-	xfs_inode_log_item_t	*iip,
-	int			whichfork,
-	xfs_buf_t		*bp)
+xfs_sort_for_rename(
+	xfs_inode_t	*dp1,	/* in: old (source) directory inode */
+	xfs_inode_t	*dp2,	/* in: new (target) directory inode */
+	xfs_inode_t	*ip1,	/* in: inode of old entry */
+	xfs_inode_t	*ip2,	/* in: inode of new entry, if it
+				   already exists, NULL otherwise. */
+	xfs_inode_t	**i_tab,/* out: array of inode returned, sorted */
+	int		*num_inodes)  /* out: number of inodes in array */
 {
-	char			*cp;
-	xfs_ifork_t		*ifp;
-	xfs_mount_t		*mp;
-	static const short	brootflag[2] =
-		{ XFS_ILOG_DBROOT, XFS_ILOG_ABROOT };
-	static const short	dataflag[2] =
-		{ XFS_ILOG_DDATA, XFS_ILOG_ADATA };
-	static const short	extflag[2] =
-		{ XFS_ILOG_DEXT, XFS_ILOG_AEXT };
+	xfs_inode_t		*temp;
+	int			i, j;
 
-	if (!iip)
-		return;
-	ifp = XFS_IFORK_PTR(ip, whichfork);
 	/*
-	 * This can happen if we gave up in iformat in an error path,
-	 * for the attribute fork.
+	 * i_tab contains a list of pointers to inodes.  We initialize
+	 * the table here & we'll sort it.  We will then use it to
+	 * order the acquisition of the inode locks.
+	 *
+	 * Note that the table may contain duplicates.  e.g., dp1 == dp2.
 	 */
-	if (!ifp) {
-		ASSERT(whichfork == XFS_ATTR_FORK);
-		return;
+	i_tab[0] = dp1;
+	i_tab[1] = dp2;
+	i_tab[2] = ip1;
+	if (ip2) {
+		*num_inodes = 4;
+		i_tab[3] = ip2;
+	} else {
+		*num_inodes = 3;
+		i_tab[3] = NULL;
 	}
-	cp = XFS_DFORK_PTR(dip, whichfork);
-	mp = ip->i_mount;
-	switch (XFS_IFORK_FORMAT(ip, whichfork)) {
-	case XFS_DINODE_FMT_LOCAL:
-		if ((iip->ili_fields & dataflag[whichfork]) &&
-		    (ifp->if_bytes > 0)) {
-			ASSERT(ifp->if_u1.if_data != NULL);
-			ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
-			memcpy(cp, ifp->if_u1.if_data, ifp->if_bytes);
-		}
-		break;
 
-	case XFS_DINODE_FMT_EXTENTS:
-		ASSERT((ifp->if_flags & XFS_IFEXTENTS) ||
-		       !(iip->ili_fields & extflag[whichfork]));
-		if ((iip->ili_fields & extflag[whichfork]) &&
-		    (ifp->if_bytes > 0)) {
-			ASSERT(xfs_iext_get_ext(ifp, 0));
-			ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0);
-			(void)xfs_iextents_copy(ip, (xfs_bmbt_rec_t *)cp,
-				whichfork);
+	/*
+	 * Sort the elements via bubble sort.  (Remember, there are at
+	 * most 4 elements to sort, so this is adequate.)
+	 */
+	for (i = 0; i < *num_inodes; i++) {
+		for (j = 1; j < *num_inodes; j++) {
+			if (i_tab[j]->i_ino < i_tab[j-1]->i_ino) {
+				temp = i_tab[j];
+				i_tab[j] = i_tab[j-1];
+				i_tab[j-1] = temp;
+			}
 		}
-		break;
-
-	case XFS_DINODE_FMT_BTREE:
-		if ((iip->ili_fields & brootflag[whichfork]) &&
-		    (ifp->if_broot_bytes > 0)) {
-			ASSERT(ifp->if_broot != NULL);
-			ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <=
-			        XFS_IFORK_SIZE(ip, whichfork));
-			xfs_bmbt_to_bmdr(mp, ifp->if_broot, ifp->if_broot_bytes,
-				(xfs_bmdr_block_t *)cp,
-				XFS_DFORK_SIZE(dip, mp, whichfork));
-		}
-		break;
-
-	case XFS_DINODE_FMT_DEV:
-		if (iip->ili_fields & XFS_ILOG_DEV) {
-			ASSERT(whichfork == XFS_DATA_FORK);
-			xfs_dinode_put_rdev(dip, ip->i_df.if_u2.if_rdev);
-		}
-		break;
-
-	case XFS_DINODE_FMT_UUID:
-		if (iip->ili_fields & XFS_ILOG_UUID) {
-			ASSERT(whichfork == XFS_DATA_FORK);
-			memcpy(XFS_DFORK_DPTR(dip),
-			       &ip->i_df.if_u2.if_uuid,
-			       sizeof(uuid_t));
-		}
-		break;
-
-	default:
-		ASSERT(0);
-		break;
 	}
 }
 
+/*
+ * xfs_rename
+ */
+int
+xfs_rename(
+	xfs_inode_t	*src_dp,
+	struct xfs_name	*src_name,
+	xfs_inode_t	*src_ip,
+	xfs_inode_t	*target_dp,
+	struct xfs_name	*target_name,
+	xfs_inode_t	*target_ip)
+{
+	xfs_trans_t	*tp = NULL;
+	xfs_mount_t	*mp = src_dp->i_mount;
+	int		new_parent;		/* moving to a new dir */
+	int		src_is_directory;	/* src_name is a directory */
+	int		error;
+	xfs_bmap_free_t free_list;
+	xfs_fsblock_t   first_block;
+	int		cancel_flags;
+	int		committed;
+	xfs_inode_t	*inodes[4];
+	int		spaceres;
+	int		num_inodes;
+
+	trace_xfs_rename(src_dp, target_dp, src_name, target_name);
+
+	new_parent = (src_dp != target_dp);
+	src_is_directory = S_ISDIR(src_ip->i_d.di_mode);
+
+	xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip,
+				inodes, &num_inodes);
+
+	xfs_bmap_init(&free_list, &first_block);
+	tp = xfs_trans_alloc(mp, XFS_TRANS_RENAME);
+	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
+	spaceres = XFS_RENAME_SPACE_RES(mp, target_name->len);
+	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, spaceres, 0);
+	if (error == ENOSPC) {
+		spaceres = 0;
+		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, 0, 0);
+	}
+	if (error) {
+		xfs_trans_cancel(tp, 0);
+		goto std_return;
+	}
+
+	/*
+	 * Attach the dquots to the inodes
+	 */
+	error = xfs_qm_vop_rename_dqattach(inodes);
+	if (error) {
+		xfs_trans_cancel(tp, cancel_flags);
+		goto std_return;
+	}
+
+	/*
+	 * Lock all the participating inodes. Depending upon whether
+	 * the target_name exists in the target directory, and
+	 * whether the target directory is the same as the source
+	 * directory, we can lock from 2 to 4 inodes.
+	 */
+	xfs_lock_inodes(inodes, num_inodes, XFS_ILOCK_EXCL);
+
+	/*
+	 * Join all the inodes to the transaction. From this point on,
+	 * we can rely on either trans_commit or trans_cancel to unlock
+	 * them.
+	 */
+	xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL);
+	if (new_parent)
+		xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL);
+	xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL);
+	if (target_ip)
+		xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL);
+
+	/*
+	 * If we are using project inheritance, we only allow renames
+	 * into our tree when the project IDs are the same; else the
+	 * tree quota mechanism would be circumvented.
+	 */
+	if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
+		     (xfs_get_projid(target_dp) != xfs_get_projid(src_ip)))) {
+		error = XFS_ERROR(EXDEV);
+		goto error_return;
+	}
+
+	/*
+	 * Set up the target.
+	 */
+	if (target_ip == NULL) {
+		/*
+		 * If there's no space reservation, check the entry will
+		 * fit before actually inserting it.
+		 */
+		error = xfs_dir_canenter(tp, target_dp, target_name, spaceres);
+		if (error)
+			goto error_return;
+		/*
+		 * If target does not exist and the rename crosses
+		 * directories, adjust the target directory link count
+		 * to account for the ".." reference from the new entry.
+		 */
+		error = xfs_dir_createname(tp, target_dp, target_name,
+						src_ip->i_ino, &first_block,
+						&free_list, spaceres);
+		if (error == ENOSPC)
+			goto error_return;
+		if (error)
+			goto abort_return;
+
+		xfs_trans_ichgtime(tp, target_dp,
+					XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+
+		if (new_parent && src_is_directory) {
+			error = xfs_bumplink(tp, target_dp);
+			if (error)
+				goto abort_return;
+		}
+	} else { /* target_ip != NULL */
+		/*
+		 * If target exists and it's a directory, check that both
+		 * target and source are directories and that target can be
+		 * destroyed, or that neither is a directory.
+		 */
+		if (S_ISDIR(target_ip->i_d.di_mode)) {
+			/*
+			 * Make sure target dir is empty.
+			 */
+			if (!(xfs_dir_isempty(target_ip)) ||
+			    (target_ip->i_d.di_nlink > 2)) {
+				error = XFS_ERROR(EEXIST);
+				goto error_return;
+			}
+		}
+
+		/*
+		 * Link the source inode under the target name.
+		 * If the source inode is a directory and we are moving
+		 * it across directories, its ".." entry will be
+		 * inconsistent until we replace that down below.
+		 *
+		 * In case there is already an entry with the same
+		 * name at the destination directory, remove it first.
+		 */
+		error = xfs_dir_replace(tp, target_dp, target_name,
+					src_ip->i_ino,
+					&first_block, &free_list, spaceres);
+		if (error)
+			goto abort_return;
+
+		xfs_trans_ichgtime(tp, target_dp,
+					XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+
+		/*
+		 * Decrement the link count on the target since the target
+		 * dir no longer points to it.
+		 */
+		error = xfs_droplink(tp, target_ip);
+		if (error)
+			goto abort_return;
+
+		if (src_is_directory) {
+			/*
+			 * Drop the link from the old "." entry.
+			 */
+			error = xfs_droplink(tp, target_ip);
+			if (error)
+				goto abort_return;
+		}
+	} /* target_ip != NULL */
+
+	/*
+	 * Remove the source.
+	 */
+	if (new_parent && src_is_directory) {
+		/*
+		 * Rewrite the ".." entry to point to the new
+		 * directory.
+		 */
+		error = xfs_dir_replace(tp, src_ip, &xfs_name_dotdot,
+					target_dp->i_ino,
+					&first_block, &free_list, spaceres);
+		ASSERT(error != EEXIST);
+		if (error)
+			goto abort_return;
+	}
+
+	/*
+	 * We always want to hit the ctime on the source inode.
+	 *
+	 * This isn't strictly required by the standards since the source
+	 * inode isn't really being changed, but old unix file systems did
+	 * it and some incremental backup programs won't work without it.
+	 */
+	xfs_trans_ichgtime(tp, src_ip, XFS_ICHGTIME_CHG);
+	xfs_trans_log_inode(tp, src_ip, XFS_ILOG_CORE);
+
+	/*
+	 * Adjust the link count on src_dp.  This is necessary when
+	 * renaming a directory, either within one parent when
+	 * the target existed, or across two parent directories.
+	 */
+	if (src_is_directory && (new_parent || target_ip != NULL)) {
+
+		/*
+		 * Decrement link count on src_directory since the
+		 * entry that's moved no longer points to it.
+		 */
+		error = xfs_droplink(tp, src_dp);
+		if (error)
+			goto abort_return;
+	}
+
+	error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino,
+					&first_block, &free_list, spaceres);
+	if (error)
+		goto abort_return;
+
+	xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+	xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE);
+	if (new_parent)
+		xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE);
+
+	/*
+	 * If this is a synchronous mount, make sure that the
+	 * rename transaction goes to disk before returning to
+	 * the user.
+	 */
+	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
+		xfs_trans_set_sync(tp);
+	}
+
+	error = xfs_bmap_finish(&tp, &free_list, &committed);
+	if (error) {
+		xfs_bmap_cancel(&free_list);
+		xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES |
+				 XFS_TRANS_ABORT));
+		goto std_return;
+	}
+
+	/*
+	 * trans_commit will unlock src_ip, target_ip & decrement
+	 * the vnode references.
+	 */
+	return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+
+ abort_return:
+	cancel_flags |= XFS_TRANS_ABORT;
+ error_return:
+	xfs_bmap_cancel(&free_list);
+	xfs_trans_cancel(tp, cancel_flags);
+ std_return:
+	return error;
+}
+
 STATIC int
 xfs_iflush_cluster(
 	xfs_inode_t	*ip,
@@ -2816,7 +3093,6 @@
 	return error;
 }
 
-
 STATIC int
 xfs_iflush_int(
 	struct xfs_inode	*ip,
@@ -3004,1072 +3280,3 @@
 corrupt_out:
 	return XFS_ERROR(EFSCORRUPTED);
 }
-
-/*
- * Return a pointer to the extent record at file index idx.
- */
-xfs_bmbt_rec_host_t *
-xfs_iext_get_ext(
-	xfs_ifork_t	*ifp,		/* inode fork pointer */
-	xfs_extnum_t	idx)		/* index of target extent */
-{
-	ASSERT(idx >= 0);
-	ASSERT(idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t));
-
-	if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) {
-		return ifp->if_u1.if_ext_irec->er_extbuf;
-	} else if (ifp->if_flags & XFS_IFEXTIREC) {
-		xfs_ext_irec_t	*erp;		/* irec pointer */
-		int		erp_idx = 0;	/* irec index */
-		xfs_extnum_t	page_idx = idx;	/* ext index in target list */
-
-		erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0);
-		return &erp->er_extbuf[page_idx];
-	} else if (ifp->if_bytes) {
-		return &ifp->if_u1.if_extents[idx];
-	} else {
-		return NULL;
-	}
-}
-
-/*
- * Insert new item(s) into the extent records for incore inode
- * fork 'ifp'.  'count' new items are inserted at index 'idx'.
- */
-void
-xfs_iext_insert(
-	xfs_inode_t	*ip,		/* incore inode pointer */
-	xfs_extnum_t	idx,		/* starting index of new items */
-	xfs_extnum_t	count,		/* number of inserted items */
-	xfs_bmbt_irec_t	*new,		/* items to insert */
-	int		state)		/* type of extent conversion */
-{
-	xfs_ifork_t	*ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df;
-	xfs_extnum_t	i;		/* extent record index */
-
-	trace_xfs_iext_insert(ip, idx, new, state, _RET_IP_);
-
-	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
-	xfs_iext_add(ifp, idx, count);
-	for (i = idx; i < idx + count; i++, new++)
-		xfs_bmbt_set_all(xfs_iext_get_ext(ifp, i), new);
-}
-
-/*
- * This is called when the amount of space required for incore file
- * extents needs to be increased. The ext_diff parameter stores the
- * number of new extents being added and the idx parameter contains
- * the extent index where the new extents will be added. If the new
- * extents are being appended, then we just need to (re)allocate and
- * initialize the space. Otherwise, if the new extents are being
- * inserted into the middle of the existing entries, a bit more work
- * is required to make room for the new extents to be inserted. The
- * caller is responsible for filling in the new extent entries upon
- * return.
- */
-void
-xfs_iext_add(
-	xfs_ifork_t	*ifp,		/* inode fork pointer */
-	xfs_extnum_t	idx,		/* index to begin adding exts */
-	int		ext_diff)	/* number of extents to add */
-{
-	int		byte_diff;	/* new bytes being added */
-	int		new_size;	/* size of extents after adding */
-	xfs_extnum_t	nextents;	/* number of extents in file */
-
-	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
-	ASSERT((idx >= 0) && (idx <= nextents));
-	byte_diff = ext_diff * sizeof(xfs_bmbt_rec_t);
-	new_size = ifp->if_bytes + byte_diff;
-	/*
-	 * If the new number of extents (nextents + ext_diff)
-	 * fits inside the inode, then continue to use the inline
-	 * extent buffer.
-	 */
-	if (nextents + ext_diff <= XFS_INLINE_EXTS) {
-		if (idx < nextents) {
-			memmove(&ifp->if_u2.if_inline_ext[idx + ext_diff],
-				&ifp->if_u2.if_inline_ext[idx],
-				(nextents - idx) * sizeof(xfs_bmbt_rec_t));
-			memset(&ifp->if_u2.if_inline_ext[idx], 0, byte_diff);
-		}
-		ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
-		ifp->if_real_bytes = 0;
-	}
-	/*
-	 * Otherwise use a linear (direct) extent list.
-	 * If the extents are currently inside the inode,
-	 * xfs_iext_realloc_direct will switch us from
-	 * inline to direct extent allocation mode.
-	 */
-	else if (nextents + ext_diff <= XFS_LINEAR_EXTS) {
-		xfs_iext_realloc_direct(ifp, new_size);
-		if (idx < nextents) {
-			memmove(&ifp->if_u1.if_extents[idx + ext_diff],
-				&ifp->if_u1.if_extents[idx],
-				(nextents - idx) * sizeof(xfs_bmbt_rec_t));
-			memset(&ifp->if_u1.if_extents[idx], 0, byte_diff);
-		}
-	}
-	/* Indirection array */
-	else {
-		xfs_ext_irec_t	*erp;
-		int		erp_idx = 0;
-		int		page_idx = idx;
-
-		ASSERT(nextents + ext_diff > XFS_LINEAR_EXTS);
-		if (ifp->if_flags & XFS_IFEXTIREC) {
-			erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 1);
-		} else {
-			xfs_iext_irec_init(ifp);
-			ASSERT(ifp->if_flags & XFS_IFEXTIREC);
-			erp = ifp->if_u1.if_ext_irec;
-		}
-		/* Extents fit in target extent page */
-		if (erp && erp->er_extcount + ext_diff <= XFS_LINEAR_EXTS) {
-			if (page_idx < erp->er_extcount) {
-				memmove(&erp->er_extbuf[page_idx + ext_diff],
-					&erp->er_extbuf[page_idx],
-					(erp->er_extcount - page_idx) *
-					sizeof(xfs_bmbt_rec_t));
-				memset(&erp->er_extbuf[page_idx], 0, byte_diff);
-			}
-			erp->er_extcount += ext_diff;
-			xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
-		}
-		/* Insert a new extent page */
-		else if (erp) {
-			xfs_iext_add_indirect_multi(ifp,
-				erp_idx, page_idx, ext_diff);
-		}
-		/*
-		 * If extent(s) are being appended to the last page in
-		 * the indirection array and the new extent(s) don't fit
-		 * in the page, then erp is NULL and erp_idx is set to
-		 * the next index needed in the indirection array.
-		 */
-		else {
-			int	count = ext_diff;
-
-			while (count) {
-				erp = xfs_iext_irec_new(ifp, erp_idx);
-				erp->er_extcount = count;
-				count -= MIN(count, (int)XFS_LINEAR_EXTS);
-				if (count) {
-					erp_idx++;
-				}
-			}
-		}
-	}
-	ifp->if_bytes = new_size;
-}
-
-/*
- * This is called when incore extents are being added to the indirection
- * array and the new extents do not fit in the target extent list. The
- * erp_idx parameter contains the irec index for the target extent list
- * in the indirection array, and the idx parameter contains the extent
- * index within the list. The number of extents being added is stored
- * in the count parameter.
- *
- *    |-------|   |-------|
- *    |       |   |       |    idx - number of extents before idx
- *    |  idx  |   | count |
- *    |       |   |       |    count - number of extents being inserted at idx
- *    |-------|   |-------|
- *    | count |   | nex2  |    nex2 - number of extents after idx + count
- *    |-------|   |-------|
- */
-void
-xfs_iext_add_indirect_multi(
-	xfs_ifork_t	*ifp,			/* inode fork pointer */
-	int		erp_idx,		/* target extent irec index */
-	xfs_extnum_t	idx,			/* index within target list */
-	int		count)			/* new extents being added */
-{
-	int		byte_diff;		/* new bytes being added */
-	xfs_ext_irec_t	*erp;			/* pointer to irec entry */
-	xfs_extnum_t	ext_diff;		/* number of extents to add */
-	xfs_extnum_t	ext_cnt;		/* new extents still needed */
-	xfs_extnum_t	nex2;			/* extents after idx + count */
-	xfs_bmbt_rec_t	*nex2_ep = NULL;	/* temp list for nex2 extents */
-	int		nlists;			/* number of irec's (lists) */
-
-	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
-	erp = &ifp->if_u1.if_ext_irec[erp_idx];
-	nex2 = erp->er_extcount - idx;
-	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
-
-	/*
-	 * Save second part of target extent list
-	 * (all extents past */
-	if (nex2) {
-		byte_diff = nex2 * sizeof(xfs_bmbt_rec_t);
-		nex2_ep = (xfs_bmbt_rec_t *) kmem_alloc(byte_diff, KM_NOFS);
-		memmove(nex2_ep, &erp->er_extbuf[idx], byte_diff);
-		erp->er_extcount -= nex2;
-		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -nex2);
-		memset(&erp->er_extbuf[idx], 0, byte_diff);
-	}
-
-	/*
-	 * Add the new extents to the end of the target
-	 * list, then allocate new irec record(s) and
-	 * extent buffer(s) as needed to store the rest
-	 * of the new extents.
-	 */
-	ext_cnt = count;
-	ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS - erp->er_extcount);
-	if (ext_diff) {
-		erp->er_extcount += ext_diff;
-		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
-		ext_cnt -= ext_diff;
-	}
-	while (ext_cnt) {
-		erp_idx++;
-		erp = xfs_iext_irec_new(ifp, erp_idx);
-		ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS);
-		erp->er_extcount = ext_diff;
-		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
-		ext_cnt -= ext_diff;
-	}
-
-	/* Add nex2 extents back to indirection array */
-	if (nex2) {
-		xfs_extnum_t	ext_avail;
-		int		i;
-
-		byte_diff = nex2 * sizeof(xfs_bmbt_rec_t);
-		ext_avail = XFS_LINEAR_EXTS - erp->er_extcount;
-		i = 0;
-		/*
-		 * If nex2 extents fit in the current page, append
-		 * nex2_ep after the new extents.
-		 */
-		if (nex2 <= ext_avail) {
-			i = erp->er_extcount;
-		}
-		/*
-		 * Otherwise, check if space is available in the
-		 * next page.
-		 */
-		else if ((erp_idx < nlists - 1) &&
-			 (nex2 <= (ext_avail = XFS_LINEAR_EXTS -
-			  ifp->if_u1.if_ext_irec[erp_idx+1].er_extcount))) {
-			erp_idx++;
-			erp++;
-			/* Create a hole for nex2 extents */
-			memmove(&erp->er_extbuf[nex2], erp->er_extbuf,
-				erp->er_extcount * sizeof(xfs_bmbt_rec_t));
-		}
-		/*
-		 * Final choice, create a new extent page for
-		 * nex2 extents.
-		 */
-		else {
-			erp_idx++;
-			erp = xfs_iext_irec_new(ifp, erp_idx);
-		}
-		memmove(&erp->er_extbuf[i], nex2_ep, byte_diff);
-		kmem_free(nex2_ep);
-		erp->er_extcount += nex2;
-		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, nex2);
-	}
-}
-
-/*
- * This is called when the amount of space required for incore file
- * extents needs to be decreased. The ext_diff parameter stores the
- * number of extents to be removed and the idx parameter contains
- * the extent index where the extents will be removed from.
- *
- * If the amount of space needed has decreased below the linear
- * limit, XFS_IEXT_BUFSZ, then switch to using the contiguous
- * extent array.  Otherwise, use kmem_realloc() to adjust the
- * size to what is needed.
- */
-void
-xfs_iext_remove(
-	xfs_inode_t	*ip,		/* incore inode pointer */
-	xfs_extnum_t	idx,		/* index to begin removing exts */
-	int		ext_diff,	/* number of extents to remove */
-	int		state)		/* type of extent conversion */
-{
-	xfs_ifork_t	*ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df;
-	xfs_extnum_t	nextents;	/* number of extents in file */
-	int		new_size;	/* size of extents after removal */
-
-	trace_xfs_iext_remove(ip, idx, state, _RET_IP_);
-
-	ASSERT(ext_diff > 0);
-	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
-	new_size = (nextents - ext_diff) * sizeof(xfs_bmbt_rec_t);
-
-	if (new_size == 0) {
-		xfs_iext_destroy(ifp);
-	} else if (ifp->if_flags & XFS_IFEXTIREC) {
-		xfs_iext_remove_indirect(ifp, idx, ext_diff);
-	} else if (ifp->if_real_bytes) {
-		xfs_iext_remove_direct(ifp, idx, ext_diff);
-	} else {
-		xfs_iext_remove_inline(ifp, idx, ext_diff);
-	}
-	ifp->if_bytes = new_size;
-}
-
-/*
- * This removes ext_diff extents from the inline buffer, beginning
- * at extent index idx.
- */
-void
-xfs_iext_remove_inline(
-	xfs_ifork_t	*ifp,		/* inode fork pointer */
-	xfs_extnum_t	idx,		/* index to begin removing exts */
-	int		ext_diff)	/* number of extents to remove */
-{
-	int		nextents;	/* number of extents in file */
-
-	ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
-	ASSERT(idx < XFS_INLINE_EXTS);
-	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
-	ASSERT(((nextents - ext_diff) > 0) &&
-		(nextents - ext_diff) < XFS_INLINE_EXTS);
-
-	if (idx + ext_diff < nextents) {
-		memmove(&ifp->if_u2.if_inline_ext[idx],
-			&ifp->if_u2.if_inline_ext[idx + ext_diff],
-			(nextents - (idx + ext_diff)) *
-			 sizeof(xfs_bmbt_rec_t));
-		memset(&ifp->if_u2.if_inline_ext[nextents - ext_diff],
-			0, ext_diff * sizeof(xfs_bmbt_rec_t));
-	} else {
-		memset(&ifp->if_u2.if_inline_ext[idx], 0,
-			ext_diff * sizeof(xfs_bmbt_rec_t));
-	}
-}
-
-/*
- * This removes ext_diff extents from a linear (direct) extent list,
- * beginning at extent index idx. If the extents are being removed
- * from the end of the list (ie. truncate) then we just need to re-
- * allocate the list to remove the extra space. Otherwise, if the
- * extents are being removed from the middle of the existing extent
- * entries, then we first need to move the extent records beginning
- * at idx + ext_diff up in the list to overwrite the records being
- * removed, then remove the extra space via kmem_realloc.
- */
-void
-xfs_iext_remove_direct(
-	xfs_ifork_t	*ifp,		/* inode fork pointer */
-	xfs_extnum_t	idx,		/* index to begin removing exts */
-	int		ext_diff)	/* number of extents to remove */
-{
-	xfs_extnum_t	nextents;	/* number of extents in file */
-	int		new_size;	/* size of extents after removal */
-
-	ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
-	new_size = ifp->if_bytes -
-		(ext_diff * sizeof(xfs_bmbt_rec_t));
-	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
-
-	if (new_size == 0) {
-		xfs_iext_destroy(ifp);
-		return;
-	}
-	/* Move extents up in the list (if needed) */
-	if (idx + ext_diff < nextents) {
-		memmove(&ifp->if_u1.if_extents[idx],
-			&ifp->if_u1.if_extents[idx + ext_diff],
-			(nextents - (idx + ext_diff)) *
-			 sizeof(xfs_bmbt_rec_t));
-	}
-	memset(&ifp->if_u1.if_extents[nextents - ext_diff],
-		0, ext_diff * sizeof(xfs_bmbt_rec_t));
-	/*
-	 * Reallocate the direct extent list. If the extents
-	 * will fit inside the inode then xfs_iext_realloc_direct
-	 * will switch from direct to inline extent allocation
-	 * mode for us.
-	 */
-	xfs_iext_realloc_direct(ifp, new_size);
-	ifp->if_bytes = new_size;
-}
-
-/*
- * This is called when incore extents are being removed from the
- * indirection array and the extents being removed span multiple extent
- * buffers. The idx parameter contains the file extent index where we
- * want to begin removing extents, and the count parameter contains
- * how many extents need to be removed.
- *
- *    |-------|   |-------|
- *    | nex1  |   |       |    nex1 - number of extents before idx
- *    |-------|   | count |
- *    |       |   |       |    count - number of extents being removed at idx
- *    | count |   |-------|
- *    |       |   | nex2  |    nex2 - number of extents after idx + count
- *    |-------|   |-------|
- */
-void
-xfs_iext_remove_indirect(
-	xfs_ifork_t	*ifp,		/* inode fork pointer */
-	xfs_extnum_t	idx,		/* index to begin removing extents */
-	int		count)		/* number of extents to remove */
-{
-	xfs_ext_irec_t	*erp;		/* indirection array pointer */
-	int		erp_idx = 0;	/* indirection array index */
-	xfs_extnum_t	ext_cnt;	/* extents left to remove */
-	xfs_extnum_t	ext_diff;	/* extents to remove in current list */
-	xfs_extnum_t	nex1;		/* number of extents before idx */
-	xfs_extnum_t	nex2;		/* extents after idx + count */
-	int		page_idx = idx;	/* index in target extent list */
-
-	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
-	erp = xfs_iext_idx_to_irec(ifp,  &page_idx, &erp_idx, 0);
-	ASSERT(erp != NULL);
-	nex1 = page_idx;
-	ext_cnt = count;
-	while (ext_cnt) {
-		nex2 = MAX((erp->er_extcount - (nex1 + ext_cnt)), 0);
-		ext_diff = MIN(ext_cnt, (erp->er_extcount - nex1));
-		/*
-		 * Check for deletion of entire list;
-		 * xfs_iext_irec_remove() updates extent offsets.
-		 */
-		if (ext_diff == erp->er_extcount) {
-			xfs_iext_irec_remove(ifp, erp_idx);
-			ext_cnt -= ext_diff;
-			nex1 = 0;
-			if (ext_cnt) {
-				ASSERT(erp_idx < ifp->if_real_bytes /
-					XFS_IEXT_BUFSZ);
-				erp = &ifp->if_u1.if_ext_irec[erp_idx];
-				nex1 = 0;
-				continue;
-			} else {
-				break;
-			}
-		}
-		/* Move extents up (if needed) */
-		if (nex2) {
-			memmove(&erp->er_extbuf[nex1],
-				&erp->er_extbuf[nex1 + ext_diff],
-				nex2 * sizeof(xfs_bmbt_rec_t));
-		}
-		/* Zero out rest of page */
-		memset(&erp->er_extbuf[nex1 + nex2], 0, (XFS_IEXT_BUFSZ -
-			((nex1 + nex2) * sizeof(xfs_bmbt_rec_t))));
-		/* Update remaining counters */
-		erp->er_extcount -= ext_diff;
-		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -ext_diff);
-		ext_cnt -= ext_diff;
-		nex1 = 0;
-		erp_idx++;
-		erp++;
-	}
-	ifp->if_bytes -= count * sizeof(xfs_bmbt_rec_t);
-	xfs_iext_irec_compact(ifp);
-}
-
-/*
- * Create, destroy, or resize a linear (direct) block of extents.
- */
-void
-xfs_iext_realloc_direct(
-	xfs_ifork_t	*ifp,		/* inode fork pointer */
-	int		new_size)	/* new size of extents */
-{
-	int		rnew_size;	/* real new size of extents */
-
-	rnew_size = new_size;
-
-	ASSERT(!(ifp->if_flags & XFS_IFEXTIREC) ||
-		((new_size >= 0) && (new_size <= XFS_IEXT_BUFSZ) &&
-		 (new_size != ifp->if_real_bytes)));
-
-	/* Free extent records */
-	if (new_size == 0) {
-		xfs_iext_destroy(ifp);
-	}
-	/* Resize direct extent list and zero any new bytes */
-	else if (ifp->if_real_bytes) {
-		/* Check if extents will fit inside the inode */
-		if (new_size <= XFS_INLINE_EXTS * sizeof(xfs_bmbt_rec_t)) {
-			xfs_iext_direct_to_inline(ifp, new_size /
-				(uint)sizeof(xfs_bmbt_rec_t));
-			ifp->if_bytes = new_size;
-			return;
-		}
-		if (!is_power_of_2(new_size)){
-			rnew_size = roundup_pow_of_two(new_size);
-		}
-		if (rnew_size != ifp->if_real_bytes) {
-			ifp->if_u1.if_extents =
-				kmem_realloc(ifp->if_u1.if_extents,
-						rnew_size,
-						ifp->if_real_bytes, KM_NOFS);
-		}
-		if (rnew_size > ifp->if_real_bytes) {
-			memset(&ifp->if_u1.if_extents[ifp->if_bytes /
-				(uint)sizeof(xfs_bmbt_rec_t)], 0,
-				rnew_size - ifp->if_real_bytes);
-		}
-	}
-	/*
-	 * Switch from the inline extent buffer to a direct
-	 * extent list. Be sure to include the inline extent
-	 * bytes in new_size.
-	 */
-	else {
-		new_size += ifp->if_bytes;
-		if (!is_power_of_2(new_size)) {
-			rnew_size = roundup_pow_of_two(new_size);
-		}
-		xfs_iext_inline_to_direct(ifp, rnew_size);
-	}
-	ifp->if_real_bytes = rnew_size;
-	ifp->if_bytes = new_size;
-}
-
-/*
- * Switch from linear (direct) extent records to inline buffer.
- */
-void
-xfs_iext_direct_to_inline(
-	xfs_ifork_t	*ifp,		/* inode fork pointer */
-	xfs_extnum_t	nextents)	/* number of extents in file */
-{
-	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
-	ASSERT(nextents <= XFS_INLINE_EXTS);
-	/*
-	 * The inline buffer was zeroed when we switched
-	 * from inline to direct extent allocation mode,
-	 * so we don't need to clear it here.
-	 */
-	memcpy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents,
-		nextents * sizeof(xfs_bmbt_rec_t));
-	kmem_free(ifp->if_u1.if_extents);
-	ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
-	ifp->if_real_bytes = 0;
-}
-
-/*
- * Switch from inline buffer to linear (direct) extent records.
- * new_size should already be rounded up to the next power of 2
- * by the caller (when appropriate), so use new_size as it is.
- * However, since new_size may be rounded up, we can't update
- * if_bytes here. It is the caller's responsibility to update
- * if_bytes upon return.
- */
-void
-xfs_iext_inline_to_direct(
-	xfs_ifork_t	*ifp,		/* inode fork pointer */
-	int		new_size)	/* number of extents in file */
-{
-	ifp->if_u1.if_extents = kmem_alloc(new_size, KM_NOFS);
-	memset(ifp->if_u1.if_extents, 0, new_size);
-	if (ifp->if_bytes) {
-		memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext,
-			ifp->if_bytes);
-		memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS *
-			sizeof(xfs_bmbt_rec_t));
-	}
-	ifp->if_real_bytes = new_size;
-}
-
-/*
- * Resize an extent indirection array to new_size bytes.
- */
-STATIC void
-xfs_iext_realloc_indirect(
-	xfs_ifork_t	*ifp,		/* inode fork pointer */
-	int		new_size)	/* new indirection array size */
-{
-	int		nlists;		/* number of irec's (ex lists) */
-	int		size;		/* current indirection array size */
-
-	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
-	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
-	size = nlists * sizeof(xfs_ext_irec_t);
-	ASSERT(ifp->if_real_bytes);
-	ASSERT((new_size >= 0) && (new_size != size));
-	if (new_size == 0) {
-		xfs_iext_destroy(ifp);
-	} else {
-		ifp->if_u1.if_ext_irec = (xfs_ext_irec_t *)
-			kmem_realloc(ifp->if_u1.if_ext_irec,
-				new_size, size, KM_NOFS);
-	}
-}
-
-/*
- * Switch from indirection array to linear (direct) extent allocations.
- */
-STATIC void
-xfs_iext_indirect_to_direct(
-	 xfs_ifork_t	*ifp)		/* inode fork pointer */
-{
-	xfs_bmbt_rec_host_t *ep;	/* extent record pointer */
-	xfs_extnum_t	nextents;	/* number of extents in file */
-	int		size;		/* size of file extents */
-
-	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
-	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
-	ASSERT(nextents <= XFS_LINEAR_EXTS);
-	size = nextents * sizeof(xfs_bmbt_rec_t);
-
-	xfs_iext_irec_compact_pages(ifp);
-	ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ);
-
-	ep = ifp->if_u1.if_ext_irec->er_extbuf;
-	kmem_free(ifp->if_u1.if_ext_irec);
-	ifp->if_flags &= ~XFS_IFEXTIREC;
-	ifp->if_u1.if_extents = ep;
-	ifp->if_bytes = size;
-	if (nextents < XFS_LINEAR_EXTS) {
-		xfs_iext_realloc_direct(ifp, size);
-	}
-}
-
-/*
- * Free incore file extents.
- */
-void
-xfs_iext_destroy(
-	xfs_ifork_t	*ifp)		/* inode fork pointer */
-{
-	if (ifp->if_flags & XFS_IFEXTIREC) {
-		int	erp_idx;
-		int	nlists;
-
-		nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
-		for (erp_idx = nlists - 1; erp_idx >= 0 ; erp_idx--) {
-			xfs_iext_irec_remove(ifp, erp_idx);
-		}
-		ifp->if_flags &= ~XFS_IFEXTIREC;
-	} else if (ifp->if_real_bytes) {
-		kmem_free(ifp->if_u1.if_extents);
-	} else if (ifp->if_bytes) {
-		memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS *
-			sizeof(xfs_bmbt_rec_t));
-	}
-	ifp->if_u1.if_extents = NULL;
-	ifp->if_real_bytes = 0;
-	ifp->if_bytes = 0;
-}
-
-/*
- * Return a pointer to the extent record for file system block bno.
- */
-xfs_bmbt_rec_host_t *			/* pointer to found extent record */
-xfs_iext_bno_to_ext(
-	xfs_ifork_t	*ifp,		/* inode fork pointer */
-	xfs_fileoff_t	bno,		/* block number to search for */
-	xfs_extnum_t	*idxp)		/* index of target extent */
-{
-	xfs_bmbt_rec_host_t *base;	/* pointer to first extent */
-	xfs_filblks_t	blockcount = 0;	/* number of blocks in extent */
-	xfs_bmbt_rec_host_t *ep = NULL;	/* pointer to target extent */
-	xfs_ext_irec_t	*erp = NULL;	/* indirection array pointer */
-	int		high;		/* upper boundary in search */
-	xfs_extnum_t	idx = 0;	/* index of target extent */
-	int		low;		/* lower boundary in search */
-	xfs_extnum_t	nextents;	/* number of file extents */
-	xfs_fileoff_t	startoff = 0;	/* start offset of extent */
-
-	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
-	if (nextents == 0) {
-		*idxp = 0;
-		return NULL;
-	}
-	low = 0;
-	if (ifp->if_flags & XFS_IFEXTIREC) {
-		/* Find target extent list */
-		int	erp_idx = 0;
-		erp = xfs_iext_bno_to_irec(ifp, bno, &erp_idx);
-		base = erp->er_extbuf;
-		high = erp->er_extcount - 1;
-	} else {
-		base = ifp->if_u1.if_extents;
-		high = nextents - 1;
-	}
-	/* Binary search extent records */
-	while (low <= high) {
-		idx = (low + high) >> 1;
-		ep = base + idx;
-		startoff = xfs_bmbt_get_startoff(ep);
-		blockcount = xfs_bmbt_get_blockcount(ep);
-		if (bno < startoff) {
-			high = idx - 1;
-		} else if (bno >= startoff + blockcount) {
-			low = idx + 1;
-		} else {
-			/* Convert back to file-based extent index */
-			if (ifp->if_flags & XFS_IFEXTIREC) {
-				idx += erp->er_extoff;
-			}
-			*idxp = idx;
-			return ep;
-		}
-	}
-	/* Convert back to file-based extent index */
-	if (ifp->if_flags & XFS_IFEXTIREC) {
-		idx += erp->er_extoff;
-	}
-	if (bno >= startoff + blockcount) {
-		if (++idx == nextents) {
-			ep = NULL;
-		} else {
-			ep = xfs_iext_get_ext(ifp, idx);
-		}
-	}
-	*idxp = idx;
-	return ep;
-}
-
-/*
- * Return a pointer to the indirection array entry containing the
- * extent record for filesystem block bno. Store the index of the
- * target irec in *erp_idxp.
- */
-xfs_ext_irec_t *			/* pointer to found extent record */
-xfs_iext_bno_to_irec(
-	xfs_ifork_t	*ifp,		/* inode fork pointer */
-	xfs_fileoff_t	bno,		/* block number to search for */
-	int		*erp_idxp)	/* irec index of target ext list */
-{
-	xfs_ext_irec_t	*erp = NULL;	/* indirection array pointer */
-	xfs_ext_irec_t	*erp_next;	/* next indirection array entry */
-	int		erp_idx;	/* indirection array index */
-	int		nlists;		/* number of extent irec's (lists) */
-	int		high;		/* binary search upper limit */
-	int		low;		/* binary search lower limit */
-
-	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
-	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
-	erp_idx = 0;
-	low = 0;
-	high = nlists - 1;
-	while (low <= high) {
-		erp_idx = (low + high) >> 1;
-		erp = &ifp->if_u1.if_ext_irec[erp_idx];
-		erp_next = erp_idx < nlists - 1 ? erp + 1 : NULL;
-		if (bno < xfs_bmbt_get_startoff(erp->er_extbuf)) {
-			high = erp_idx - 1;
-		} else if (erp_next && bno >=
-			   xfs_bmbt_get_startoff(erp_next->er_extbuf)) {
-			low = erp_idx + 1;
-		} else {
-			break;
-		}
-	}
-	*erp_idxp = erp_idx;
-	return erp;
-}
-
-/*
- * Return a pointer to the indirection array entry containing the
- * extent record at file extent index *idxp. Store the index of the
- * target irec in *erp_idxp and store the page index of the target
- * extent record in *idxp.
- */
-xfs_ext_irec_t *
-xfs_iext_idx_to_irec(
-	xfs_ifork_t	*ifp,		/* inode fork pointer */
-	xfs_extnum_t	*idxp,		/* extent index (file -> page) */
-	int		*erp_idxp,	/* pointer to target irec */
-	int		realloc)	/* new bytes were just added */
-{
-	xfs_ext_irec_t	*prev;		/* pointer to previous irec */
-	xfs_ext_irec_t	*erp = NULL;	/* pointer to current irec */
-	int		erp_idx;	/* indirection array index */
-	int		nlists;		/* number of irec's (ex lists) */
-	int		high;		/* binary search upper limit */
-	int		low;		/* binary search lower limit */
-	xfs_extnum_t	page_idx = *idxp; /* extent index in target list */
-
-	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
-	ASSERT(page_idx >= 0);
-	ASSERT(page_idx <= ifp->if_bytes / sizeof(xfs_bmbt_rec_t));
-	ASSERT(page_idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t) || realloc);
-
-	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
-	erp_idx = 0;
-	low = 0;
-	high = nlists - 1;
-
-	/* Binary search extent irec's */
-	while (low <= high) {
-		erp_idx = (low + high) >> 1;
-		erp = &ifp->if_u1.if_ext_irec[erp_idx];
-		prev = erp_idx > 0 ? erp - 1 : NULL;
-		if (page_idx < erp->er_extoff || (page_idx == erp->er_extoff &&
-		     realloc && prev && prev->er_extcount < XFS_LINEAR_EXTS)) {
-			high = erp_idx - 1;
-		} else if (page_idx > erp->er_extoff + erp->er_extcount ||
-			   (page_idx == erp->er_extoff + erp->er_extcount &&
-			    !realloc)) {
-			low = erp_idx + 1;
-		} else if (page_idx == erp->er_extoff + erp->er_extcount &&
-			   erp->er_extcount == XFS_LINEAR_EXTS) {
-			ASSERT(realloc);
-			page_idx = 0;
-			erp_idx++;
-			erp = erp_idx < nlists ? erp + 1 : NULL;
-			break;
-		} else {
-			page_idx -= erp->er_extoff;
-			break;
-		}
-	}
-	*idxp = page_idx;
-	*erp_idxp = erp_idx;
-	return(erp);
-}
-
-/*
- * Allocate and initialize an indirection array once the space needed
- * for incore extents increases above XFS_IEXT_BUFSZ.
- */
-void
-xfs_iext_irec_init(
-	xfs_ifork_t	*ifp)		/* inode fork pointer */
-{
-	xfs_ext_irec_t	*erp;		/* indirection array pointer */
-	xfs_extnum_t	nextents;	/* number of extents in file */
-
-	ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
-	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
-	ASSERT(nextents <= XFS_LINEAR_EXTS);
-
-	erp = kmem_alloc(sizeof(xfs_ext_irec_t), KM_NOFS);
-
-	if (nextents == 0) {
-		ifp->if_u1.if_extents = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS);
-	} else if (!ifp->if_real_bytes) {
-		xfs_iext_inline_to_direct(ifp, XFS_IEXT_BUFSZ);
-	} else if (ifp->if_real_bytes < XFS_IEXT_BUFSZ) {
-		xfs_iext_realloc_direct(ifp, XFS_IEXT_BUFSZ);
-	}
-	erp->er_extbuf = ifp->if_u1.if_extents;
-	erp->er_extcount = nextents;
-	erp->er_extoff = 0;
-
-	ifp->if_flags |= XFS_IFEXTIREC;
-	ifp->if_real_bytes = XFS_IEXT_BUFSZ;
-	ifp->if_bytes = nextents * sizeof(xfs_bmbt_rec_t);
-	ifp->if_u1.if_ext_irec = erp;
-
-	return;
-}
-
-/*
- * Allocate and initialize a new entry in the indirection array.
- */
-xfs_ext_irec_t *
-xfs_iext_irec_new(
-	xfs_ifork_t	*ifp,		/* inode fork pointer */
-	int		erp_idx)	/* index for new irec */
-{
-	xfs_ext_irec_t	*erp;		/* indirection array pointer */
-	int		i;		/* loop counter */
-	int		nlists;		/* number of irec's (ex lists) */
-
-	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
-	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
-
-	/* Resize indirection array */
-	xfs_iext_realloc_indirect(ifp, ++nlists *
-				  sizeof(xfs_ext_irec_t));
-	/*
-	 * Move records down in the array so the
-	 * new page can use erp_idx.
-	 */
-	erp = ifp->if_u1.if_ext_irec;
-	for (i = nlists - 1; i > erp_idx; i--) {
-		memmove(&erp[i], &erp[i-1], sizeof(xfs_ext_irec_t));
-	}
-	ASSERT(i == erp_idx);
-
-	/* Initialize new extent record */
-	erp = ifp->if_u1.if_ext_irec;
-	erp[erp_idx].er_extbuf = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS);
-	ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
-	memset(erp[erp_idx].er_extbuf, 0, XFS_IEXT_BUFSZ);
-	erp[erp_idx].er_extcount = 0;
-	erp[erp_idx].er_extoff = erp_idx > 0 ?
-		erp[erp_idx-1].er_extoff + erp[erp_idx-1].er_extcount : 0;
-	return (&erp[erp_idx]);
-}
-
-/*
- * Remove a record from the indirection array.
- */
-void
-xfs_iext_irec_remove(
-	xfs_ifork_t	*ifp,		/* inode fork pointer */
-	int		erp_idx)	/* irec index to remove */
-{
-	xfs_ext_irec_t	*erp;		/* indirection array pointer */
-	int		i;		/* loop counter */
-	int		nlists;		/* number of irec's (ex lists) */
-
-	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
-	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
-	erp = &ifp->if_u1.if_ext_irec[erp_idx];
-	if (erp->er_extbuf) {
-		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1,
-			-erp->er_extcount);
-		kmem_free(erp->er_extbuf);
-	}
-	/* Compact extent records */
-	erp = ifp->if_u1.if_ext_irec;
-	for (i = erp_idx; i < nlists - 1; i++) {
-		memmove(&erp[i], &erp[i+1], sizeof(xfs_ext_irec_t));
-	}
-	/*
-	 * Manually free the last extent record from the indirection
-	 * array.  A call to xfs_iext_realloc_indirect() with a size
-	 * of zero would result in a call to xfs_iext_destroy() which
-	 * would in turn call this function again, creating a nasty
-	 * infinite loop.
-	 */
-	if (--nlists) {
-		xfs_iext_realloc_indirect(ifp,
-			nlists * sizeof(xfs_ext_irec_t));
-	} else {
-		kmem_free(ifp->if_u1.if_ext_irec);
-	}
-	ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
-}
-
-/*
- * This is called to clean up large amounts of unused memory allocated
- * by the indirection array.  Before compacting anything though, verify
- * that the indirection array is still needed and switch back to the
- * linear extent list (or even the inline buffer) if possible.  The
- * compaction policy is as follows:
- *
- *    Full Compaction: Extents fit into a single page (or inline buffer)
- * Partial Compaction: Extents occupy less than 50% of allocated space
- *      No Compaction: Extents occupy at least 50% of allocated space
- */
-void
-xfs_iext_irec_compact(
-	xfs_ifork_t	*ifp)		/* inode fork pointer */
-{
-	xfs_extnum_t	nextents;	/* number of extents in file */
-	int		nlists;		/* number of irec's (ex lists) */
-
-	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
-	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
-	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
-
-	if (nextents == 0) {
-		xfs_iext_destroy(ifp);
-	} else if (nextents <= XFS_INLINE_EXTS) {
-		xfs_iext_indirect_to_direct(ifp);
-		xfs_iext_direct_to_inline(ifp, nextents);
-	} else if (nextents <= XFS_LINEAR_EXTS) {
-		xfs_iext_indirect_to_direct(ifp);
-	} else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) {
-		xfs_iext_irec_compact_pages(ifp);
-	}
-}
-
-/*
- * Combine extents from neighboring extent pages.
- */
-void
-xfs_iext_irec_compact_pages(
-	xfs_ifork_t	*ifp)		/* inode fork pointer */
-{
-	xfs_ext_irec_t	*erp, *erp_next;/* pointers to irec entries */
-	int		erp_idx = 0;	/* indirection array index */
-	int		nlists;		/* number of irec's (ex lists) */
-
-	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
-	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
-	while (erp_idx < nlists - 1) {
-		erp = &ifp->if_u1.if_ext_irec[erp_idx];
-		erp_next = erp + 1;
-		if (erp_next->er_extcount <=
-		    (XFS_LINEAR_EXTS - erp->er_extcount)) {
-			memcpy(&erp->er_extbuf[erp->er_extcount],
-				erp_next->er_extbuf, erp_next->er_extcount *
-				sizeof(xfs_bmbt_rec_t));
-			erp->er_extcount += erp_next->er_extcount;
-			/*
-			 * Free page before removing extent record
-			 * so er_extoffs don't get modified in
-			 * xfs_iext_irec_remove.
-			 */
-			kmem_free(erp_next->er_extbuf);
-			erp_next->er_extbuf = NULL;
-			xfs_iext_irec_remove(ifp, erp_idx + 1);
-			nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
-		} else {
-			erp_idx++;
-		}
-	}
-}
-
-/*
- * This is called to update the er_extoff field in the indirection
- * array when extents have been added or removed from one of the
- * extent lists. erp_idx contains the irec index to begin updating
- * at and ext_diff contains the number of extents that were added
- * or removed.
- */
-void
-xfs_iext_irec_update_extoffs(
-	xfs_ifork_t	*ifp,		/* inode fork pointer */
-	int		erp_idx,	/* irec index to update */
-	int		ext_diff)	/* number of new extents */
-{
-	int		i;		/* loop counter */
-	int		nlists;		/* number of irec's (ex lists */
-
-	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
-	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
-	for (i = erp_idx; i < nlists; i++) {
-		ifp->if_u1.if_ext_irec[i].er_extoff += ext_diff;
-	}
-}
-
-/*
- * Test whether it is appropriate to check an inode for and free post EOF
- * blocks. The 'force' parameter determines whether we should also consider
- * regular files that are marked preallocated or append-only.
- */
-bool
-xfs_can_free_eofblocks(struct xfs_inode *ip, bool force)
-{
-	/* prealloc/delalloc exists only on regular files */
-	if (!S_ISREG(ip->i_d.di_mode))
-		return false;
-
-	/*
-	 * Zero sized files with no cached pages and delalloc blocks will not
-	 * have speculative prealloc/delalloc blocks to remove.
-	 */
-	if (VFS_I(ip)->i_size == 0 &&
-	    VN_CACHED(VFS_I(ip)) == 0 &&
-	    ip->i_delayed_blks == 0)
-		return false;
-
-	/* If we haven't read in the extent list, then don't do it now. */
-	if (!(ip->i_df.if_flags & XFS_IFEXTENTS))
-		return false;
-
-	/*
-	 * Do not free real preallocated or append-only files unless the file
-	 * has delalloc blocks and we are forced to remove them.
-	 */
-	if (ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND))
-		if (!force || ip->i_delayed_blks == 0)
-			return false;
-
-	return true;
-}
-
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index b55fd34..4a91358 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -18,225 +18,15 @@
 #ifndef	__XFS_INODE_H__
 #define	__XFS_INODE_H__
 
-struct posix_acl;
+#include "xfs_inode_buf.h"
+#include "xfs_inode_fork.h"
+
+/*
+ * Kernel only inode definitions
+ */
+
 struct xfs_dinode;
 struct xfs_inode;
-
-/*
- * Fork identifiers.
- */
-#define	XFS_DATA_FORK	0
-#define	XFS_ATTR_FORK	1
-
-/*
- * The following xfs_ext_irec_t struct introduces a second (top) level
- * to the in-core extent allocation scheme. These structs are allocated
- * in a contiguous block, creating an indirection array where each entry
- * (irec) contains a pointer to a buffer of in-core extent records which
- * it manages. Each extent buffer is 4k in size, since 4k is the system
- * page size on Linux i386 and systems with larger page sizes don't seem
- * to gain much, if anything, by using their native page size as the
- * extent buffer size. Also, using 4k extent buffers everywhere provides
- * a consistent interface for CXFS across different platforms.
- *
- * There is currently no limit on the number of irec's (extent lists)
- * allowed, so heavily fragmented files may require an indirection array
- * which spans multiple system pages of memory. The number of extents
- * which would require this amount of contiguous memory is very large
- * and should not cause problems in the foreseeable future. However,
- * if the memory needed for the contiguous array ever becomes a problem,
- * it is possible that a third level of indirection may be required.
- */
-typedef struct xfs_ext_irec {
-	xfs_bmbt_rec_host_t *er_extbuf;	/* block of extent records */
-	xfs_extnum_t	er_extoff;	/* extent offset in file */
-	xfs_extnum_t	er_extcount;	/* number of extents in page/block */
-} xfs_ext_irec_t;
-
-/*
- * File incore extent information, present for each of data & attr forks.
- */
-#define	XFS_IEXT_BUFSZ		4096
-#define	XFS_LINEAR_EXTS		(XFS_IEXT_BUFSZ / (uint)sizeof(xfs_bmbt_rec_t))
-#define	XFS_INLINE_EXTS		2
-#define	XFS_INLINE_DATA		32
-typedef struct xfs_ifork {
-	int			if_bytes;	/* bytes in if_u1 */
-	int			if_real_bytes;	/* bytes allocated in if_u1 */
-	struct xfs_btree_block	*if_broot;	/* file's incore btree root */
-	short			if_broot_bytes;	/* bytes allocated for root */
-	unsigned char		if_flags;	/* per-fork flags */
-	union {
-		xfs_bmbt_rec_host_t *if_extents;/* linear map file exts */
-		xfs_ext_irec_t	*if_ext_irec;	/* irec map file exts */
-		char		*if_data;	/* inline file data */
-	} if_u1;
-	union {
-		xfs_bmbt_rec_host_t if_inline_ext[XFS_INLINE_EXTS];
-						/* very small file extents */
-		char		if_inline_data[XFS_INLINE_DATA];
-						/* very small file data */
-		xfs_dev_t	if_rdev;	/* dev number if special */
-		uuid_t		if_uuid;	/* mount point value */
-	} if_u2;
-} xfs_ifork_t;
-
-/*
- * Inode location information.  Stored in the inode and passed to
- * xfs_imap_to_bp() to get a buffer and dinode for a given inode.
- */
-struct xfs_imap {
-	xfs_daddr_t	im_blkno;	/* starting BB of inode chunk */
-	ushort		im_len;		/* length in BBs of inode chunk */
-	ushort		im_boffset;	/* inode offset in block in bytes */
-};
-
-/*
- * This is the xfs in-core inode structure.
- * Most of the on-disk inode is embedded in the i_d field.
- *
- * The extent pointers/inline file space, however, are managed
- * separately.  The memory for this information is pointed to by
- * the if_u1 unions depending on the type of the data.
- * This is used to linearize the array of extents for fast in-core
- * access.  This is used until the file's number of extents
- * surpasses XFS_MAX_INCORE_EXTENTS, at which point all extent pointers
- * are accessed through the buffer cache.
- *
- * Other state kept in the in-core inode is used for identification,
- * locking, transactional updating, etc of the inode.
- *
- * Generally, we do not want to hold the i_rlock while holding the
- * i_ilock. Hierarchy is i_iolock followed by i_rlock.
- *
- * xfs_iptr_t contains all the inode fields up to and including the
- * i_mnext and i_mprev fields, it is used as a marker in the inode
- * chain off the mount structure by xfs_sync calls.
- */
-
-typedef struct xfs_ictimestamp {
-	__int32_t	t_sec;		/* timestamp seconds */
-	__int32_t	t_nsec;		/* timestamp nanoseconds */
-} xfs_ictimestamp_t;
-
-/*
- * NOTE:  This structure must be kept identical to struct xfs_dinode
- * 	  in xfs_dinode.h except for the endianness annotations.
- */
-typedef struct xfs_icdinode {
-	__uint16_t	di_magic;	/* inode magic # = XFS_DINODE_MAGIC */
-	__uint16_t	di_mode;	/* mode and type of file */
-	__int8_t	di_version;	/* inode version */
-	__int8_t	di_format;	/* format of di_c data */
-	__uint16_t	di_onlink;	/* old number of links to file */
-	__uint32_t	di_uid;		/* owner's user id */
-	__uint32_t	di_gid;		/* owner's group id */
-	__uint32_t	di_nlink;	/* number of links to file */
-	__uint16_t	di_projid_lo;	/* lower part of owner's project id */
-	__uint16_t	di_projid_hi;	/* higher part of owner's project id */
-	__uint8_t	di_pad[6];	/* unused, zeroed space */
-	__uint16_t	di_flushiter;	/* incremented on flush */
-	xfs_ictimestamp_t di_atime;	/* time last accessed */
-	xfs_ictimestamp_t di_mtime;	/* time last modified */
-	xfs_ictimestamp_t di_ctime;	/* time created/inode modified */
-	xfs_fsize_t	di_size;	/* number of bytes in file */
-	xfs_drfsbno_t	di_nblocks;	/* # of direct & btree blocks used */
-	xfs_extlen_t	di_extsize;	/* basic/minimum extent size for file */
-	xfs_extnum_t	di_nextents;	/* number of extents in data fork */
-	xfs_aextnum_t	di_anextents;	/* number of extents in attribute fork*/
-	__uint8_t	di_forkoff;	/* attr fork offs, <<3 for 64b align */
-	__int8_t	di_aformat;	/* format of attr fork's data */
-	__uint32_t	di_dmevmask;	/* DMIG event mask */
-	__uint16_t	di_dmstate;	/* DMIG state info */
-	__uint16_t	di_flags;	/* random flags, XFS_DIFLAG_... */
-	__uint32_t	di_gen;		/* generation number */
-
-	/* di_next_unlinked is the only non-core field in the old dinode */
-	xfs_agino_t	di_next_unlinked;/* agi unlinked list ptr */
-
-	/* start of the extended dinode, writable fields */
-	__uint32_t	di_crc;		/* CRC of the inode */
-	__uint64_t	di_changecount;	/* number of attribute changes */
-	xfs_lsn_t	di_lsn;		/* flush sequence */
-	__uint64_t	di_flags2;	/* more random flags */
-	__uint8_t	di_pad2[16];	/* more padding for future expansion */
-
-	/* fields only written to during inode creation */
-	xfs_ictimestamp_t di_crtime;	/* time created */
-	xfs_ino_t	di_ino;		/* inode number */
-	uuid_t		di_uuid;	/* UUID of the filesystem */
-
-	/* structure must be padded to 64 bit alignment */
-} xfs_icdinode_t;
-
-static inline uint xfs_icdinode_size(int version)
-{
-	if (version == 3)
-		return sizeof(struct xfs_icdinode);
-	return offsetof(struct xfs_icdinode, di_next_unlinked);
-}
-
-/*
- * Flags for xfs_ichgtime().
- */
-#define	XFS_ICHGTIME_MOD	0x1	/* data fork modification timestamp */
-#define	XFS_ICHGTIME_CHG	0x2	/* inode field change timestamp */
-#define	XFS_ICHGTIME_CREATE	0x4	/* inode create timestamp */
-
-/*
- * Per-fork incore inode flags.
- */
-#define	XFS_IFINLINE	0x01	/* Inline data is read in */
-#define	XFS_IFEXTENTS	0x02	/* All extent pointers are read in */
-#define	XFS_IFBROOT	0x04	/* i_broot points to the bmap b-tree root */
-#define	XFS_IFEXTIREC	0x08	/* Indirection array of extent blocks */
-
-/*
- * Fork handling.
- */
-
-#define XFS_IFORK_Q(ip)			((ip)->i_d.di_forkoff != 0)
-#define XFS_IFORK_BOFF(ip)		((int)((ip)->i_d.di_forkoff << 3))
-
-#define XFS_IFORK_PTR(ip,w)		\
-	((w) == XFS_DATA_FORK ? \
-		&(ip)->i_df : \
-		(ip)->i_afp)
-#define XFS_IFORK_DSIZE(ip) \
-	(XFS_IFORK_Q(ip) ? \
-		XFS_IFORK_BOFF(ip) : \
-		XFS_LITINO((ip)->i_mount, (ip)->i_d.di_version))
-#define XFS_IFORK_ASIZE(ip) \
-	(XFS_IFORK_Q(ip) ? \
-		XFS_LITINO((ip)->i_mount, (ip)->i_d.di_version) - \
-			XFS_IFORK_BOFF(ip) : \
-		0)
-#define XFS_IFORK_SIZE(ip,w) \
-	((w) == XFS_DATA_FORK ? \
-		XFS_IFORK_DSIZE(ip) : \
-		XFS_IFORK_ASIZE(ip))
-#define XFS_IFORK_FORMAT(ip,w) \
-	((w) == XFS_DATA_FORK ? \
-		(ip)->i_d.di_format : \
-		(ip)->i_d.di_aformat)
-#define XFS_IFORK_FMT_SET(ip,w,n) \
-	((w) == XFS_DATA_FORK ? \
-		((ip)->i_d.di_format = (n)) : \
-		((ip)->i_d.di_aformat = (n)))
-#define XFS_IFORK_NEXTENTS(ip,w) \
-	((w) == XFS_DATA_FORK ? \
-		(ip)->i_d.di_nextents : \
-		(ip)->i_d.di_anextents)
-#define XFS_IFORK_NEXT_SET(ip,w,n) \
-	((w) == XFS_DATA_FORK ? \
-		((ip)->i_d.di_nextents = (n)) : \
-		((ip)->i_d.di_anextents = (n)))
-#define XFS_IFORK_MAXEXT(ip, w) \
-	(XFS_IFORK_SIZE(ip, w) / sizeof(xfs_bmbt_rec_t))
-
-
-#ifdef __KERNEL__
-
 struct xfs_buf;
 struct xfs_bmap_free;
 struct xfs_bmbt_irec;
@@ -525,9 +315,21 @@
 	 ((pip)->i_d.di_mode & S_ISGID))
 
 
-/*
- * xfs_inode.c prototypes.
- */
+int		xfs_release(struct xfs_inode *ip);
+int		xfs_inactive(struct xfs_inode *ip);
+int		xfs_lookup(struct xfs_inode *dp, struct xfs_name *name,
+			   struct xfs_inode **ipp, struct xfs_name *ci_name);
+int		xfs_create(struct xfs_inode *dp, struct xfs_name *name,
+			   umode_t mode, xfs_dev_t rdev, struct xfs_inode **ipp);
+int		xfs_remove(struct xfs_inode *dp, struct xfs_name *name,
+			   struct xfs_inode *ip);
+int		xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip,
+			 struct xfs_name *target_name);
+int		xfs_rename(struct xfs_inode *src_dp, struct xfs_name *src_name,
+			   struct xfs_inode *src_ip, struct xfs_inode *target_dp,
+			   struct xfs_name *target_name,
+			   struct xfs_inode *target_ip);
+
 void		xfs_ilock(xfs_inode_t *, uint);
 int		xfs_ilock_nowait(xfs_inode_t *, uint);
 void		xfs_iunlock(xfs_inode_t *, uint);
@@ -548,13 +350,28 @@
 int		xfs_iunlink(struct xfs_trans *, xfs_inode_t *);
 
 void		xfs_iext_realloc(xfs_inode_t *, int, int);
+
 void		xfs_iunpin_wait(xfs_inode_t *);
+#define xfs_ipincount(ip)	((unsigned int) atomic_read(&ip->i_pincount))
+
 int		xfs_iflush(struct xfs_inode *, struct xfs_buf **);
 void		xfs_lock_inodes(xfs_inode_t **, int, uint);
 void		xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint);
 
 xfs_extlen_t	xfs_get_extsz_hint(struct xfs_inode *ip);
 
+int		xfs_dir_ialloc(struct xfs_trans **, struct xfs_inode *, umode_t,
+			       xfs_nlink_t, xfs_dev_t, prid_t, int,
+			       struct xfs_inode **, int *);
+int		xfs_droplink(struct xfs_trans *, struct xfs_inode *);
+int		xfs_bumplink(struct xfs_trans *, struct xfs_inode *);
+void		xfs_bump_ino_vers2(struct xfs_trans *, struct xfs_inode *);
+
+/* from xfs_file.c */
+int		xfs_zero_eof(struct xfs_inode *, xfs_off_t, xfs_fsize_t);
+int		xfs_iozero(struct xfs_inode *, loff_t, size_t);
+
+
 #define IHOLD(ip) \
 do { \
 	ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \
@@ -568,65 +385,6 @@
 	iput(VFS_I(ip)); \
 } while (0)
 
-#endif /* __KERNEL__ */
-
-/*
- * Flags for xfs_iget()
- */
-#define XFS_IGET_CREATE		0x1
-#define XFS_IGET_UNTRUSTED	0x2
-#define XFS_IGET_DONTCACHE	0x4
-
-int		xfs_imap_to_bp(struct xfs_mount *, struct xfs_trans *,
-			       struct xfs_imap *, struct xfs_dinode **,
-			       struct xfs_buf **, uint, uint);
-int		xfs_iread(struct xfs_mount *, struct xfs_trans *,
-			  struct xfs_inode *, uint);
-void		xfs_dinode_calc_crc(struct xfs_mount *, struct xfs_dinode *);
-void		xfs_dinode_to_disk(struct xfs_dinode *,
-				   struct xfs_icdinode *);
-void		xfs_idestroy_fork(struct xfs_inode *, int);
-void		xfs_idata_realloc(struct xfs_inode *, int, int);
-void		xfs_iroot_realloc(struct xfs_inode *, int, int);
-int		xfs_iread_extents(struct xfs_trans *, struct xfs_inode *, int);
-int		xfs_iextents_copy(struct xfs_inode *, xfs_bmbt_rec_t *, int);
-
-xfs_bmbt_rec_host_t *xfs_iext_get_ext(xfs_ifork_t *, xfs_extnum_t);
-void		xfs_iext_insert(xfs_inode_t *, xfs_extnum_t, xfs_extnum_t,
-				xfs_bmbt_irec_t *, int);
-void		xfs_iext_add(xfs_ifork_t *, xfs_extnum_t, int);
-void		xfs_iext_add_indirect_multi(xfs_ifork_t *, int, xfs_extnum_t, int);
-void		xfs_iext_remove(xfs_inode_t *, xfs_extnum_t, int, int);
-void		xfs_iext_remove_inline(xfs_ifork_t *, xfs_extnum_t, int);
-void		xfs_iext_remove_direct(xfs_ifork_t *, xfs_extnum_t, int);
-void		xfs_iext_remove_indirect(xfs_ifork_t *, xfs_extnum_t, int);
-void		xfs_iext_realloc_direct(xfs_ifork_t *, int);
-void		xfs_iext_direct_to_inline(xfs_ifork_t *, xfs_extnum_t);
-void		xfs_iext_inline_to_direct(xfs_ifork_t *, int);
-void		xfs_iext_destroy(xfs_ifork_t *);
-xfs_bmbt_rec_host_t *xfs_iext_bno_to_ext(xfs_ifork_t *, xfs_fileoff_t, int *);
-xfs_ext_irec_t	*xfs_iext_bno_to_irec(xfs_ifork_t *, xfs_fileoff_t, int *);
-xfs_ext_irec_t	*xfs_iext_idx_to_irec(xfs_ifork_t *, xfs_extnum_t *, int *, int);
-void		xfs_iext_irec_init(xfs_ifork_t *);
-xfs_ext_irec_t *xfs_iext_irec_new(xfs_ifork_t *, int);
-void		xfs_iext_irec_remove(xfs_ifork_t *, int);
-void		xfs_iext_irec_compact(xfs_ifork_t *);
-void		xfs_iext_irec_compact_pages(xfs_ifork_t *);
-void		xfs_iext_irec_compact_full(xfs_ifork_t *);
-void		xfs_iext_irec_update_extoffs(xfs_ifork_t *, int, int);
-bool		xfs_can_free_eofblocks(struct xfs_inode *, bool);
-
-#define xfs_ipincount(ip)	((unsigned int) atomic_read(&ip->i_pincount))
-
-#if defined(DEBUG)
-void		xfs_inobp_check(struct xfs_mount *, struct xfs_buf *);
-#else
-#define	xfs_inobp_check(mp, bp)
-#endif /* DEBUG */
-
-extern struct kmem_zone	*xfs_ifork_zone;
 extern struct kmem_zone	*xfs_inode_zone;
-extern struct kmem_zone	*xfs_ili_zone;
-extern const struct xfs_buf_ops xfs_inode_buf_ops;
 
 #endif	/* __XFS_INODE_H__ */
diff --git a/fs/xfs/xfs_inode_buf.c b/fs/xfs/xfs_inode_buf.c
new file mode 100644
index 0000000..e011d59
--- /dev/null
+++ b/fs/xfs/xfs_inode_buf.c
@@ -0,0 +1,483 @@
+/*
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_format.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_error.h"
+#include "xfs_cksum.h"
+#include "xfs_icache.h"
+#include "xfs_ialloc.h"
+
+/*
+ * Check that none of the inode's in the buffer have a next
+ * unlinked field of 0.
+ */
+#if defined(DEBUG)
+void
+xfs_inobp_check(
+	xfs_mount_t	*mp,
+	xfs_buf_t	*bp)
+{
+	int		i;
+	int		j;
+	xfs_dinode_t	*dip;
+
+	j = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog;
+
+	for (i = 0; i < j; i++) {
+		dip = (xfs_dinode_t *)xfs_buf_offset(bp,
+					i * mp->m_sb.sb_inodesize);
+		if (!dip->di_next_unlinked)  {
+			xfs_alert(mp,
+	"Detected bogus zero next_unlinked field in incore inode buffer 0x%p.",
+				bp);
+			ASSERT(dip->di_next_unlinked);
+		}
+	}
+}
+#endif
+
+/*
+ * If we are doing readahead on an inode buffer, we might be in log recovery
+ * reading an inode allocation buffer that hasn't yet been replayed, and hence
+ * has not had the inode cores stamped into it. Hence for readahead, the buffer
+ * may be potentially invalid.
+ *
+ * If the readahead buffer is invalid, we don't want to mark it with an error,
+ * but we do want to clear the DONE status of the buffer so that a followup read
+ * will re-read it from disk. This will ensure that we don't get an unnecessary
+ * warnings during log recovery and we don't get unnecssary panics on debug
+ * kernels.
+ */
+static void
+xfs_inode_buf_verify(
+	struct xfs_buf	*bp,
+	bool		readahead)
+{
+	struct xfs_mount *mp = bp->b_target->bt_mount;
+	int		i;
+	int		ni;
+
+	/*
+	 * Validate the magic number and version of every inode in the buffer
+	 */
+	ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock;
+	for (i = 0; i < ni; i++) {
+		int		di_ok;
+		xfs_dinode_t	*dip;
+
+		dip = (struct xfs_dinode *)xfs_buf_offset(bp,
+					(i << mp->m_sb.sb_inodelog));
+		di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
+			    XFS_DINODE_GOOD_VERSION(dip->di_version);
+		if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
+						XFS_ERRTAG_ITOBP_INOTOBP,
+						XFS_RANDOM_ITOBP_INOTOBP))) {
+			if (readahead) {
+				bp->b_flags &= ~XBF_DONE;
+				return;
+			}
+
+			xfs_buf_ioerror(bp, EFSCORRUPTED);
+			XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_HIGH,
+					     mp, dip);
+#ifdef DEBUG
+			xfs_emerg(mp,
+				"bad inode magic/vsn daddr %lld #%d (magic=%x)",
+				(unsigned long long)bp->b_bn, i,
+				be16_to_cpu(dip->di_magic));
+			ASSERT(0);
+#endif
+		}
+	}
+	xfs_inobp_check(mp, bp);
+}
+
+
+static void
+xfs_inode_buf_read_verify(
+	struct xfs_buf	*bp)
+{
+	xfs_inode_buf_verify(bp, false);
+}
+
+static void
+xfs_inode_buf_readahead_verify(
+	struct xfs_buf	*bp)
+{
+	xfs_inode_buf_verify(bp, true);
+}
+
+static void
+xfs_inode_buf_write_verify(
+	struct xfs_buf	*bp)
+{
+	xfs_inode_buf_verify(bp, false);
+}
+
+const struct xfs_buf_ops xfs_inode_buf_ops = {
+	.verify_read = xfs_inode_buf_read_verify,
+	.verify_write = xfs_inode_buf_write_verify,
+};
+
+const struct xfs_buf_ops xfs_inode_buf_ra_ops = {
+	.verify_read = xfs_inode_buf_readahead_verify,
+	.verify_write = xfs_inode_buf_write_verify,
+};
+
+
+/*
+ * This routine is called to map an inode to the buffer containing the on-disk
+ * version of the inode.  It returns a pointer to the buffer containing the
+ * on-disk inode in the bpp parameter, and in the dipp parameter it returns a
+ * pointer to the on-disk inode within that buffer.
+ *
+ * If a non-zero error is returned, then the contents of bpp and dipp are
+ * undefined.
+ */
+int
+xfs_imap_to_bp(
+	struct xfs_mount	*mp,
+	struct xfs_trans	*tp,
+	struct xfs_imap		*imap,
+	struct xfs_dinode       **dipp,
+	struct xfs_buf		**bpp,
+	uint			buf_flags,
+	uint			iget_flags)
+{
+	struct xfs_buf		*bp;
+	int			error;
+
+	buf_flags |= XBF_UNMAPPED;
+	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno,
+				   (int)imap->im_len, buf_flags, &bp,
+				   &xfs_inode_buf_ops);
+	if (error) {
+		if (error == EAGAIN) {
+			ASSERT(buf_flags & XBF_TRYLOCK);
+			return error;
+		}
+
+		if (error == EFSCORRUPTED &&
+		    (iget_flags & XFS_IGET_UNTRUSTED))
+			return XFS_ERROR(EINVAL);
+
+		xfs_warn(mp, "%s: xfs_trans_read_buf() returned error %d.",
+			__func__, error);
+		return error;
+	}
+
+	*bpp = bp;
+	*dipp = (struct xfs_dinode *)xfs_buf_offset(bp, imap->im_boffset);
+	return 0;
+}
+
+STATIC void
+xfs_dinode_from_disk(
+	xfs_icdinode_t		*to,
+	xfs_dinode_t		*from)
+{
+	to->di_magic = be16_to_cpu(from->di_magic);
+	to->di_mode = be16_to_cpu(from->di_mode);
+	to->di_version = from ->di_version;
+	to->di_format = from->di_format;
+	to->di_onlink = be16_to_cpu(from->di_onlink);
+	to->di_uid = be32_to_cpu(from->di_uid);
+	to->di_gid = be32_to_cpu(from->di_gid);
+	to->di_nlink = be32_to_cpu(from->di_nlink);
+	to->di_projid_lo = be16_to_cpu(from->di_projid_lo);
+	to->di_projid_hi = be16_to_cpu(from->di_projid_hi);
+	memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
+	to->di_flushiter = be16_to_cpu(from->di_flushiter);
+	to->di_atime.t_sec = be32_to_cpu(from->di_atime.t_sec);
+	to->di_atime.t_nsec = be32_to_cpu(from->di_atime.t_nsec);
+	to->di_mtime.t_sec = be32_to_cpu(from->di_mtime.t_sec);
+	to->di_mtime.t_nsec = be32_to_cpu(from->di_mtime.t_nsec);
+	to->di_ctime.t_sec = be32_to_cpu(from->di_ctime.t_sec);
+	to->di_ctime.t_nsec = be32_to_cpu(from->di_ctime.t_nsec);
+	to->di_size = be64_to_cpu(from->di_size);
+	to->di_nblocks = be64_to_cpu(from->di_nblocks);
+	to->di_extsize = be32_to_cpu(from->di_extsize);
+	to->di_nextents = be32_to_cpu(from->di_nextents);
+	to->di_anextents = be16_to_cpu(from->di_anextents);
+	to->di_forkoff = from->di_forkoff;
+	to->di_aformat	= from->di_aformat;
+	to->di_dmevmask	= be32_to_cpu(from->di_dmevmask);
+	to->di_dmstate	= be16_to_cpu(from->di_dmstate);
+	to->di_flags	= be16_to_cpu(from->di_flags);
+	to->di_gen	= be32_to_cpu(from->di_gen);
+
+	if (to->di_version == 3) {
+		to->di_changecount = be64_to_cpu(from->di_changecount);
+		to->di_crtime.t_sec = be32_to_cpu(from->di_crtime.t_sec);
+		to->di_crtime.t_nsec = be32_to_cpu(from->di_crtime.t_nsec);
+		to->di_flags2 = be64_to_cpu(from->di_flags2);
+		to->di_ino = be64_to_cpu(from->di_ino);
+		to->di_lsn = be64_to_cpu(from->di_lsn);
+		memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
+		uuid_copy(&to->di_uuid, &from->di_uuid);
+	}
+}
+
+void
+xfs_dinode_to_disk(
+	xfs_dinode_t		*to,
+	xfs_icdinode_t		*from)
+{
+	to->di_magic = cpu_to_be16(from->di_magic);
+	to->di_mode = cpu_to_be16(from->di_mode);
+	to->di_version = from ->di_version;
+	to->di_format = from->di_format;
+	to->di_onlink = cpu_to_be16(from->di_onlink);
+	to->di_uid = cpu_to_be32(from->di_uid);
+	to->di_gid = cpu_to_be32(from->di_gid);
+	to->di_nlink = cpu_to_be32(from->di_nlink);
+	to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
+	to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
+	memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
+	to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec);
+	to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec);
+	to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec);
+	to->di_mtime.t_nsec = cpu_to_be32(from->di_mtime.t_nsec);
+	to->di_ctime.t_sec = cpu_to_be32(from->di_ctime.t_sec);
+	to->di_ctime.t_nsec = cpu_to_be32(from->di_ctime.t_nsec);
+	to->di_size = cpu_to_be64(from->di_size);
+	to->di_nblocks = cpu_to_be64(from->di_nblocks);
+	to->di_extsize = cpu_to_be32(from->di_extsize);
+	to->di_nextents = cpu_to_be32(from->di_nextents);
+	to->di_anextents = cpu_to_be16(from->di_anextents);
+	to->di_forkoff = from->di_forkoff;
+	to->di_aformat = from->di_aformat;
+	to->di_dmevmask = cpu_to_be32(from->di_dmevmask);
+	to->di_dmstate = cpu_to_be16(from->di_dmstate);
+	to->di_flags = cpu_to_be16(from->di_flags);
+	to->di_gen = cpu_to_be32(from->di_gen);
+
+	if (from->di_version == 3) {
+		to->di_changecount = cpu_to_be64(from->di_changecount);
+		to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec);
+		to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec);
+		to->di_flags2 = cpu_to_be64(from->di_flags2);
+		to->di_ino = cpu_to_be64(from->di_ino);
+		to->di_lsn = cpu_to_be64(from->di_lsn);
+		memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
+		uuid_copy(&to->di_uuid, &from->di_uuid);
+		to->di_flushiter = 0;
+	} else {
+		to->di_flushiter = cpu_to_be16(from->di_flushiter);
+	}
+}
+
+static bool
+xfs_dinode_verify(
+	struct xfs_mount	*mp,
+	struct xfs_inode	*ip,
+	struct xfs_dinode	*dip)
+{
+	if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))
+		return false;
+
+	/* only version 3 or greater inodes are extensively verified here */
+	if (dip->di_version < 3)
+		return true;
+
+	if (!xfs_sb_version_hascrc(&mp->m_sb))
+		return false;
+	if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
+			      offsetof(struct xfs_dinode, di_crc)))
+		return false;
+	if (be64_to_cpu(dip->di_ino) != ip->i_ino)
+		return false;
+	if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_uuid))
+		return false;
+	return true;
+}
+
+void
+xfs_dinode_calc_crc(
+	struct xfs_mount	*mp,
+	struct xfs_dinode	*dip)
+{
+	__uint32_t		crc;
+
+	if (dip->di_version < 3)
+		return;
+
+	ASSERT(xfs_sb_version_hascrc(&mp->m_sb));
+	crc = xfs_start_cksum((char *)dip, mp->m_sb.sb_inodesize,
+			      offsetof(struct xfs_dinode, di_crc));
+	dip->di_crc = xfs_end_cksum(crc);
+}
+
+/*
+ * Read the disk inode attributes into the in-core inode structure.
+ *
+ * For version 5 superblocks, if we are initialising a new inode and we are not
+ * utilising the XFS_MOUNT_IKEEP inode cluster mode, we can simple build the new
+ * inode core with a random generation number. If we are keeping inodes around,
+ * we need to read the inode cluster to get the existing generation number off
+ * disk. Further, if we are using version 4 superblocks (i.e. v1/v2 inode
+ * format) then log recovery is dependent on the di_flushiter field being
+ * initialised from the current on-disk value and hence we must also read the
+ * inode off disk.
+ */
+int
+xfs_iread(
+	xfs_mount_t	*mp,
+	xfs_trans_t	*tp,
+	xfs_inode_t	*ip,
+	uint		iget_flags)
+{
+	xfs_buf_t	*bp;
+	xfs_dinode_t	*dip;
+	int		error;
+
+	/*
+	 * Fill in the location information in the in-core inode.
+	 */
+	error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, iget_flags);
+	if (error)
+		return error;
+
+	/* shortcut IO on inode allocation if possible */
+	if ((iget_flags & XFS_IGET_CREATE) &&
+	    xfs_sb_version_hascrc(&mp->m_sb) &&
+	    !(mp->m_flags & XFS_MOUNT_IKEEP)) {
+		/* initialise the on-disk inode core */
+		memset(&ip->i_d, 0, sizeof(ip->i_d));
+		ip->i_d.di_magic = XFS_DINODE_MAGIC;
+		ip->i_d.di_gen = prandom_u32();
+		if (xfs_sb_version_hascrc(&mp->m_sb)) {
+			ip->i_d.di_version = 3;
+			ip->i_d.di_ino = ip->i_ino;
+			uuid_copy(&ip->i_d.di_uuid, &mp->m_sb.sb_uuid);
+		} else
+			ip->i_d.di_version = 2;
+		return 0;
+	}
+
+	/*
+	 * Get pointers to the on-disk inode and the buffer containing it.
+	 */
+	error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &bp, 0, iget_flags);
+	if (error)
+		return error;
+
+	/* even unallocated inodes are verified */
+	if (!xfs_dinode_verify(mp, ip, dip)) {
+		xfs_alert(mp, "%s: validation failed for inode %lld failed",
+				__func__, ip->i_ino);
+
+		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, dip);
+		error = XFS_ERROR(EFSCORRUPTED);
+		goto out_brelse;
+	}
+
+	/*
+	 * If the on-disk inode is already linked to a directory
+	 * entry, copy all of the inode into the in-core inode.
+	 * xfs_iformat_fork() handles copying in the inode format
+	 * specific information.
+	 * Otherwise, just get the truly permanent information.
+	 */
+	if (dip->di_mode) {
+		xfs_dinode_from_disk(&ip->i_d, dip);
+		error = xfs_iformat_fork(ip, dip);
+		if (error)  {
+#ifdef DEBUG
+			xfs_alert(mp, "%s: xfs_iformat() returned error %d",
+				__func__, error);
+#endif /* DEBUG */
+			goto out_brelse;
+		}
+	} else {
+		/*
+		 * Partial initialisation of the in-core inode. Just the bits
+		 * that xfs_ialloc won't overwrite or relies on being correct.
+		 */
+		ip->i_d.di_magic = be16_to_cpu(dip->di_magic);
+		ip->i_d.di_version = dip->di_version;
+		ip->i_d.di_gen = be32_to_cpu(dip->di_gen);
+		ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter);
+
+		if (dip->di_version == 3) {
+			ip->i_d.di_ino = be64_to_cpu(dip->di_ino);
+			uuid_copy(&ip->i_d.di_uuid, &dip->di_uuid);
+		}
+
+		/*
+		 * Make sure to pull in the mode here as well in
+		 * case the inode is released without being used.
+		 * This ensures that xfs_inactive() will see that
+		 * the inode is already free and not try to mess
+		 * with the uninitialized part of it.
+		 */
+		ip->i_d.di_mode = 0;
+	}
+
+	/*
+	 * The inode format changed when we moved the link count and
+	 * made it 32 bits long.  If this is an old format inode,
+	 * convert it in memory to look like a new one.  If it gets
+	 * flushed to disk we will convert back before flushing or
+	 * logging it.  We zero out the new projid field and the old link
+	 * count field.  We'll handle clearing the pad field (the remains
+	 * of the old uuid field) when we actually convert the inode to
+	 * the new format. We don't change the version number so that we
+	 * can distinguish this from a real new format inode.
+	 */
+	if (ip->i_d.di_version == 1) {
+		ip->i_d.di_nlink = ip->i_d.di_onlink;
+		ip->i_d.di_onlink = 0;
+		xfs_set_projid(ip, 0);
+	}
+
+	ip->i_delayed_blks = 0;
+
+	/*
+	 * Mark the buffer containing the inode as something to keep
+	 * around for a while.  This helps to keep recently accessed
+	 * meta-data in-core longer.
+	 */
+	xfs_buf_set_ref(bp, XFS_INO_REF);
+
+	/*
+	 * Use xfs_trans_brelse() to release the buffer containing the on-disk
+	 * inode, because it was acquired with xfs_trans_read_buf() in
+	 * xfs_imap_to_bp() above.  If tp is NULL, this is just a normal
+	 * brelse().  If we're within a transaction, then xfs_trans_brelse()
+	 * will only release the buffer if it is not dirty within the
+	 * transaction.  It will be OK to release the buffer in this case,
+	 * because inodes on disk are never destroyed and we will be locking the
+	 * new in-core inode before putting it in the cache where other
+	 * processes can find it.  Thus we don't have to worry about the inode
+	 * being changed just because we released the buffer.
+	 */
+ out_brelse:
+	xfs_trans_brelse(tp, bp);
+	return error;
+}
diff --git a/fs/xfs/xfs_inode_buf.h b/fs/xfs/xfs_inode_buf.h
new file mode 100644
index 0000000..599e6c0
--- /dev/null
+++ b/fs/xfs/xfs_inode_buf.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef	__XFS_INODE_BUF_H__
+#define	__XFS_INODE_BUF_H__
+
+struct xfs_inode;
+struct xfs_dinode;
+struct xfs_icdinode;
+
+/*
+ * Inode location information.  Stored in the inode and passed to
+ * xfs_imap_to_bp() to get a buffer and dinode for a given inode.
+ */
+struct xfs_imap {
+	xfs_daddr_t	im_blkno;	/* starting BB of inode chunk */
+	ushort		im_len;		/* length in BBs of inode chunk */
+	ushort		im_boffset;	/* inode offset in block in bytes */
+};
+
+int		xfs_imap_to_bp(struct xfs_mount *, struct xfs_trans *,
+			       struct xfs_imap *, struct xfs_dinode **,
+			       struct xfs_buf **, uint, uint);
+int		xfs_iread(struct xfs_mount *, struct xfs_trans *,
+			  struct xfs_inode *, uint);
+void		xfs_dinode_calc_crc(struct xfs_mount *, struct xfs_dinode *);
+void		xfs_dinode_to_disk(struct xfs_dinode *,
+				   struct xfs_icdinode *);
+
+#if defined(DEBUG)
+void		xfs_inobp_check(struct xfs_mount *, struct xfs_buf *);
+#else
+#define	xfs_inobp_check(mp, bp)
+#endif /* DEBUG */
+
+extern const struct xfs_buf_ops xfs_inode_buf_ops;
+extern const struct xfs_buf_ops xfs_inode_buf_ra_ops;
+
+#endif	/* __XFS_INODE_BUF_H__ */
diff --git a/fs/xfs/xfs_inode_fork.c b/fs/xfs/xfs_inode_fork.c
new file mode 100644
index 0000000..02f1083
--- /dev/null
+++ b/fs/xfs/xfs_inode_fork.c
@@ -0,0 +1,1920 @@
+/*
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include <linux/log2.h>
+
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_format.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_trans_priv.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_attr_sf.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_buf_item.h"
+#include "xfs_inode_item.h"
+#include "xfs_btree.h"
+#include "xfs_alloc.h"
+#include "xfs_ialloc.h"
+#include "xfs_bmap.h"
+#include "xfs_error.h"
+#include "xfs_quota.h"
+#include "xfs_filestream.h"
+#include "xfs_cksum.h"
+#include "xfs_trace.h"
+#include "xfs_icache.h"
+
+kmem_zone_t *xfs_ifork_zone;
+
+STATIC int xfs_iformat_local(xfs_inode_t *, xfs_dinode_t *, int, int);
+STATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int);
+STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int);
+
+#ifdef DEBUG
+/*
+ * Make sure that the extents in the given memory buffer
+ * are valid.
+ */
+void
+xfs_validate_extents(
+	xfs_ifork_t		*ifp,
+	int			nrecs,
+	xfs_exntfmt_t		fmt)
+{
+	xfs_bmbt_irec_t		irec;
+	xfs_bmbt_rec_host_t	rec;
+	int			i;
+
+	for (i = 0; i < nrecs; i++) {
+		xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
+		rec.l0 = get_unaligned(&ep->l0);
+		rec.l1 = get_unaligned(&ep->l1);
+		xfs_bmbt_get_all(&rec, &irec);
+		if (fmt == XFS_EXTFMT_NOSTATE)
+			ASSERT(irec.br_state == XFS_EXT_NORM);
+	}
+}
+#else /* DEBUG */
+#define xfs_validate_extents(ifp, nrecs, fmt)
+#endif /* DEBUG */
+
+
+/*
+ * Move inode type and inode format specific information from the
+ * on-disk inode to the in-core inode.  For fifos, devs, and sockets
+ * this means set if_rdev to the proper value.  For files, directories,
+ * and symlinks this means to bring in the in-line data or extent
+ * pointers.  For a file in B-tree format, only the root is immediately
+ * brought in-core.  The rest will be in-lined in if_extents when it
+ * is first referenced (see xfs_iread_extents()).
+ */
+int
+xfs_iformat_fork(
+	xfs_inode_t		*ip,
+	xfs_dinode_t		*dip)
+{
+	xfs_attr_shortform_t	*atp;
+	int			size;
+	int			error = 0;
+	xfs_fsize_t             di_size;
+
+	if (unlikely(be32_to_cpu(dip->di_nextents) +
+		     be16_to_cpu(dip->di_anextents) >
+		     be64_to_cpu(dip->di_nblocks))) {
+		xfs_warn(ip->i_mount,
+			"corrupt dinode %Lu, extent total = %d, nblocks = %Lu.",
+			(unsigned long long)ip->i_ino,
+			(int)(be32_to_cpu(dip->di_nextents) +
+			      be16_to_cpu(dip->di_anextents)),
+			(unsigned long long)
+				be64_to_cpu(dip->di_nblocks));
+		XFS_CORRUPTION_ERROR("xfs_iformat(1)", XFS_ERRLEVEL_LOW,
+				     ip->i_mount, dip);
+		return XFS_ERROR(EFSCORRUPTED);
+	}
+
+	if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) {
+		xfs_warn(ip->i_mount, "corrupt dinode %Lu, forkoff = 0x%x.",
+			(unsigned long long)ip->i_ino,
+			dip->di_forkoff);
+		XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW,
+				     ip->i_mount, dip);
+		return XFS_ERROR(EFSCORRUPTED);
+	}
+
+	if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) &&
+		     !ip->i_mount->m_rtdev_targp)) {
+		xfs_warn(ip->i_mount,
+			"corrupt dinode %Lu, has realtime flag set.",
+			ip->i_ino);
+		XFS_CORRUPTION_ERROR("xfs_iformat(realtime)",
+				     XFS_ERRLEVEL_LOW, ip->i_mount, dip);
+		return XFS_ERROR(EFSCORRUPTED);
+	}
+
+	switch (ip->i_d.di_mode & S_IFMT) {
+	case S_IFIFO:
+	case S_IFCHR:
+	case S_IFBLK:
+	case S_IFSOCK:
+		if (unlikely(dip->di_format != XFS_DINODE_FMT_DEV)) {
+			XFS_CORRUPTION_ERROR("xfs_iformat(3)", XFS_ERRLEVEL_LOW,
+					      ip->i_mount, dip);
+			return XFS_ERROR(EFSCORRUPTED);
+		}
+		ip->i_d.di_size = 0;
+		ip->i_df.if_u2.if_rdev = xfs_dinode_get_rdev(dip);
+		break;
+
+	case S_IFREG:
+	case S_IFLNK:
+	case S_IFDIR:
+		switch (dip->di_format) {
+		case XFS_DINODE_FMT_LOCAL:
+			/*
+			 * no local regular files yet
+			 */
+			if (unlikely(S_ISREG(be16_to_cpu(dip->di_mode)))) {
+				xfs_warn(ip->i_mount,
+			"corrupt inode %Lu (local format for regular file).",
+					(unsigned long long) ip->i_ino);
+				XFS_CORRUPTION_ERROR("xfs_iformat(4)",
+						     XFS_ERRLEVEL_LOW,
+						     ip->i_mount, dip);
+				return XFS_ERROR(EFSCORRUPTED);
+			}
+
+			di_size = be64_to_cpu(dip->di_size);
+			if (unlikely(di_size < 0 ||
+				     di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) {
+				xfs_warn(ip->i_mount,
+			"corrupt inode %Lu (bad size %Ld for local inode).",
+					(unsigned long long) ip->i_ino,
+					(long long) di_size);
+				XFS_CORRUPTION_ERROR("xfs_iformat(5)",
+						     XFS_ERRLEVEL_LOW,
+						     ip->i_mount, dip);
+				return XFS_ERROR(EFSCORRUPTED);
+			}
+
+			size = (int)di_size;
+			error = xfs_iformat_local(ip, dip, XFS_DATA_FORK, size);
+			break;
+		case XFS_DINODE_FMT_EXTENTS:
+			error = xfs_iformat_extents(ip, dip, XFS_DATA_FORK);
+			break;
+		case XFS_DINODE_FMT_BTREE:
+			error = xfs_iformat_btree(ip, dip, XFS_DATA_FORK);
+			break;
+		default:
+			XFS_ERROR_REPORT("xfs_iformat(6)", XFS_ERRLEVEL_LOW,
+					 ip->i_mount);
+			return XFS_ERROR(EFSCORRUPTED);
+		}
+		break;
+
+	default:
+		XFS_ERROR_REPORT("xfs_iformat(7)", XFS_ERRLEVEL_LOW, ip->i_mount);
+		return XFS_ERROR(EFSCORRUPTED);
+	}
+	if (error) {
+		return error;
+	}
+	if (!XFS_DFORK_Q(dip))
+		return 0;
+
+	ASSERT(ip->i_afp == NULL);
+	ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP | KM_NOFS);
+
+	switch (dip->di_aformat) {
+	case XFS_DINODE_FMT_LOCAL:
+		atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip);
+		size = be16_to_cpu(atp->hdr.totsize);
+
+		if (unlikely(size < sizeof(struct xfs_attr_sf_hdr))) {
+			xfs_warn(ip->i_mount,
+				"corrupt inode %Lu (bad attr fork size %Ld).",
+				(unsigned long long) ip->i_ino,
+				(long long) size);
+			XFS_CORRUPTION_ERROR("xfs_iformat(8)",
+					     XFS_ERRLEVEL_LOW,
+					     ip->i_mount, dip);
+			return XFS_ERROR(EFSCORRUPTED);
+		}
+
+		error = xfs_iformat_local(ip, dip, XFS_ATTR_FORK, size);
+		break;
+	case XFS_DINODE_FMT_EXTENTS:
+		error = xfs_iformat_extents(ip, dip, XFS_ATTR_FORK);
+		break;
+	case XFS_DINODE_FMT_BTREE:
+		error = xfs_iformat_btree(ip, dip, XFS_ATTR_FORK);
+		break;
+	default:
+		error = XFS_ERROR(EFSCORRUPTED);
+		break;
+	}
+	if (error) {
+		kmem_zone_free(xfs_ifork_zone, ip->i_afp);
+		ip->i_afp = NULL;
+		xfs_idestroy_fork(ip, XFS_DATA_FORK);
+	}
+	return error;
+}
+
+/*
+ * The file is in-lined in the on-disk inode.
+ * If it fits into if_inline_data, then copy
+ * it there, otherwise allocate a buffer for it
+ * and copy the data there.  Either way, set
+ * if_data to point at the data.
+ * If we allocate a buffer for the data, make
+ * sure that its size is a multiple of 4 and
+ * record the real size in i_real_bytes.
+ */
+STATIC int
+xfs_iformat_local(
+	xfs_inode_t	*ip,
+	xfs_dinode_t	*dip,
+	int		whichfork,
+	int		size)
+{
+	xfs_ifork_t	*ifp;
+	int		real_size;
+
+	/*
+	 * If the size is unreasonable, then something
+	 * is wrong and we just bail out rather than crash in
+	 * kmem_alloc() or memcpy() below.
+	 */
+	if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
+		xfs_warn(ip->i_mount,
+	"corrupt inode %Lu (bad size %d for local fork, size = %d).",
+			(unsigned long long) ip->i_ino, size,
+			XFS_DFORK_SIZE(dip, ip->i_mount, whichfork));
+		XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW,
+				     ip->i_mount, dip);
+		return XFS_ERROR(EFSCORRUPTED);
+	}
+	ifp = XFS_IFORK_PTR(ip, whichfork);
+	real_size = 0;
+	if (size == 0)
+		ifp->if_u1.if_data = NULL;
+	else if (size <= sizeof(ifp->if_u2.if_inline_data))
+		ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
+	else {
+		real_size = roundup(size, 4);
+		ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP | KM_NOFS);
+	}
+	ifp->if_bytes = size;
+	ifp->if_real_bytes = real_size;
+	if (size)
+		memcpy(ifp->if_u1.if_data, XFS_DFORK_PTR(dip, whichfork), size);
+	ifp->if_flags &= ~XFS_IFEXTENTS;
+	ifp->if_flags |= XFS_IFINLINE;
+	return 0;
+}
+
+/*
+ * The file consists of a set of extents all
+ * of which fit into the on-disk inode.
+ * If there are few enough extents to fit into
+ * the if_inline_ext, then copy them there.
+ * Otherwise allocate a buffer for them and copy
+ * them into it.  Either way, set if_extents
+ * to point at the extents.
+ */
+STATIC int
+xfs_iformat_extents(
+	xfs_inode_t	*ip,
+	xfs_dinode_t	*dip,
+	int		whichfork)
+{
+	xfs_bmbt_rec_t	*dp;
+	xfs_ifork_t	*ifp;
+	int		nex;
+	int		size;
+	int		i;
+
+	ifp = XFS_IFORK_PTR(ip, whichfork);
+	nex = XFS_DFORK_NEXTENTS(dip, whichfork);
+	size = nex * (uint)sizeof(xfs_bmbt_rec_t);
+
+	/*
+	 * If the number of extents is unreasonable, then something
+	 * is wrong and we just bail out rather than crash in
+	 * kmem_alloc() or memcpy() below.
+	 */
+	if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
+		xfs_warn(ip->i_mount, "corrupt inode %Lu ((a)extents = %d).",
+			(unsigned long long) ip->i_ino, nex);
+		XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW,
+				     ip->i_mount, dip);
+		return XFS_ERROR(EFSCORRUPTED);
+	}
+
+	ifp->if_real_bytes = 0;
+	if (nex == 0)
+		ifp->if_u1.if_extents = NULL;
+	else if (nex <= XFS_INLINE_EXTS)
+		ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
+	else
+		xfs_iext_add(ifp, 0, nex);
+
+	ifp->if_bytes = size;
+	if (size) {
+		dp = (xfs_bmbt_rec_t *) XFS_DFORK_PTR(dip, whichfork);
+		xfs_validate_extents(ifp, nex, XFS_EXTFMT_INODE(ip));
+		for (i = 0; i < nex; i++, dp++) {
+			xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
+			ep->l0 = get_unaligned_be64(&dp->l0);
+			ep->l1 = get_unaligned_be64(&dp->l1);
+		}
+		XFS_BMAP_TRACE_EXLIST(ip, nex, whichfork);
+		if (whichfork != XFS_DATA_FORK ||
+			XFS_EXTFMT_INODE(ip) == XFS_EXTFMT_NOSTATE)
+				if (unlikely(xfs_check_nostate_extents(
+				    ifp, 0, nex))) {
+					XFS_ERROR_REPORT("xfs_iformat_extents(2)",
+							 XFS_ERRLEVEL_LOW,
+							 ip->i_mount);
+					return XFS_ERROR(EFSCORRUPTED);
+				}
+	}
+	ifp->if_flags |= XFS_IFEXTENTS;
+	return 0;
+}
+
+/*
+ * The file has too many extents to fit into
+ * the inode, so they are in B-tree format.
+ * Allocate a buffer for the root of the B-tree
+ * and copy the root into it.  The i_extents
+ * field will remain NULL until all of the
+ * extents are read in (when they are needed).
+ */
+STATIC int
+xfs_iformat_btree(
+	xfs_inode_t		*ip,
+	xfs_dinode_t		*dip,
+	int			whichfork)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	xfs_bmdr_block_t	*dfp;
+	xfs_ifork_t		*ifp;
+	/* REFERENCED */
+	int			nrecs;
+	int			size;
+
+	ifp = XFS_IFORK_PTR(ip, whichfork);
+	dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork);
+	size = XFS_BMAP_BROOT_SPACE(mp, dfp);
+	nrecs = be16_to_cpu(dfp->bb_numrecs);
+
+	/*
+	 * blow out if -- fork has less extents than can fit in
+	 * fork (fork shouldn't be a btree format), root btree
+	 * block has more records than can fit into the fork,
+	 * or the number of extents is greater than the number of
+	 * blocks.
+	 */
+	if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <=
+					XFS_IFORK_MAXEXT(ip, whichfork) ||
+		     XFS_BMDR_SPACE_CALC(nrecs) >
+					XFS_DFORK_SIZE(dip, mp, whichfork) ||
+		     XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) {
+		xfs_warn(mp, "corrupt inode %Lu (btree).",
+					(unsigned long long) ip->i_ino);
+		XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW,
+					 mp, dip);
+		return XFS_ERROR(EFSCORRUPTED);
+	}
+
+	ifp->if_broot_bytes = size;
+	ifp->if_broot = kmem_alloc(size, KM_SLEEP | KM_NOFS);
+	ASSERT(ifp->if_broot != NULL);
+	/*
+	 * Copy and convert from the on-disk structure
+	 * to the in-memory structure.
+	 */
+	xfs_bmdr_to_bmbt(ip, dfp, XFS_DFORK_SIZE(dip, ip->i_mount, whichfork),
+			 ifp->if_broot, size);
+	ifp->if_flags &= ~XFS_IFEXTENTS;
+	ifp->if_flags |= XFS_IFBROOT;
+
+	return 0;
+}
+
+/*
+ * Read in extents from a btree-format inode.
+ * Allocate and fill in if_extents.  Real work is done in xfs_bmap.c.
+ */
+int
+xfs_iread_extents(
+	xfs_trans_t	*tp,
+	xfs_inode_t	*ip,
+	int		whichfork)
+{
+	int		error;
+	xfs_ifork_t	*ifp;
+	xfs_extnum_t	nextents;
+
+	if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) {
+		XFS_ERROR_REPORT("xfs_iread_extents", XFS_ERRLEVEL_LOW,
+				 ip->i_mount);
+		return XFS_ERROR(EFSCORRUPTED);
+	}
+	nextents = XFS_IFORK_NEXTENTS(ip, whichfork);
+	ifp = XFS_IFORK_PTR(ip, whichfork);
+
+	/*
+	 * We know that the size is valid (it's checked in iformat_btree)
+	 */
+	ifp->if_bytes = ifp->if_real_bytes = 0;
+	ifp->if_flags |= XFS_IFEXTENTS;
+	xfs_iext_add(ifp, 0, nextents);
+	error = xfs_bmap_read_extents(tp, ip, whichfork);
+	if (error) {
+		xfs_iext_destroy(ifp);
+		ifp->if_flags &= ~XFS_IFEXTENTS;
+		return error;
+	}
+	xfs_validate_extents(ifp, nextents, XFS_EXTFMT_INODE(ip));
+	return 0;
+}
+/*
+ * Reallocate the space for if_broot based on the number of records
+ * being added or deleted as indicated in rec_diff.  Move the records
+ * and pointers in if_broot to fit the new size.  When shrinking this
+ * will eliminate holes between the records and pointers created by
+ * the caller.  When growing this will create holes to be filled in
+ * by the caller.
+ *
+ * The caller must not request to add more records than would fit in
+ * the on-disk inode root.  If the if_broot is currently NULL, then
+ * if we are adding records, one will be allocated.  The caller must also
+ * not request that the number of records go below zero, although
+ * it can go to zero.
+ *
+ * ip -- the inode whose if_broot area is changing
+ * ext_diff -- the change in the number of records, positive or negative,
+ *	 requested for the if_broot array.
+ */
+void
+xfs_iroot_realloc(
+	xfs_inode_t		*ip,
+	int			rec_diff,
+	int			whichfork)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	int			cur_max;
+	xfs_ifork_t		*ifp;
+	struct xfs_btree_block	*new_broot;
+	int			new_max;
+	size_t			new_size;
+	char			*np;
+	char			*op;
+
+	/*
+	 * Handle the degenerate case quietly.
+	 */
+	if (rec_diff == 0) {
+		return;
+	}
+
+	ifp = XFS_IFORK_PTR(ip, whichfork);
+	if (rec_diff > 0) {
+		/*
+		 * If there wasn't any memory allocated before, just
+		 * allocate it now and get out.
+		 */
+		if (ifp->if_broot_bytes == 0) {
+			new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, rec_diff);
+			ifp->if_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS);
+			ifp->if_broot_bytes = (int)new_size;
+			return;
+		}
+
+		/*
+		 * If there is already an existing if_broot, then we need
+		 * to realloc() it and shift the pointers to their new
+		 * location.  The records don't change location because
+		 * they are kept butted up against the btree block header.
+		 */
+		cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
+		new_max = cur_max + rec_diff;
+		new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max);
+		ifp->if_broot = kmem_realloc(ifp->if_broot, new_size,
+				XFS_BMAP_BROOT_SPACE_CALC(mp, cur_max),
+				KM_SLEEP | KM_NOFS);
+		op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
+						     ifp->if_broot_bytes);
+		np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
+						     (int)new_size);
+		ifp->if_broot_bytes = (int)new_size;
+		ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <=
+			XFS_IFORK_SIZE(ip, whichfork));
+		memmove(np, op, cur_max * (uint)sizeof(xfs_dfsbno_t));
+		return;
+	}
+
+	/*
+	 * rec_diff is less than 0.  In this case, we are shrinking the
+	 * if_broot buffer.  It must already exist.  If we go to zero
+	 * records, just get rid of the root and clear the status bit.
+	 */
+	ASSERT((ifp->if_broot != NULL) && (ifp->if_broot_bytes > 0));
+	cur_max = xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0);
+	new_max = cur_max + rec_diff;
+	ASSERT(new_max >= 0);
+	if (new_max > 0)
+		new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max);
+	else
+		new_size = 0;
+	if (new_size > 0) {
+		new_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS);
+		/*
+		 * First copy over the btree block header.
+		 */
+		memcpy(new_broot, ifp->if_broot,
+			XFS_BMBT_BLOCK_LEN(ip->i_mount));
+	} else {
+		new_broot = NULL;
+		ifp->if_flags &= ~XFS_IFBROOT;
+	}
+
+	/*
+	 * Only copy the records and pointers if there are any.
+	 */
+	if (new_max > 0) {
+		/*
+		 * First copy the records.
+		 */
+		op = (char *)XFS_BMBT_REC_ADDR(mp, ifp->if_broot, 1);
+		np = (char *)XFS_BMBT_REC_ADDR(mp, new_broot, 1);
+		memcpy(np, op, new_max * (uint)sizeof(xfs_bmbt_rec_t));
+
+		/*
+		 * Then copy the pointers.
+		 */
+		op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
+						     ifp->if_broot_bytes);
+		np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, new_broot, 1,
+						     (int)new_size);
+		memcpy(np, op, new_max * (uint)sizeof(xfs_dfsbno_t));
+	}
+	kmem_free(ifp->if_broot);
+	ifp->if_broot = new_broot;
+	ifp->if_broot_bytes = (int)new_size;
+	if (ifp->if_broot)
+		ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <=
+			XFS_IFORK_SIZE(ip, whichfork));
+	return;
+}
+
+
+/*
+ * This is called when the amount of space needed for if_data
+ * is increased or decreased.  The change in size is indicated by
+ * the number of bytes that need to be added or deleted in the
+ * byte_diff parameter.
+ *
+ * If the amount of space needed has decreased below the size of the
+ * inline buffer, then switch to using the inline buffer.  Otherwise,
+ * use kmem_realloc() or kmem_alloc() to adjust the size of the buffer
+ * to what is needed.
+ *
+ * ip -- the inode whose if_data area is changing
+ * byte_diff -- the change in the number of bytes, positive or negative,
+ *	 requested for the if_data array.
+ */
+void
+xfs_idata_realloc(
+	xfs_inode_t	*ip,
+	int		byte_diff,
+	int		whichfork)
+{
+	xfs_ifork_t	*ifp;
+	int		new_size;
+	int		real_size;
+
+	if (byte_diff == 0) {
+		return;
+	}
+
+	ifp = XFS_IFORK_PTR(ip, whichfork);
+	new_size = (int)ifp->if_bytes + byte_diff;
+	ASSERT(new_size >= 0);
+
+	if (new_size == 0) {
+		if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
+			kmem_free(ifp->if_u1.if_data);
+		}
+		ifp->if_u1.if_data = NULL;
+		real_size = 0;
+	} else if (new_size <= sizeof(ifp->if_u2.if_inline_data)) {
+		/*
+		 * If the valid extents/data can fit in if_inline_ext/data,
+		 * copy them from the malloc'd vector and free it.
+		 */
+		if (ifp->if_u1.if_data == NULL) {
+			ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
+		} else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
+			ASSERT(ifp->if_real_bytes != 0);
+			memcpy(ifp->if_u2.if_inline_data, ifp->if_u1.if_data,
+			      new_size);
+			kmem_free(ifp->if_u1.if_data);
+			ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
+		}
+		real_size = 0;
+	} else {
+		/*
+		 * Stuck with malloc/realloc.
+		 * For inline data, the underlying buffer must be
+		 * a multiple of 4 bytes in size so that it can be
+		 * logged and stay on word boundaries.  We enforce
+		 * that here.
+		 */
+		real_size = roundup(new_size, 4);
+		if (ifp->if_u1.if_data == NULL) {
+			ASSERT(ifp->if_real_bytes == 0);
+			ifp->if_u1.if_data = kmem_alloc(real_size,
+							KM_SLEEP | KM_NOFS);
+		} else if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
+			/*
+			 * Only do the realloc if the underlying size
+			 * is really changing.
+			 */
+			if (ifp->if_real_bytes != real_size) {
+				ifp->if_u1.if_data =
+					kmem_realloc(ifp->if_u1.if_data,
+							real_size,
+							ifp->if_real_bytes,
+							KM_SLEEP | KM_NOFS);
+			}
+		} else {
+			ASSERT(ifp->if_real_bytes == 0);
+			ifp->if_u1.if_data = kmem_alloc(real_size,
+							KM_SLEEP | KM_NOFS);
+			memcpy(ifp->if_u1.if_data, ifp->if_u2.if_inline_data,
+				ifp->if_bytes);
+		}
+	}
+	ifp->if_real_bytes = real_size;
+	ifp->if_bytes = new_size;
+	ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
+}
+
+void
+xfs_idestroy_fork(
+	xfs_inode_t	*ip,
+	int		whichfork)
+{
+	xfs_ifork_t	*ifp;
+
+	ifp = XFS_IFORK_PTR(ip, whichfork);
+	if (ifp->if_broot != NULL) {
+		kmem_free(ifp->if_broot);
+		ifp->if_broot = NULL;
+	}
+
+	/*
+	 * If the format is local, then we can't have an extents
+	 * array so just look for an inline data array.  If we're
+	 * not local then we may or may not have an extents list,
+	 * so check and free it up if we do.
+	 */
+	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
+		if ((ifp->if_u1.if_data != ifp->if_u2.if_inline_data) &&
+		    (ifp->if_u1.if_data != NULL)) {
+			ASSERT(ifp->if_real_bytes != 0);
+			kmem_free(ifp->if_u1.if_data);
+			ifp->if_u1.if_data = NULL;
+			ifp->if_real_bytes = 0;
+		}
+	} else if ((ifp->if_flags & XFS_IFEXTENTS) &&
+		   ((ifp->if_flags & XFS_IFEXTIREC) ||
+		    ((ifp->if_u1.if_extents != NULL) &&
+		     (ifp->if_u1.if_extents != ifp->if_u2.if_inline_ext)))) {
+		ASSERT(ifp->if_real_bytes != 0);
+		xfs_iext_destroy(ifp);
+	}
+	ASSERT(ifp->if_u1.if_extents == NULL ||
+	       ifp->if_u1.if_extents == ifp->if_u2.if_inline_ext);
+	ASSERT(ifp->if_real_bytes == 0);
+	if (whichfork == XFS_ATTR_FORK) {
+		kmem_zone_free(xfs_ifork_zone, ip->i_afp);
+		ip->i_afp = NULL;
+	}
+}
+
+/*
+ * xfs_iextents_copy()
+ *
+ * This is called to copy the REAL extents (as opposed to the delayed
+ * allocation extents) from the inode into the given buffer.  It
+ * returns the number of bytes copied into the buffer.
+ *
+ * If there are no delayed allocation extents, then we can just
+ * memcpy() the extents into the buffer.  Otherwise, we need to
+ * examine each extent in turn and skip those which are delayed.
+ */
+int
+xfs_iextents_copy(
+	xfs_inode_t		*ip,
+	xfs_bmbt_rec_t		*dp,
+	int			whichfork)
+{
+	int			copied;
+	int			i;
+	xfs_ifork_t		*ifp;
+	int			nrecs;
+	xfs_fsblock_t		start_block;
+
+	ifp = XFS_IFORK_PTR(ip, whichfork);
+	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
+	ASSERT(ifp->if_bytes > 0);
+
+	nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+	XFS_BMAP_TRACE_EXLIST(ip, nrecs, whichfork);
+	ASSERT(nrecs > 0);
+
+	/*
+	 * There are some delayed allocation extents in the
+	 * inode, so copy the extents one at a time and skip
+	 * the delayed ones.  There must be at least one
+	 * non-delayed extent.
+	 */
+	copied = 0;
+	for (i = 0; i < nrecs; i++) {
+		xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i);
+		start_block = xfs_bmbt_get_startblock(ep);
+		if (isnullstartblock(start_block)) {
+			/*
+			 * It's a delayed allocation extent, so skip it.
+			 */
+			continue;
+		}
+
+		/* Translate to on disk format */
+		put_unaligned_be64(ep->l0, &dp->l0);
+		put_unaligned_be64(ep->l1, &dp->l1);
+		dp++;
+		copied++;
+	}
+	ASSERT(copied != 0);
+	xfs_validate_extents(ifp, copied, XFS_EXTFMT_INODE(ip));
+
+	return (copied * (uint)sizeof(xfs_bmbt_rec_t));
+}
+
+/*
+ * Each of the following cases stores data into the same region
+ * of the on-disk inode, so only one of them can be valid at
+ * any given time. While it is possible to have conflicting formats
+ * and log flags, e.g. having XFS_ILOG_?DATA set when the fork is
+ * in EXTENTS format, this can only happen when the fork has
+ * changed formats after being modified but before being flushed.
+ * In these cases, the format always takes precedence, because the
+ * format indicates the current state of the fork.
+ */
+void
+xfs_iflush_fork(
+	xfs_inode_t		*ip,
+	xfs_dinode_t		*dip,
+	xfs_inode_log_item_t	*iip,
+	int			whichfork,
+	xfs_buf_t		*bp)
+{
+	char			*cp;
+	xfs_ifork_t		*ifp;
+	xfs_mount_t		*mp;
+	static const short	brootflag[2] =
+		{ XFS_ILOG_DBROOT, XFS_ILOG_ABROOT };
+	static const short	dataflag[2] =
+		{ XFS_ILOG_DDATA, XFS_ILOG_ADATA };
+	static const short	extflag[2] =
+		{ XFS_ILOG_DEXT, XFS_ILOG_AEXT };
+
+	if (!iip)
+		return;
+	ifp = XFS_IFORK_PTR(ip, whichfork);
+	/*
+	 * This can happen if we gave up in iformat in an error path,
+	 * for the attribute fork.
+	 */
+	if (!ifp) {
+		ASSERT(whichfork == XFS_ATTR_FORK);
+		return;
+	}
+	cp = XFS_DFORK_PTR(dip, whichfork);
+	mp = ip->i_mount;
+	switch (XFS_IFORK_FORMAT(ip, whichfork)) {
+	case XFS_DINODE_FMT_LOCAL:
+		if ((iip->ili_fields & dataflag[whichfork]) &&
+		    (ifp->if_bytes > 0)) {
+			ASSERT(ifp->if_u1.if_data != NULL);
+			ASSERT(ifp->if_bytes <= XFS_IFORK_SIZE(ip, whichfork));
+			memcpy(cp, ifp->if_u1.if_data, ifp->if_bytes);
+		}
+		break;
+
+	case XFS_DINODE_FMT_EXTENTS:
+		ASSERT((ifp->if_flags & XFS_IFEXTENTS) ||
+		       !(iip->ili_fields & extflag[whichfork]));
+		if ((iip->ili_fields & extflag[whichfork]) &&
+		    (ifp->if_bytes > 0)) {
+			ASSERT(xfs_iext_get_ext(ifp, 0));
+			ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0);
+			(void)xfs_iextents_copy(ip, (xfs_bmbt_rec_t *)cp,
+				whichfork);
+		}
+		break;
+
+	case XFS_DINODE_FMT_BTREE:
+		if ((iip->ili_fields & brootflag[whichfork]) &&
+		    (ifp->if_broot_bytes > 0)) {
+			ASSERT(ifp->if_broot != NULL);
+			ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <=
+			        XFS_IFORK_SIZE(ip, whichfork));
+			xfs_bmbt_to_bmdr(mp, ifp->if_broot, ifp->if_broot_bytes,
+				(xfs_bmdr_block_t *)cp,
+				XFS_DFORK_SIZE(dip, mp, whichfork));
+		}
+		break;
+
+	case XFS_DINODE_FMT_DEV:
+		if (iip->ili_fields & XFS_ILOG_DEV) {
+			ASSERT(whichfork == XFS_DATA_FORK);
+			xfs_dinode_put_rdev(dip, ip->i_df.if_u2.if_rdev);
+		}
+		break;
+
+	case XFS_DINODE_FMT_UUID:
+		if (iip->ili_fields & XFS_ILOG_UUID) {
+			ASSERT(whichfork == XFS_DATA_FORK);
+			memcpy(XFS_DFORK_DPTR(dip),
+			       &ip->i_df.if_u2.if_uuid,
+			       sizeof(uuid_t));
+		}
+		break;
+
+	default:
+		ASSERT(0);
+		break;
+	}
+}
+
+/*
+ * Return a pointer to the extent record at file index idx.
+ */
+xfs_bmbt_rec_host_t *
+xfs_iext_get_ext(
+	xfs_ifork_t	*ifp,		/* inode fork pointer */
+	xfs_extnum_t	idx)		/* index of target extent */
+{
+	ASSERT(idx >= 0);
+	ASSERT(idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t));
+
+	if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) {
+		return ifp->if_u1.if_ext_irec->er_extbuf;
+	} else if (ifp->if_flags & XFS_IFEXTIREC) {
+		xfs_ext_irec_t	*erp;		/* irec pointer */
+		int		erp_idx = 0;	/* irec index */
+		xfs_extnum_t	page_idx = idx;	/* ext index in target list */
+
+		erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 0);
+		return &erp->er_extbuf[page_idx];
+	} else if (ifp->if_bytes) {
+		return &ifp->if_u1.if_extents[idx];
+	} else {
+		return NULL;
+	}
+}
+
+/*
+ * Insert new item(s) into the extent records for incore inode
+ * fork 'ifp'.  'count' new items are inserted at index 'idx'.
+ */
+void
+xfs_iext_insert(
+	xfs_inode_t	*ip,		/* incore inode pointer */
+	xfs_extnum_t	idx,		/* starting index of new items */
+	xfs_extnum_t	count,		/* number of inserted items */
+	xfs_bmbt_irec_t	*new,		/* items to insert */
+	int		state)		/* type of extent conversion */
+{
+	xfs_ifork_t	*ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df;
+	xfs_extnum_t	i;		/* extent record index */
+
+	trace_xfs_iext_insert(ip, idx, new, state, _RET_IP_);
+
+	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
+	xfs_iext_add(ifp, idx, count);
+	for (i = idx; i < idx + count; i++, new++)
+		xfs_bmbt_set_all(xfs_iext_get_ext(ifp, i), new);
+}
+
+/*
+ * This is called when the amount of space required for incore file
+ * extents needs to be increased. The ext_diff parameter stores the
+ * number of new extents being added and the idx parameter contains
+ * the extent index where the new extents will be added. If the new
+ * extents are being appended, then we just need to (re)allocate and
+ * initialize the space. Otherwise, if the new extents are being
+ * inserted into the middle of the existing entries, a bit more work
+ * is required to make room for the new extents to be inserted. The
+ * caller is responsible for filling in the new extent entries upon
+ * return.
+ */
+void
+xfs_iext_add(
+	xfs_ifork_t	*ifp,		/* inode fork pointer */
+	xfs_extnum_t	idx,		/* index to begin adding exts */
+	int		ext_diff)	/* number of extents to add */
+{
+	int		byte_diff;	/* new bytes being added */
+	int		new_size;	/* size of extents after adding */
+	xfs_extnum_t	nextents;	/* number of extents in file */
+
+	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+	ASSERT((idx >= 0) && (idx <= nextents));
+	byte_diff = ext_diff * sizeof(xfs_bmbt_rec_t);
+	new_size = ifp->if_bytes + byte_diff;
+	/*
+	 * If the new number of extents (nextents + ext_diff)
+	 * fits inside the inode, then continue to use the inline
+	 * extent buffer.
+	 */
+	if (nextents + ext_diff <= XFS_INLINE_EXTS) {
+		if (idx < nextents) {
+			memmove(&ifp->if_u2.if_inline_ext[idx + ext_diff],
+				&ifp->if_u2.if_inline_ext[idx],
+				(nextents - idx) * sizeof(xfs_bmbt_rec_t));
+			memset(&ifp->if_u2.if_inline_ext[idx], 0, byte_diff);
+		}
+		ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
+		ifp->if_real_bytes = 0;
+	}
+	/*
+	 * Otherwise use a linear (direct) extent list.
+	 * If the extents are currently inside the inode,
+	 * xfs_iext_realloc_direct will switch us from
+	 * inline to direct extent allocation mode.
+	 */
+	else if (nextents + ext_diff <= XFS_LINEAR_EXTS) {
+		xfs_iext_realloc_direct(ifp, new_size);
+		if (idx < nextents) {
+			memmove(&ifp->if_u1.if_extents[idx + ext_diff],
+				&ifp->if_u1.if_extents[idx],
+				(nextents - idx) * sizeof(xfs_bmbt_rec_t));
+			memset(&ifp->if_u1.if_extents[idx], 0, byte_diff);
+		}
+	}
+	/* Indirection array */
+	else {
+		xfs_ext_irec_t	*erp;
+		int		erp_idx = 0;
+		int		page_idx = idx;
+
+		ASSERT(nextents + ext_diff > XFS_LINEAR_EXTS);
+		if (ifp->if_flags & XFS_IFEXTIREC) {
+			erp = xfs_iext_idx_to_irec(ifp, &page_idx, &erp_idx, 1);
+		} else {
+			xfs_iext_irec_init(ifp);
+			ASSERT(ifp->if_flags & XFS_IFEXTIREC);
+			erp = ifp->if_u1.if_ext_irec;
+		}
+		/* Extents fit in target extent page */
+		if (erp && erp->er_extcount + ext_diff <= XFS_LINEAR_EXTS) {
+			if (page_idx < erp->er_extcount) {
+				memmove(&erp->er_extbuf[page_idx + ext_diff],
+					&erp->er_extbuf[page_idx],
+					(erp->er_extcount - page_idx) *
+					sizeof(xfs_bmbt_rec_t));
+				memset(&erp->er_extbuf[page_idx], 0, byte_diff);
+			}
+			erp->er_extcount += ext_diff;
+			xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
+		}
+		/* Insert a new extent page */
+		else if (erp) {
+			xfs_iext_add_indirect_multi(ifp,
+				erp_idx, page_idx, ext_diff);
+		}
+		/*
+		 * If extent(s) are being appended to the last page in
+		 * the indirection array and the new extent(s) don't fit
+		 * in the page, then erp is NULL and erp_idx is set to
+		 * the next index needed in the indirection array.
+		 */
+		else {
+			int	count = ext_diff;
+
+			while (count) {
+				erp = xfs_iext_irec_new(ifp, erp_idx);
+				erp->er_extcount = count;
+				count -= MIN(count, (int)XFS_LINEAR_EXTS);
+				if (count) {
+					erp_idx++;
+				}
+			}
+		}
+	}
+	ifp->if_bytes = new_size;
+}
+
+/*
+ * This is called when incore extents are being added to the indirection
+ * array and the new extents do not fit in the target extent list. The
+ * erp_idx parameter contains the irec index for the target extent list
+ * in the indirection array, and the idx parameter contains the extent
+ * index within the list. The number of extents being added is stored
+ * in the count parameter.
+ *
+ *    |-------|   |-------|
+ *    |       |   |       |    idx - number of extents before idx
+ *    |  idx  |   | count |
+ *    |       |   |       |    count - number of extents being inserted at idx
+ *    |-------|   |-------|
+ *    | count |   | nex2  |    nex2 - number of extents after idx + count
+ *    |-------|   |-------|
+ */
+void
+xfs_iext_add_indirect_multi(
+	xfs_ifork_t	*ifp,			/* inode fork pointer */
+	int		erp_idx,		/* target extent irec index */
+	xfs_extnum_t	idx,			/* index within target list */
+	int		count)			/* new extents being added */
+{
+	int		byte_diff;		/* new bytes being added */
+	xfs_ext_irec_t	*erp;			/* pointer to irec entry */
+	xfs_extnum_t	ext_diff;		/* number of extents to add */
+	xfs_extnum_t	ext_cnt;		/* new extents still needed */
+	xfs_extnum_t	nex2;			/* extents after idx + count */
+	xfs_bmbt_rec_t	*nex2_ep = NULL;	/* temp list for nex2 extents */
+	int		nlists;			/* number of irec's (lists) */
+
+	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
+	erp = &ifp->if_u1.if_ext_irec[erp_idx];
+	nex2 = erp->er_extcount - idx;
+	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
+
+	/*
+	 * Save second part of target extent list
+	 * (all extents past */
+	if (nex2) {
+		byte_diff = nex2 * sizeof(xfs_bmbt_rec_t);
+		nex2_ep = (xfs_bmbt_rec_t *) kmem_alloc(byte_diff, KM_NOFS);
+		memmove(nex2_ep, &erp->er_extbuf[idx], byte_diff);
+		erp->er_extcount -= nex2;
+		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -nex2);
+		memset(&erp->er_extbuf[idx], 0, byte_diff);
+	}
+
+	/*
+	 * Add the new extents to the end of the target
+	 * list, then allocate new irec record(s) and
+	 * extent buffer(s) as needed to store the rest
+	 * of the new extents.
+	 */
+	ext_cnt = count;
+	ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS - erp->er_extcount);
+	if (ext_diff) {
+		erp->er_extcount += ext_diff;
+		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
+		ext_cnt -= ext_diff;
+	}
+	while (ext_cnt) {
+		erp_idx++;
+		erp = xfs_iext_irec_new(ifp, erp_idx);
+		ext_diff = MIN(ext_cnt, (int)XFS_LINEAR_EXTS);
+		erp->er_extcount = ext_diff;
+		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, ext_diff);
+		ext_cnt -= ext_diff;
+	}
+
+	/* Add nex2 extents back to indirection array */
+	if (nex2) {
+		xfs_extnum_t	ext_avail;
+		int		i;
+
+		byte_diff = nex2 * sizeof(xfs_bmbt_rec_t);
+		ext_avail = XFS_LINEAR_EXTS - erp->er_extcount;
+		i = 0;
+		/*
+		 * If nex2 extents fit in the current page, append
+		 * nex2_ep after the new extents.
+		 */
+		if (nex2 <= ext_avail) {
+			i = erp->er_extcount;
+		}
+		/*
+		 * Otherwise, check if space is available in the
+		 * next page.
+		 */
+		else if ((erp_idx < nlists - 1) &&
+			 (nex2 <= (ext_avail = XFS_LINEAR_EXTS -
+			  ifp->if_u1.if_ext_irec[erp_idx+1].er_extcount))) {
+			erp_idx++;
+			erp++;
+			/* Create a hole for nex2 extents */
+			memmove(&erp->er_extbuf[nex2], erp->er_extbuf,
+				erp->er_extcount * sizeof(xfs_bmbt_rec_t));
+		}
+		/*
+		 * Final choice, create a new extent page for
+		 * nex2 extents.
+		 */
+		else {
+			erp_idx++;
+			erp = xfs_iext_irec_new(ifp, erp_idx);
+		}
+		memmove(&erp->er_extbuf[i], nex2_ep, byte_diff);
+		kmem_free(nex2_ep);
+		erp->er_extcount += nex2;
+		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, nex2);
+	}
+}
+
+/*
+ * This is called when the amount of space required for incore file
+ * extents needs to be decreased. The ext_diff parameter stores the
+ * number of extents to be removed and the idx parameter contains
+ * the extent index where the extents will be removed from.
+ *
+ * If the amount of space needed has decreased below the linear
+ * limit, XFS_IEXT_BUFSZ, then switch to using the contiguous
+ * extent array.  Otherwise, use kmem_realloc() to adjust the
+ * size to what is needed.
+ */
+void
+xfs_iext_remove(
+	xfs_inode_t	*ip,		/* incore inode pointer */
+	xfs_extnum_t	idx,		/* index to begin removing exts */
+	int		ext_diff,	/* number of extents to remove */
+	int		state)		/* type of extent conversion */
+{
+	xfs_ifork_t	*ifp = (state & BMAP_ATTRFORK) ? ip->i_afp : &ip->i_df;
+	xfs_extnum_t	nextents;	/* number of extents in file */
+	int		new_size;	/* size of extents after removal */
+
+	trace_xfs_iext_remove(ip, idx, state, _RET_IP_);
+
+	ASSERT(ext_diff > 0);
+	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+	new_size = (nextents - ext_diff) * sizeof(xfs_bmbt_rec_t);
+
+	if (new_size == 0) {
+		xfs_iext_destroy(ifp);
+	} else if (ifp->if_flags & XFS_IFEXTIREC) {
+		xfs_iext_remove_indirect(ifp, idx, ext_diff);
+	} else if (ifp->if_real_bytes) {
+		xfs_iext_remove_direct(ifp, idx, ext_diff);
+	} else {
+		xfs_iext_remove_inline(ifp, idx, ext_diff);
+	}
+	ifp->if_bytes = new_size;
+}
+
+/*
+ * This removes ext_diff extents from the inline buffer, beginning
+ * at extent index idx.
+ */
+void
+xfs_iext_remove_inline(
+	xfs_ifork_t	*ifp,		/* inode fork pointer */
+	xfs_extnum_t	idx,		/* index to begin removing exts */
+	int		ext_diff)	/* number of extents to remove */
+{
+	int		nextents;	/* number of extents in file */
+
+	ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
+	ASSERT(idx < XFS_INLINE_EXTS);
+	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+	ASSERT(((nextents - ext_diff) > 0) &&
+		(nextents - ext_diff) < XFS_INLINE_EXTS);
+
+	if (idx + ext_diff < nextents) {
+		memmove(&ifp->if_u2.if_inline_ext[idx],
+			&ifp->if_u2.if_inline_ext[idx + ext_diff],
+			(nextents - (idx + ext_diff)) *
+			 sizeof(xfs_bmbt_rec_t));
+		memset(&ifp->if_u2.if_inline_ext[nextents - ext_diff],
+			0, ext_diff * sizeof(xfs_bmbt_rec_t));
+	} else {
+		memset(&ifp->if_u2.if_inline_ext[idx], 0,
+			ext_diff * sizeof(xfs_bmbt_rec_t));
+	}
+}
+
+/*
+ * This removes ext_diff extents from a linear (direct) extent list,
+ * beginning at extent index idx. If the extents are being removed
+ * from the end of the list (ie. truncate) then we just need to re-
+ * allocate the list to remove the extra space. Otherwise, if the
+ * extents are being removed from the middle of the existing extent
+ * entries, then we first need to move the extent records beginning
+ * at idx + ext_diff up in the list to overwrite the records being
+ * removed, then remove the extra space via kmem_realloc.
+ */
+void
+xfs_iext_remove_direct(
+	xfs_ifork_t	*ifp,		/* inode fork pointer */
+	xfs_extnum_t	idx,		/* index to begin removing exts */
+	int		ext_diff)	/* number of extents to remove */
+{
+	xfs_extnum_t	nextents;	/* number of extents in file */
+	int		new_size;	/* size of extents after removal */
+
+	ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
+	new_size = ifp->if_bytes -
+		(ext_diff * sizeof(xfs_bmbt_rec_t));
+	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+
+	if (new_size == 0) {
+		xfs_iext_destroy(ifp);
+		return;
+	}
+	/* Move extents up in the list (if needed) */
+	if (idx + ext_diff < nextents) {
+		memmove(&ifp->if_u1.if_extents[idx],
+			&ifp->if_u1.if_extents[idx + ext_diff],
+			(nextents - (idx + ext_diff)) *
+			 sizeof(xfs_bmbt_rec_t));
+	}
+	memset(&ifp->if_u1.if_extents[nextents - ext_diff],
+		0, ext_diff * sizeof(xfs_bmbt_rec_t));
+	/*
+	 * Reallocate the direct extent list. If the extents
+	 * will fit inside the inode then xfs_iext_realloc_direct
+	 * will switch from direct to inline extent allocation
+	 * mode for us.
+	 */
+	xfs_iext_realloc_direct(ifp, new_size);
+	ifp->if_bytes = new_size;
+}
+
+/*
+ * This is called when incore extents are being removed from the
+ * indirection array and the extents being removed span multiple extent
+ * buffers. The idx parameter contains the file extent index where we
+ * want to begin removing extents, and the count parameter contains
+ * how many extents need to be removed.
+ *
+ *    |-------|   |-------|
+ *    | nex1  |   |       |    nex1 - number of extents before idx
+ *    |-------|   | count |
+ *    |       |   |       |    count - number of extents being removed at idx
+ *    | count |   |-------|
+ *    |       |   | nex2  |    nex2 - number of extents after idx + count
+ *    |-------|   |-------|
+ */
+void
+xfs_iext_remove_indirect(
+	xfs_ifork_t	*ifp,		/* inode fork pointer */
+	xfs_extnum_t	idx,		/* index to begin removing extents */
+	int		count)		/* number of extents to remove */
+{
+	xfs_ext_irec_t	*erp;		/* indirection array pointer */
+	int		erp_idx = 0;	/* indirection array index */
+	xfs_extnum_t	ext_cnt;	/* extents left to remove */
+	xfs_extnum_t	ext_diff;	/* extents to remove in current list */
+	xfs_extnum_t	nex1;		/* number of extents before idx */
+	xfs_extnum_t	nex2;		/* extents after idx + count */
+	int		page_idx = idx;	/* index in target extent list */
+
+	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
+	erp = xfs_iext_idx_to_irec(ifp,  &page_idx, &erp_idx, 0);
+	ASSERT(erp != NULL);
+	nex1 = page_idx;
+	ext_cnt = count;
+	while (ext_cnt) {
+		nex2 = MAX((erp->er_extcount - (nex1 + ext_cnt)), 0);
+		ext_diff = MIN(ext_cnt, (erp->er_extcount - nex1));
+		/*
+		 * Check for deletion of entire list;
+		 * xfs_iext_irec_remove() updates extent offsets.
+		 */
+		if (ext_diff == erp->er_extcount) {
+			xfs_iext_irec_remove(ifp, erp_idx);
+			ext_cnt -= ext_diff;
+			nex1 = 0;
+			if (ext_cnt) {
+				ASSERT(erp_idx < ifp->if_real_bytes /
+					XFS_IEXT_BUFSZ);
+				erp = &ifp->if_u1.if_ext_irec[erp_idx];
+				nex1 = 0;
+				continue;
+			} else {
+				break;
+			}
+		}
+		/* Move extents up (if needed) */
+		if (nex2) {
+			memmove(&erp->er_extbuf[nex1],
+				&erp->er_extbuf[nex1 + ext_diff],
+				nex2 * sizeof(xfs_bmbt_rec_t));
+		}
+		/* Zero out rest of page */
+		memset(&erp->er_extbuf[nex1 + nex2], 0, (XFS_IEXT_BUFSZ -
+			((nex1 + nex2) * sizeof(xfs_bmbt_rec_t))));
+		/* Update remaining counters */
+		erp->er_extcount -= ext_diff;
+		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -ext_diff);
+		ext_cnt -= ext_diff;
+		nex1 = 0;
+		erp_idx++;
+		erp++;
+	}
+	ifp->if_bytes -= count * sizeof(xfs_bmbt_rec_t);
+	xfs_iext_irec_compact(ifp);
+}
+
+/*
+ * Create, destroy, or resize a linear (direct) block of extents.
+ */
+void
+xfs_iext_realloc_direct(
+	xfs_ifork_t	*ifp,		/* inode fork pointer */
+	int		new_size)	/* new size of extents */
+{
+	int		rnew_size;	/* real new size of extents */
+
+	rnew_size = new_size;
+
+	ASSERT(!(ifp->if_flags & XFS_IFEXTIREC) ||
+		((new_size >= 0) && (new_size <= XFS_IEXT_BUFSZ) &&
+		 (new_size != ifp->if_real_bytes)));
+
+	/* Free extent records */
+	if (new_size == 0) {
+		xfs_iext_destroy(ifp);
+	}
+	/* Resize direct extent list and zero any new bytes */
+	else if (ifp->if_real_bytes) {
+		/* Check if extents will fit inside the inode */
+		if (new_size <= XFS_INLINE_EXTS * sizeof(xfs_bmbt_rec_t)) {
+			xfs_iext_direct_to_inline(ifp, new_size /
+				(uint)sizeof(xfs_bmbt_rec_t));
+			ifp->if_bytes = new_size;
+			return;
+		}
+		if (!is_power_of_2(new_size)){
+			rnew_size = roundup_pow_of_two(new_size);
+		}
+		if (rnew_size != ifp->if_real_bytes) {
+			ifp->if_u1.if_extents =
+				kmem_realloc(ifp->if_u1.if_extents,
+						rnew_size,
+						ifp->if_real_bytes, KM_NOFS);
+		}
+		if (rnew_size > ifp->if_real_bytes) {
+			memset(&ifp->if_u1.if_extents[ifp->if_bytes /
+				(uint)sizeof(xfs_bmbt_rec_t)], 0,
+				rnew_size - ifp->if_real_bytes);
+		}
+	}
+	/*
+	 * Switch from the inline extent buffer to a direct
+	 * extent list. Be sure to include the inline extent
+	 * bytes in new_size.
+	 */
+	else {
+		new_size += ifp->if_bytes;
+		if (!is_power_of_2(new_size)) {
+			rnew_size = roundup_pow_of_two(new_size);
+		}
+		xfs_iext_inline_to_direct(ifp, rnew_size);
+	}
+	ifp->if_real_bytes = rnew_size;
+	ifp->if_bytes = new_size;
+}
+
+/*
+ * Switch from linear (direct) extent records to inline buffer.
+ */
+void
+xfs_iext_direct_to_inline(
+	xfs_ifork_t	*ifp,		/* inode fork pointer */
+	xfs_extnum_t	nextents)	/* number of extents in file */
+{
+	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
+	ASSERT(nextents <= XFS_INLINE_EXTS);
+	/*
+	 * The inline buffer was zeroed when we switched
+	 * from inline to direct extent allocation mode,
+	 * so we don't need to clear it here.
+	 */
+	memcpy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents,
+		nextents * sizeof(xfs_bmbt_rec_t));
+	kmem_free(ifp->if_u1.if_extents);
+	ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
+	ifp->if_real_bytes = 0;
+}
+
+/*
+ * Switch from inline buffer to linear (direct) extent records.
+ * new_size should already be rounded up to the next power of 2
+ * by the caller (when appropriate), so use new_size as it is.
+ * However, since new_size may be rounded up, we can't update
+ * if_bytes here. It is the caller's responsibility to update
+ * if_bytes upon return.
+ */
+void
+xfs_iext_inline_to_direct(
+	xfs_ifork_t	*ifp,		/* inode fork pointer */
+	int		new_size)	/* number of extents in file */
+{
+	ifp->if_u1.if_extents = kmem_alloc(new_size, KM_NOFS);
+	memset(ifp->if_u1.if_extents, 0, new_size);
+	if (ifp->if_bytes) {
+		memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext,
+			ifp->if_bytes);
+		memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS *
+			sizeof(xfs_bmbt_rec_t));
+	}
+	ifp->if_real_bytes = new_size;
+}
+
+/*
+ * Resize an extent indirection array to new_size bytes.
+ */
+STATIC void
+xfs_iext_realloc_indirect(
+	xfs_ifork_t	*ifp,		/* inode fork pointer */
+	int		new_size)	/* new indirection array size */
+{
+	int		nlists;		/* number of irec's (ex lists) */
+	int		size;		/* current indirection array size */
+
+	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
+	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
+	size = nlists * sizeof(xfs_ext_irec_t);
+	ASSERT(ifp->if_real_bytes);
+	ASSERT((new_size >= 0) && (new_size != size));
+	if (new_size == 0) {
+		xfs_iext_destroy(ifp);
+	} else {
+		ifp->if_u1.if_ext_irec = (xfs_ext_irec_t *)
+			kmem_realloc(ifp->if_u1.if_ext_irec,
+				new_size, size, KM_NOFS);
+	}
+}
+
+/*
+ * Switch from indirection array to linear (direct) extent allocations.
+ */
+STATIC void
+xfs_iext_indirect_to_direct(
+	 xfs_ifork_t	*ifp)		/* inode fork pointer */
+{
+	xfs_bmbt_rec_host_t *ep;	/* extent record pointer */
+	xfs_extnum_t	nextents;	/* number of extents in file */
+	int		size;		/* size of file extents */
+
+	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
+	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+	ASSERT(nextents <= XFS_LINEAR_EXTS);
+	size = nextents * sizeof(xfs_bmbt_rec_t);
+
+	xfs_iext_irec_compact_pages(ifp);
+	ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ);
+
+	ep = ifp->if_u1.if_ext_irec->er_extbuf;
+	kmem_free(ifp->if_u1.if_ext_irec);
+	ifp->if_flags &= ~XFS_IFEXTIREC;
+	ifp->if_u1.if_extents = ep;
+	ifp->if_bytes = size;
+	if (nextents < XFS_LINEAR_EXTS) {
+		xfs_iext_realloc_direct(ifp, size);
+	}
+}
+
+/*
+ * Free incore file extents.
+ */
+void
+xfs_iext_destroy(
+	xfs_ifork_t	*ifp)		/* inode fork pointer */
+{
+	if (ifp->if_flags & XFS_IFEXTIREC) {
+		int	erp_idx;
+		int	nlists;
+
+		nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
+		for (erp_idx = nlists - 1; erp_idx >= 0 ; erp_idx--) {
+			xfs_iext_irec_remove(ifp, erp_idx);
+		}
+		ifp->if_flags &= ~XFS_IFEXTIREC;
+	} else if (ifp->if_real_bytes) {
+		kmem_free(ifp->if_u1.if_extents);
+	} else if (ifp->if_bytes) {
+		memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS *
+			sizeof(xfs_bmbt_rec_t));
+	}
+	ifp->if_u1.if_extents = NULL;
+	ifp->if_real_bytes = 0;
+	ifp->if_bytes = 0;
+}
+
+/*
+ * Return a pointer to the extent record for file system block bno.
+ */
+xfs_bmbt_rec_host_t *			/* pointer to found extent record */
+xfs_iext_bno_to_ext(
+	xfs_ifork_t	*ifp,		/* inode fork pointer */
+	xfs_fileoff_t	bno,		/* block number to search for */
+	xfs_extnum_t	*idxp)		/* index of target extent */
+{
+	xfs_bmbt_rec_host_t *base;	/* pointer to first extent */
+	xfs_filblks_t	blockcount = 0;	/* number of blocks in extent */
+	xfs_bmbt_rec_host_t *ep = NULL;	/* pointer to target extent */
+	xfs_ext_irec_t	*erp = NULL;	/* indirection array pointer */
+	int		high;		/* upper boundary in search */
+	xfs_extnum_t	idx = 0;	/* index of target extent */
+	int		low;		/* lower boundary in search */
+	xfs_extnum_t	nextents;	/* number of file extents */
+	xfs_fileoff_t	startoff = 0;	/* start offset of extent */
+
+	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+	if (nextents == 0) {
+		*idxp = 0;
+		return NULL;
+	}
+	low = 0;
+	if (ifp->if_flags & XFS_IFEXTIREC) {
+		/* Find target extent list */
+		int	erp_idx = 0;
+		erp = xfs_iext_bno_to_irec(ifp, bno, &erp_idx);
+		base = erp->er_extbuf;
+		high = erp->er_extcount - 1;
+	} else {
+		base = ifp->if_u1.if_extents;
+		high = nextents - 1;
+	}
+	/* Binary search extent records */
+	while (low <= high) {
+		idx = (low + high) >> 1;
+		ep = base + idx;
+		startoff = xfs_bmbt_get_startoff(ep);
+		blockcount = xfs_bmbt_get_blockcount(ep);
+		if (bno < startoff) {
+			high = idx - 1;
+		} else if (bno >= startoff + blockcount) {
+			low = idx + 1;
+		} else {
+			/* Convert back to file-based extent index */
+			if (ifp->if_flags & XFS_IFEXTIREC) {
+				idx += erp->er_extoff;
+			}
+			*idxp = idx;
+			return ep;
+		}
+	}
+	/* Convert back to file-based extent index */
+	if (ifp->if_flags & XFS_IFEXTIREC) {
+		idx += erp->er_extoff;
+	}
+	if (bno >= startoff + blockcount) {
+		if (++idx == nextents) {
+			ep = NULL;
+		} else {
+			ep = xfs_iext_get_ext(ifp, idx);
+		}
+	}
+	*idxp = idx;
+	return ep;
+}
+
+/*
+ * Return a pointer to the indirection array entry containing the
+ * extent record for filesystem block bno. Store the index of the
+ * target irec in *erp_idxp.
+ */
+xfs_ext_irec_t *			/* pointer to found extent record */
+xfs_iext_bno_to_irec(
+	xfs_ifork_t	*ifp,		/* inode fork pointer */
+	xfs_fileoff_t	bno,		/* block number to search for */
+	int		*erp_idxp)	/* irec index of target ext list */
+{
+	xfs_ext_irec_t	*erp = NULL;	/* indirection array pointer */
+	xfs_ext_irec_t	*erp_next;	/* next indirection array entry */
+	int		erp_idx;	/* indirection array index */
+	int		nlists;		/* number of extent irec's (lists) */
+	int		high;		/* binary search upper limit */
+	int		low;		/* binary search lower limit */
+
+	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
+	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
+	erp_idx = 0;
+	low = 0;
+	high = nlists - 1;
+	while (low <= high) {
+		erp_idx = (low + high) >> 1;
+		erp = &ifp->if_u1.if_ext_irec[erp_idx];
+		erp_next = erp_idx < nlists - 1 ? erp + 1 : NULL;
+		if (bno < xfs_bmbt_get_startoff(erp->er_extbuf)) {
+			high = erp_idx - 1;
+		} else if (erp_next && bno >=
+			   xfs_bmbt_get_startoff(erp_next->er_extbuf)) {
+			low = erp_idx + 1;
+		} else {
+			break;
+		}
+	}
+	*erp_idxp = erp_idx;
+	return erp;
+}
+
+/*
+ * Return a pointer to the indirection array entry containing the
+ * extent record at file extent index *idxp. Store the index of the
+ * target irec in *erp_idxp and store the page index of the target
+ * extent record in *idxp.
+ */
+xfs_ext_irec_t *
+xfs_iext_idx_to_irec(
+	xfs_ifork_t	*ifp,		/* inode fork pointer */
+	xfs_extnum_t	*idxp,		/* extent index (file -> page) */
+	int		*erp_idxp,	/* pointer to target irec */
+	int		realloc)	/* new bytes were just added */
+{
+	xfs_ext_irec_t	*prev;		/* pointer to previous irec */
+	xfs_ext_irec_t	*erp = NULL;	/* pointer to current irec */
+	int		erp_idx;	/* indirection array index */
+	int		nlists;		/* number of irec's (ex lists) */
+	int		high;		/* binary search upper limit */
+	int		low;		/* binary search lower limit */
+	xfs_extnum_t	page_idx = *idxp; /* extent index in target list */
+
+	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
+	ASSERT(page_idx >= 0);
+	ASSERT(page_idx <= ifp->if_bytes / sizeof(xfs_bmbt_rec_t));
+	ASSERT(page_idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t) || realloc);
+
+	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
+	erp_idx = 0;
+	low = 0;
+	high = nlists - 1;
+
+	/* Binary search extent irec's */
+	while (low <= high) {
+		erp_idx = (low + high) >> 1;
+		erp = &ifp->if_u1.if_ext_irec[erp_idx];
+		prev = erp_idx > 0 ? erp - 1 : NULL;
+		if (page_idx < erp->er_extoff || (page_idx == erp->er_extoff &&
+		     realloc && prev && prev->er_extcount < XFS_LINEAR_EXTS)) {
+			high = erp_idx - 1;
+		} else if (page_idx > erp->er_extoff + erp->er_extcount ||
+			   (page_idx == erp->er_extoff + erp->er_extcount &&
+			    !realloc)) {
+			low = erp_idx + 1;
+		} else if (page_idx == erp->er_extoff + erp->er_extcount &&
+			   erp->er_extcount == XFS_LINEAR_EXTS) {
+			ASSERT(realloc);
+			page_idx = 0;
+			erp_idx++;
+			erp = erp_idx < nlists ? erp + 1 : NULL;
+			break;
+		} else {
+			page_idx -= erp->er_extoff;
+			break;
+		}
+	}
+	*idxp = page_idx;
+	*erp_idxp = erp_idx;
+	return(erp);
+}
+
+/*
+ * Allocate and initialize an indirection array once the space needed
+ * for incore extents increases above XFS_IEXT_BUFSZ.
+ */
+void
+xfs_iext_irec_init(
+	xfs_ifork_t	*ifp)		/* inode fork pointer */
+{
+	xfs_ext_irec_t	*erp;		/* indirection array pointer */
+	xfs_extnum_t	nextents;	/* number of extents in file */
+
+	ASSERT(!(ifp->if_flags & XFS_IFEXTIREC));
+	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+	ASSERT(nextents <= XFS_LINEAR_EXTS);
+
+	erp = kmem_alloc(sizeof(xfs_ext_irec_t), KM_NOFS);
+
+	if (nextents == 0) {
+		ifp->if_u1.if_extents = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS);
+	} else if (!ifp->if_real_bytes) {
+		xfs_iext_inline_to_direct(ifp, XFS_IEXT_BUFSZ);
+	} else if (ifp->if_real_bytes < XFS_IEXT_BUFSZ) {
+		xfs_iext_realloc_direct(ifp, XFS_IEXT_BUFSZ);
+	}
+	erp->er_extbuf = ifp->if_u1.if_extents;
+	erp->er_extcount = nextents;
+	erp->er_extoff = 0;
+
+	ifp->if_flags |= XFS_IFEXTIREC;
+	ifp->if_real_bytes = XFS_IEXT_BUFSZ;
+	ifp->if_bytes = nextents * sizeof(xfs_bmbt_rec_t);
+	ifp->if_u1.if_ext_irec = erp;
+
+	return;
+}
+
+/*
+ * Allocate and initialize a new entry in the indirection array.
+ */
+xfs_ext_irec_t *
+xfs_iext_irec_new(
+	xfs_ifork_t	*ifp,		/* inode fork pointer */
+	int		erp_idx)	/* index for new irec */
+{
+	xfs_ext_irec_t	*erp;		/* indirection array pointer */
+	int		i;		/* loop counter */
+	int		nlists;		/* number of irec's (ex lists) */
+
+	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
+	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
+
+	/* Resize indirection array */
+	xfs_iext_realloc_indirect(ifp, ++nlists *
+				  sizeof(xfs_ext_irec_t));
+	/*
+	 * Move records down in the array so the
+	 * new page can use erp_idx.
+	 */
+	erp = ifp->if_u1.if_ext_irec;
+	for (i = nlists - 1; i > erp_idx; i--) {
+		memmove(&erp[i], &erp[i-1], sizeof(xfs_ext_irec_t));
+	}
+	ASSERT(i == erp_idx);
+
+	/* Initialize new extent record */
+	erp = ifp->if_u1.if_ext_irec;
+	erp[erp_idx].er_extbuf = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS);
+	ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
+	memset(erp[erp_idx].er_extbuf, 0, XFS_IEXT_BUFSZ);
+	erp[erp_idx].er_extcount = 0;
+	erp[erp_idx].er_extoff = erp_idx > 0 ?
+		erp[erp_idx-1].er_extoff + erp[erp_idx-1].er_extcount : 0;
+	return (&erp[erp_idx]);
+}
+
+/*
+ * Remove a record from the indirection array.
+ */
+void
+xfs_iext_irec_remove(
+	xfs_ifork_t	*ifp,		/* inode fork pointer */
+	int		erp_idx)	/* irec index to remove */
+{
+	xfs_ext_irec_t	*erp;		/* indirection array pointer */
+	int		i;		/* loop counter */
+	int		nlists;		/* number of irec's (ex lists) */
+
+	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
+	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
+	erp = &ifp->if_u1.if_ext_irec[erp_idx];
+	if (erp->er_extbuf) {
+		xfs_iext_irec_update_extoffs(ifp, erp_idx + 1,
+			-erp->er_extcount);
+		kmem_free(erp->er_extbuf);
+	}
+	/* Compact extent records */
+	erp = ifp->if_u1.if_ext_irec;
+	for (i = erp_idx; i < nlists - 1; i++) {
+		memmove(&erp[i], &erp[i+1], sizeof(xfs_ext_irec_t));
+	}
+	/*
+	 * Manually free the last extent record from the indirection
+	 * array.  A call to xfs_iext_realloc_indirect() with a size
+	 * of zero would result in a call to xfs_iext_destroy() which
+	 * would in turn call this function again, creating a nasty
+	 * infinite loop.
+	 */
+	if (--nlists) {
+		xfs_iext_realloc_indirect(ifp,
+			nlists * sizeof(xfs_ext_irec_t));
+	} else {
+		kmem_free(ifp->if_u1.if_ext_irec);
+	}
+	ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
+}
+
+/*
+ * This is called to clean up large amounts of unused memory allocated
+ * by the indirection array.  Before compacting anything though, verify
+ * that the indirection array is still needed and switch back to the
+ * linear extent list (or even the inline buffer) if possible.  The
+ * compaction policy is as follows:
+ *
+ *    Full Compaction: Extents fit into a single page (or inline buffer)
+ * Partial Compaction: Extents occupy less than 50% of allocated space
+ *      No Compaction: Extents occupy at least 50% of allocated space
+ */
+void
+xfs_iext_irec_compact(
+	xfs_ifork_t	*ifp)		/* inode fork pointer */
+{
+	xfs_extnum_t	nextents;	/* number of extents in file */
+	int		nlists;		/* number of irec's (ex lists) */
+
+	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
+	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
+	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+
+	if (nextents == 0) {
+		xfs_iext_destroy(ifp);
+	} else if (nextents <= XFS_INLINE_EXTS) {
+		xfs_iext_indirect_to_direct(ifp);
+		xfs_iext_direct_to_inline(ifp, nextents);
+	} else if (nextents <= XFS_LINEAR_EXTS) {
+		xfs_iext_indirect_to_direct(ifp);
+	} else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) {
+		xfs_iext_irec_compact_pages(ifp);
+	}
+}
+
+/*
+ * Combine extents from neighboring extent pages.
+ */
+void
+xfs_iext_irec_compact_pages(
+	xfs_ifork_t	*ifp)		/* inode fork pointer */
+{
+	xfs_ext_irec_t	*erp, *erp_next;/* pointers to irec entries */
+	int		erp_idx = 0;	/* indirection array index */
+	int		nlists;		/* number of irec's (ex lists) */
+
+	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
+	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
+	while (erp_idx < nlists - 1) {
+		erp = &ifp->if_u1.if_ext_irec[erp_idx];
+		erp_next = erp + 1;
+		if (erp_next->er_extcount <=
+		    (XFS_LINEAR_EXTS - erp->er_extcount)) {
+			memcpy(&erp->er_extbuf[erp->er_extcount],
+				erp_next->er_extbuf, erp_next->er_extcount *
+				sizeof(xfs_bmbt_rec_t));
+			erp->er_extcount += erp_next->er_extcount;
+			/*
+			 * Free page before removing extent record
+			 * so er_extoffs don't get modified in
+			 * xfs_iext_irec_remove.
+			 */
+			kmem_free(erp_next->er_extbuf);
+			erp_next->er_extbuf = NULL;
+			xfs_iext_irec_remove(ifp, erp_idx + 1);
+			nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
+		} else {
+			erp_idx++;
+		}
+	}
+}
+
+/*
+ * This is called to update the er_extoff field in the indirection
+ * array when extents have been added or removed from one of the
+ * extent lists. erp_idx contains the irec index to begin updating
+ * at and ext_diff contains the number of extents that were added
+ * or removed.
+ */
+void
+xfs_iext_irec_update_extoffs(
+	xfs_ifork_t	*ifp,		/* inode fork pointer */
+	int		erp_idx,	/* irec index to update */
+	int		ext_diff)	/* number of new extents */
+{
+	int		i;		/* loop counter */
+	int		nlists;		/* number of irec's (ex lists */
+
+	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
+	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
+	for (i = erp_idx; i < nlists; i++) {
+		ifp->if_u1.if_ext_irec[i].er_extoff += ext_diff;
+	}
+}
diff --git a/fs/xfs/xfs_inode_fork.h b/fs/xfs/xfs_inode_fork.h
new file mode 100644
index 0000000..28661a0
--- /dev/null
+++ b/fs/xfs/xfs_inode_fork.h
@@ -0,0 +1,171 @@
+/*
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef	__XFS_INODE_FORK_H__
+#define	__XFS_INODE_FORK_H__
+
+struct xfs_inode_log_item;
+
+/*
+ * The following xfs_ext_irec_t struct introduces a second (top) level
+ * to the in-core extent allocation scheme. These structs are allocated
+ * in a contiguous block, creating an indirection array where each entry
+ * (irec) contains a pointer to a buffer of in-core extent records which
+ * it manages. Each extent buffer is 4k in size, since 4k is the system
+ * page size on Linux i386 and systems with larger page sizes don't seem
+ * to gain much, if anything, by using their native page size as the
+ * extent buffer size. Also, using 4k extent buffers everywhere provides
+ * a consistent interface for CXFS across different platforms.
+ *
+ * There is currently no limit on the number of irec's (extent lists)
+ * allowed, so heavily fragmented files may require an indirection array
+ * which spans multiple system pages of memory. The number of extents
+ * which would require this amount of contiguous memory is very large
+ * and should not cause problems in the foreseeable future. However,
+ * if the memory needed for the contiguous array ever becomes a problem,
+ * it is possible that a third level of indirection may be required.
+ */
+typedef struct xfs_ext_irec {
+	xfs_bmbt_rec_host_t *er_extbuf;	/* block of extent records */
+	xfs_extnum_t	er_extoff;	/* extent offset in file */
+	xfs_extnum_t	er_extcount;	/* number of extents in page/block */
+} xfs_ext_irec_t;
+
+/*
+ * File incore extent information, present for each of data & attr forks.
+ */
+#define	XFS_IEXT_BUFSZ		4096
+#define	XFS_LINEAR_EXTS		(XFS_IEXT_BUFSZ / (uint)sizeof(xfs_bmbt_rec_t))
+#define	XFS_INLINE_EXTS		2
+#define	XFS_INLINE_DATA		32
+typedef struct xfs_ifork {
+	int			if_bytes;	/* bytes in if_u1 */
+	int			if_real_bytes;	/* bytes allocated in if_u1 */
+	struct xfs_btree_block	*if_broot;	/* file's incore btree root */
+	short			if_broot_bytes;	/* bytes allocated for root */
+	unsigned char		if_flags;	/* per-fork flags */
+	union {
+		xfs_bmbt_rec_host_t *if_extents;/* linear map file exts */
+		xfs_ext_irec_t	*if_ext_irec;	/* irec map file exts */
+		char		*if_data;	/* inline file data */
+	} if_u1;
+	union {
+		xfs_bmbt_rec_host_t if_inline_ext[XFS_INLINE_EXTS];
+						/* very small file extents */
+		char		if_inline_data[XFS_INLINE_DATA];
+						/* very small file data */
+		xfs_dev_t	if_rdev;	/* dev number if special */
+		uuid_t		if_uuid;	/* mount point value */
+	} if_u2;
+} xfs_ifork_t;
+
+/*
+ * Per-fork incore inode flags.
+ */
+#define	XFS_IFINLINE	0x01	/* Inline data is read in */
+#define	XFS_IFEXTENTS	0x02	/* All extent pointers are read in */
+#define	XFS_IFBROOT	0x04	/* i_broot points to the bmap b-tree root */
+#define	XFS_IFEXTIREC	0x08	/* Indirection array of extent blocks */
+
+/*
+ * Fork handling.
+ */
+
+#define XFS_IFORK_Q(ip)			((ip)->i_d.di_forkoff != 0)
+#define XFS_IFORK_BOFF(ip)		((int)((ip)->i_d.di_forkoff << 3))
+
+#define XFS_IFORK_PTR(ip,w)		\
+	((w) == XFS_DATA_FORK ? \
+		&(ip)->i_df : \
+		(ip)->i_afp)
+#define XFS_IFORK_DSIZE(ip) \
+	(XFS_IFORK_Q(ip) ? \
+		XFS_IFORK_BOFF(ip) : \
+		XFS_LITINO((ip)->i_mount, (ip)->i_d.di_version))
+#define XFS_IFORK_ASIZE(ip) \
+	(XFS_IFORK_Q(ip) ? \
+		XFS_LITINO((ip)->i_mount, (ip)->i_d.di_version) - \
+			XFS_IFORK_BOFF(ip) : \
+		0)
+#define XFS_IFORK_SIZE(ip,w) \
+	((w) == XFS_DATA_FORK ? \
+		XFS_IFORK_DSIZE(ip) : \
+		XFS_IFORK_ASIZE(ip))
+#define XFS_IFORK_FORMAT(ip,w) \
+	((w) == XFS_DATA_FORK ? \
+		(ip)->i_d.di_format : \
+		(ip)->i_d.di_aformat)
+#define XFS_IFORK_FMT_SET(ip,w,n) \
+	((w) == XFS_DATA_FORK ? \
+		((ip)->i_d.di_format = (n)) : \
+		((ip)->i_d.di_aformat = (n)))
+#define XFS_IFORK_NEXTENTS(ip,w) \
+	((w) == XFS_DATA_FORK ? \
+		(ip)->i_d.di_nextents : \
+		(ip)->i_d.di_anextents)
+#define XFS_IFORK_NEXT_SET(ip,w,n) \
+	((w) == XFS_DATA_FORK ? \
+		((ip)->i_d.di_nextents = (n)) : \
+		((ip)->i_d.di_anextents = (n)))
+#define XFS_IFORK_MAXEXT(ip, w) \
+	(XFS_IFORK_SIZE(ip, w) / sizeof(xfs_bmbt_rec_t))
+
+int		xfs_iformat_fork(struct xfs_inode *, struct xfs_dinode *);
+void		xfs_iflush_fork(struct xfs_inode *, struct xfs_dinode *,
+				struct xfs_inode_log_item *, int,
+				struct xfs_buf *);
+void		xfs_idestroy_fork(struct xfs_inode *, int);
+void		xfs_idata_realloc(struct xfs_inode *, int, int);
+void		xfs_iroot_realloc(struct xfs_inode *, int, int);
+int		xfs_iread_extents(struct xfs_trans *, struct xfs_inode *, int);
+int		xfs_iextents_copy(struct xfs_inode *, struct xfs_bmbt_rec *,
+				  int);
+
+struct xfs_bmbt_rec_host *
+		xfs_iext_get_ext(struct xfs_ifork *, xfs_extnum_t);
+void		xfs_iext_insert(struct xfs_inode *, xfs_extnum_t, xfs_extnum_t,
+				struct xfs_bmbt_irec *, int);
+void		xfs_iext_add(struct xfs_ifork *, xfs_extnum_t, int);
+void		xfs_iext_add_indirect_multi(struct xfs_ifork *, int,
+					    xfs_extnum_t, int);
+void		xfs_iext_remove(struct xfs_inode *, xfs_extnum_t, int, int);
+void		xfs_iext_remove_inline(struct xfs_ifork *, xfs_extnum_t, int);
+void		xfs_iext_remove_direct(struct xfs_ifork *, xfs_extnum_t, int);
+void		xfs_iext_remove_indirect(struct xfs_ifork *, xfs_extnum_t, int);
+void		xfs_iext_realloc_direct(struct xfs_ifork *, int);
+void		xfs_iext_direct_to_inline(struct xfs_ifork *, xfs_extnum_t);
+void		xfs_iext_inline_to_direct(struct xfs_ifork *, int);
+void		xfs_iext_destroy(struct xfs_ifork *);
+struct xfs_bmbt_rec_host *
+		xfs_iext_bno_to_ext(struct xfs_ifork *, xfs_fileoff_t, int *);
+struct xfs_ext_irec *
+		xfs_iext_bno_to_irec(struct xfs_ifork *, xfs_fileoff_t, int *);
+struct xfs_ext_irec *
+		xfs_iext_idx_to_irec(struct xfs_ifork *, xfs_extnum_t *, int *,
+				     int);
+void		xfs_iext_irec_init(struct xfs_ifork *);
+struct xfs_ext_irec *
+		xfs_iext_irec_new(struct xfs_ifork *, int);
+void		xfs_iext_irec_remove(struct xfs_ifork *, int);
+void		xfs_iext_irec_compact(struct xfs_ifork *);
+void		xfs_iext_irec_compact_pages(struct xfs_ifork *);
+void		xfs_iext_irec_compact_full(struct xfs_ifork *);
+void		xfs_iext_irec_update_extoffs(struct xfs_ifork *, int, int);
+
+extern struct kmem_zone	*xfs_ifork_zone;
+
+#endif	/* __XFS_INODE_FORK_H__ */
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index f76ff52..3780811 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -47,32 +47,44 @@
  * inode core, and possibly one for the inode data/extents/b-tree root
  * and one for the inode attribute data/extents/b-tree root.
  */
-STATIC uint
+STATIC void
 xfs_inode_item_size(
-	struct xfs_log_item	*lip)
+	struct xfs_log_item	*lip,
+	int			*nvecs,
+	int			*nbytes)
 {
 	struct xfs_inode_log_item *iip = INODE_ITEM(lip);
 	struct xfs_inode	*ip = iip->ili_inode;
-	uint			nvecs = 2;
+
+	*nvecs += 2;
+	*nbytes += sizeof(struct xfs_inode_log_format) +
+		   xfs_icdinode_size(ip->i_d.di_version);
 
 	switch (ip->i_d.di_format) {
 	case XFS_DINODE_FMT_EXTENTS:
 		if ((iip->ili_fields & XFS_ILOG_DEXT) &&
 		    ip->i_d.di_nextents > 0 &&
-		    ip->i_df.if_bytes > 0)
-			nvecs++;
+		    ip->i_df.if_bytes > 0) {
+			/* worst case, doesn't subtract delalloc extents */
+			*nbytes += XFS_IFORK_DSIZE(ip);
+			*nvecs += 1;
+		}
 		break;
 
 	case XFS_DINODE_FMT_BTREE:
 		if ((iip->ili_fields & XFS_ILOG_DBROOT) &&
-		    ip->i_df.if_broot_bytes > 0)
-			nvecs++;
+		    ip->i_df.if_broot_bytes > 0) {
+			*nbytes += ip->i_df.if_broot_bytes;
+			*nvecs += 1;
+		}
 		break;
 
 	case XFS_DINODE_FMT_LOCAL:
 		if ((iip->ili_fields & XFS_ILOG_DDATA) &&
-		    ip->i_df.if_bytes > 0)
-			nvecs++;
+		    ip->i_df.if_bytes > 0) {
+			*nbytes += roundup(ip->i_df.if_bytes, 4);
+			*nvecs += 1;
+		}
 		break;
 
 	case XFS_DINODE_FMT_DEV:
@@ -85,7 +97,7 @@
 	}
 
 	if (!XFS_IFORK_Q(ip))
-		return nvecs;
+		return;
 
 
 	/*
@@ -95,28 +107,33 @@
 	case XFS_DINODE_FMT_EXTENTS:
 		if ((iip->ili_fields & XFS_ILOG_AEXT) &&
 		    ip->i_d.di_anextents > 0 &&
-		    ip->i_afp->if_bytes > 0)
-			nvecs++;
+		    ip->i_afp->if_bytes > 0) {
+			/* worst case, doesn't subtract unused space */
+			*nbytes += XFS_IFORK_ASIZE(ip);
+			*nvecs += 1;
+		}
 		break;
 
 	case XFS_DINODE_FMT_BTREE:
 		if ((iip->ili_fields & XFS_ILOG_ABROOT) &&
-		    ip->i_afp->if_broot_bytes > 0)
-			nvecs++;
+		    ip->i_afp->if_broot_bytes > 0) {
+			*nbytes += ip->i_afp->if_broot_bytes;
+			*nvecs += 1;
+		}
 		break;
 
 	case XFS_DINODE_FMT_LOCAL:
 		if ((iip->ili_fields & XFS_ILOG_ADATA) &&
-		    ip->i_afp->if_bytes > 0)
-			nvecs++;
+		    ip->i_afp->if_bytes > 0) {
+			*nbytes += roundup(ip->i_afp->if_bytes, 4);
+			*nvecs += 1;
+		}
 		break;
 
 	default:
 		ASSERT(0);
 		break;
 	}
-
-	return nvecs;
 }
 
 /*
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index 779812f..dce4d65 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -18,123 +18,13 @@
 #ifndef	__XFS_INODE_ITEM_H__
 #define	__XFS_INODE_ITEM_H__
 
-/*
- * This is the structure used to lay out an inode log item in the
- * log.  The size of the inline data/extents/b-tree root to be logged
- * (if any) is indicated in the ilf_dsize field.  Changes to this structure
- * must be added on to the end.
- */
-typedef struct xfs_inode_log_format {
-	__uint16_t		ilf_type;	/* inode log item type */
-	__uint16_t		ilf_size;	/* size of this item */
-	__uint32_t		ilf_fields;	/* flags for fields logged */
-	__uint16_t		ilf_asize;	/* size of attr d/ext/root */
-	__uint16_t		ilf_dsize;	/* size of data/ext/root */
-	__uint64_t		ilf_ino;	/* inode number */
-	union {
-		__uint32_t	ilfu_rdev;	/* rdev value for dev inode*/
-		uuid_t		ilfu_uuid;	/* mount point value */
-	} ilf_u;
-	__int64_t		ilf_blkno;	/* blkno of inode buffer */
-	__int32_t		ilf_len;	/* len of inode buffer */
-	__int32_t		ilf_boffset;	/* off of inode in buffer */
-} xfs_inode_log_format_t;
-
-typedef struct xfs_inode_log_format_32 {
-	__uint16_t		ilf_type;	/* inode log item type */
-	__uint16_t		ilf_size;	/* size of this item */
-	__uint32_t		ilf_fields;	/* flags for fields logged */
-	__uint16_t		ilf_asize;	/* size of attr d/ext/root */
-	__uint16_t		ilf_dsize;	/* size of data/ext/root */
-	__uint64_t		ilf_ino;	/* inode number */
-	union {
-		__uint32_t	ilfu_rdev;	/* rdev value for dev inode*/
-		uuid_t		ilfu_uuid;	/* mount point value */
-	} ilf_u;
-	__int64_t		ilf_blkno;	/* blkno of inode buffer */
-	__int32_t		ilf_len;	/* len of inode buffer */
-	__int32_t		ilf_boffset;	/* off of inode in buffer */
-} __attribute__((packed)) xfs_inode_log_format_32_t;
-
-typedef struct xfs_inode_log_format_64 {
-	__uint16_t		ilf_type;	/* inode log item type */
-	__uint16_t		ilf_size;	/* size of this item */
-	__uint32_t		ilf_fields;	/* flags for fields logged */
-	__uint16_t		ilf_asize;	/* size of attr d/ext/root */
-	__uint16_t		ilf_dsize;	/* size of data/ext/root */
-	__uint32_t		ilf_pad;	/* pad for 64 bit boundary */
-	__uint64_t		ilf_ino;	/* inode number */
-	union {
-		__uint32_t	ilfu_rdev;	/* rdev value for dev inode*/
-		uuid_t		ilfu_uuid;	/* mount point value */
-	} ilf_u;
-	__int64_t		ilf_blkno;	/* blkno of inode buffer */
-	__int32_t		ilf_len;	/* len of inode buffer */
-	__int32_t		ilf_boffset;	/* off of inode in buffer */
-} xfs_inode_log_format_64_t;
-
-/*
- * Flags for xfs_trans_log_inode flags field.
- */
-#define	XFS_ILOG_CORE	0x001	/* log standard inode fields */
-#define	XFS_ILOG_DDATA	0x002	/* log i_df.if_data */
-#define	XFS_ILOG_DEXT	0x004	/* log i_df.if_extents */
-#define	XFS_ILOG_DBROOT	0x008	/* log i_df.i_broot */
-#define	XFS_ILOG_DEV	0x010	/* log the dev field */
-#define	XFS_ILOG_UUID	0x020	/* log the uuid field */
-#define	XFS_ILOG_ADATA	0x040	/* log i_af.if_data */
-#define	XFS_ILOG_AEXT	0x080	/* log i_af.if_extents */
-#define	XFS_ILOG_ABROOT	0x100	/* log i_af.i_broot */
-
-
-/*
- * The timestamps are dirty, but not necessarily anything else in the inode
- * core.  Unlike the other fields above this one must never make it to disk
- * in the ilf_fields of the inode_log_format, but is purely store in-memory in
- * ili_fields in the inode_log_item.
- */
-#define XFS_ILOG_TIMESTAMP	0x4000
-
-#define	XFS_ILOG_NONCORE	(XFS_ILOG_DDATA | XFS_ILOG_DEXT | \
-				 XFS_ILOG_DBROOT | XFS_ILOG_DEV | \
-				 XFS_ILOG_UUID | XFS_ILOG_ADATA | \
-				 XFS_ILOG_AEXT | XFS_ILOG_ABROOT)
-
-#define	XFS_ILOG_DFORK		(XFS_ILOG_DDATA | XFS_ILOG_DEXT | \
-				 XFS_ILOG_DBROOT)
-
-#define	XFS_ILOG_AFORK		(XFS_ILOG_ADATA | XFS_ILOG_AEXT | \
-				 XFS_ILOG_ABROOT)
-
-#define	XFS_ILOG_ALL		(XFS_ILOG_CORE | XFS_ILOG_DDATA | \
-				 XFS_ILOG_DEXT | XFS_ILOG_DBROOT | \
-				 XFS_ILOG_DEV | XFS_ILOG_UUID | \
-				 XFS_ILOG_ADATA | XFS_ILOG_AEXT | \
-				 XFS_ILOG_ABROOT | XFS_ILOG_TIMESTAMP)
-
-static inline int xfs_ilog_fbroot(int w)
-{
-	return (w == XFS_DATA_FORK ? XFS_ILOG_DBROOT : XFS_ILOG_ABROOT);
-}
-
-static inline int xfs_ilog_fext(int w)
-{
-	return (w == XFS_DATA_FORK ? XFS_ILOG_DEXT : XFS_ILOG_AEXT);
-}
-
-static inline int xfs_ilog_fdata(int w)
-{
-	return (w == XFS_DATA_FORK ? XFS_ILOG_DDATA : XFS_ILOG_ADATA);
-}
-
-#ifdef __KERNEL__
+/* kernel only definitions */
 
 struct xfs_buf;
 struct xfs_bmbt_rec;
 struct xfs_inode;
 struct xfs_mount;
 
-
 typedef struct xfs_inode_log_item {
 	xfs_log_item_t		ili_item;	   /* common portion */
 	struct xfs_inode	*ili_inode;	   /* inode ptr */
@@ -151,7 +41,6 @@
 	xfs_inode_log_format_t	ili_format;	   /* logged structure */
 } xfs_inode_log_item_t;
 
-
 static inline int xfs_inode_clean(xfs_inode_t *ip)
 {
 	return !ip->i_itemp || !(ip->i_itemp->ili_fields & XFS_ILOG_ALL);
@@ -165,6 +54,6 @@
 extern int xfs_inode_item_format_convert(xfs_log_iovec_t *,
 					 xfs_inode_log_format_t *);
 
-#endif	/* __KERNEL__ */
+extern struct kmem_zone	*xfs_ili_zone;
 
 #endif	/* __XFS_INODE_ITEM_H__ */
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 6e2bca5..bdebc21 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -17,6 +17,7 @@
  */
 #include "xfs.h"
 #include "xfs_fs.h"
+#include "xfs_format.h"
 #include "xfs_log.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
@@ -32,17 +33,16 @@
 #include "xfs_error.h"
 #include "xfs_attr.h"
 #include "xfs_bmap.h"
+#include "xfs_bmap_util.h"
 #include "xfs_buf_item.h"
-#include "xfs_utils.h"
-#include "xfs_dfrag.h"
 #include "xfs_fsops.h"
-#include "xfs_vnodeops.h"
 #include "xfs_discard.h"
 #include "xfs_quota.h"
 #include "xfs_inode_item.h"
 #include "xfs_export.h"
 #include "xfs_trace.h"
 #include "xfs_icache.h"
+#include "xfs_symlink.h"
 
 #include <linux/capability.h>
 #include <linux/dcache.h>
@@ -350,6 +350,40 @@
 	return error;
 }
 
+int
+xfs_set_dmattrs(
+	xfs_inode_t     *ip,
+	u_int		evmask,
+	u_int16_t	state)
+{
+	xfs_mount_t	*mp = ip->i_mount;
+	xfs_trans_t	*tp;
+	int		error;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return XFS_ERROR(EPERM);
+
+	if (XFS_FORCED_SHUTDOWN(mp))
+		return XFS_ERROR(EIO);
+
+	tp = xfs_trans_alloc(mp, XFS_TRANS_SET_DMATTRS);
+	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
+	if (error) {
+		xfs_trans_cancel(tp, 0);
+		return error;
+	}
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+
+	ip->i_d.di_dmevmask = evmask;
+	ip->i_d.di_dmstate  = state;
+
+	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+	error = xfs_trans_commit(tp, 0);
+
+	return error;
+}
+
 STATIC int
 xfs_fssetdm_by_handle(
 	struct file		*parfilp,
@@ -967,7 +1001,7 @@
 	 * first do an error checking pass.
 	 */
 	tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
-	code = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
+	code = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
 	if (code)
 		goto error_return;
 
@@ -981,15 +1015,22 @@
 	 * to the file owner ID, except in cases where the
 	 * CAP_FSETID capability is applicable.
 	 */
-	if (current_fsuid() != ip->i_d.di_uid && !capable(CAP_FOWNER)) {
+	if (!inode_owner_or_capable(VFS_I(ip))) {
 		code = XFS_ERROR(EPERM);
 		goto error_return;
 	}
 
 	/*
 	 * Do a quota reservation only if projid is actually going to change.
+	 * Only allow changing of projid from init_user_ns since it is a
+	 * non user namespace aware identifier.
 	 */
 	if (mask & FSX_PROJID) {
+		if (current_user_ns() != &init_user_ns) {
+			code = XFS_ERROR(EINVAL);
+			goto error_return;
+		}
+
 		if (XFS_IS_QUOTA_RUNNING(mp) &&
 		    XFS_IS_PQUOTA_ON(mp) &&
 		    xfs_get_projid(ip) != fa->fsx_projid) {
@@ -1103,7 +1144,7 @@
 		 * cleared upon successful return from chown()
 		 */
 		if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
-		    !capable(CAP_FSETID))
+		    !inode_capable(VFS_I(ip), CAP_FSETID))
 			ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);
 
 		/*
@@ -1328,6 +1369,75 @@
 	return 0;
 }
 
+int
+xfs_ioc_swapext(
+	xfs_swapext_t	*sxp)
+{
+	xfs_inode_t     *ip, *tip;
+	struct fd	f, tmp;
+	int		error = 0;
+
+	/* Pull information for the target fd */
+	f = fdget((int)sxp->sx_fdtarget);
+	if (!f.file) {
+		error = XFS_ERROR(EINVAL);
+		goto out;
+	}
+
+	if (!(f.file->f_mode & FMODE_WRITE) ||
+	    !(f.file->f_mode & FMODE_READ) ||
+	    (f.file->f_flags & O_APPEND)) {
+		error = XFS_ERROR(EBADF);
+		goto out_put_file;
+	}
+
+	tmp = fdget((int)sxp->sx_fdtmp);
+	if (!tmp.file) {
+		error = XFS_ERROR(EINVAL);
+		goto out_put_file;
+	}
+
+	if (!(tmp.file->f_mode & FMODE_WRITE) ||
+	    !(tmp.file->f_mode & FMODE_READ) ||
+	    (tmp.file->f_flags & O_APPEND)) {
+		error = XFS_ERROR(EBADF);
+		goto out_put_tmp_file;
+	}
+
+	if (IS_SWAPFILE(file_inode(f.file)) ||
+	    IS_SWAPFILE(file_inode(tmp.file))) {
+		error = XFS_ERROR(EINVAL);
+		goto out_put_tmp_file;
+	}
+
+	ip = XFS_I(file_inode(f.file));
+	tip = XFS_I(file_inode(tmp.file));
+
+	if (ip->i_mount != tip->i_mount) {
+		error = XFS_ERROR(EINVAL);
+		goto out_put_tmp_file;
+	}
+
+	if (ip->i_ino == tip->i_ino) {
+		error = XFS_ERROR(EINVAL);
+		goto out_put_tmp_file;
+	}
+
+	if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+		error = XFS_ERROR(EIO);
+		goto out_put_tmp_file;
+	}
+
+	error = xfs_swap_extents(ip, tip, sxp);
+
+ out_put_tmp_file:
+	fdput(tmp);
+ out_put_file:
+	fdput(f);
+ out:
+	return error;
+}
+
 /*
  * Note: some of the ioctl's return positive numbers as a
  * byte count indicating success, such as readlink_by_handle.
@@ -1472,7 +1582,7 @@
 		error = mnt_want_write_file(filp);
 		if (error)
 			return error;
-		error = xfs_swapext(&sxp);
+		error = xfs_ioc_swapext(&sxp);
 		mnt_drop_write_file(filp);
 		return -error;
 	}
@@ -1610,23 +1720,23 @@
 		return -error;
 
 	case XFS_IOC_FREE_EOFBLOCKS: {
-		struct xfs_eofblocks eofb;
+		struct xfs_fs_eofblocks eofb;
+		struct xfs_eofblocks keofb;
+
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		if (mp->m_flags & XFS_MOUNT_RDONLY)
+			return -XFS_ERROR(EROFS);
 
 		if (copy_from_user(&eofb, arg, sizeof(eofb)))
 			return -XFS_ERROR(EFAULT);
 
-		if (eofb.eof_version != XFS_EOFBLOCKS_VERSION)
-			return -XFS_ERROR(EINVAL);
+		error = xfs_fs_eofblocks_from_user(&eofb, &keofb);
+		if (error)
+			return -error;
 
-		if (eofb.eof_flags & ~XFS_EOF_FLAGS_VALID)
-			return -XFS_ERROR(EINVAL);
-
-		if (memchr_inv(&eofb.pad32, 0, sizeof(eofb.pad32)) ||
-		    memchr_inv(eofb.pad64, 0, sizeof(eofb.pad64)))
-			return -XFS_ERROR(EINVAL);
-
-		error = xfs_icache_free_eofblocks(mp, &eofb);
-		return -error;
+		return -xfs_icache_free_eofblocks(mp, &keofb);
 	}
 
 	default:
diff --git a/fs/xfs/xfs_ioctl.h b/fs/xfs/xfs_ioctl.h
index d56173b..77c02c7 100644
--- a/fs/xfs/xfs_ioctl.h
+++ b/fs/xfs/xfs_ioctl.h
@@ -27,6 +27,10 @@
 	unsigned int		cmd,
 	xfs_flock64_t		*bf);
 
+int
+xfs_ioc_swapext(
+	xfs_swapext_t	*sxp);
+
 extern int
 xfs_find_handle(
 	unsigned int		cmd,
@@ -82,4 +86,10 @@
 	unsigned int		cmd,
 	unsigned long		arg);
 
+extern int
+xfs_set_dmattrs(
+	struct xfs_inode	*ip,
+	u_int			evmask,
+	u_int16_t		state);
+
 #endif
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index c0c6625..d3ab953 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -33,8 +33,6 @@
 #include "xfs_inode.h"
 #include "xfs_itable.h"
 #include "xfs_error.h"
-#include "xfs_dfrag.h"
-#include "xfs_vnodeops.h"
 #include "xfs_fsops.h"
 #include "xfs_alloc.h"
 #include "xfs_rtalloc.h"
@@ -644,7 +642,7 @@
 		error = mnt_want_write_file(filp);
 		if (error)
 			return error;
-		error = xfs_swapext(&sxp);
+		error = xfs_ioc_swapext(&sxp);
 		mnt_drop_write_file(filp);
 		return -error;
 	}
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 6a70964..8d4d49b 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -17,6 +17,7 @@
  */
 #include "xfs.h"
 #include "xfs_fs.h"
+#include "xfs_format.h"
 #include "xfs_log.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
@@ -32,13 +33,13 @@
 #include "xfs_inode_item.h"
 #include "xfs_btree.h"
 #include "xfs_bmap.h"
+#include "xfs_bmap_util.h"
 #include "xfs_rtalloc.h"
 #include "xfs_error.h"
 #include "xfs_itable.h"
 #include "xfs_attr.h"
 #include "xfs_buf_item.h"
 #include "xfs_trans_space.h"
-#include "xfs_utils.h"
 #include "xfs_iomap.h"
 #include "xfs_trace.h"
 #include "xfs_icache.h"
@@ -187,10 +188,8 @@
 	 * Allocate and setup the transaction
 	 */
 	tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
-	error = xfs_trans_reserve(tp, resblks,
-			XFS_WRITE_LOG_RES(mp), resrtextents,
-			XFS_TRANS_PERM_LOG_RES,
-			XFS_WRITE_LOG_COUNT);
+	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
+				  resblks, resrtextents);
 	/*
 	 * Check for running out of space, note: need lock to return
 	 */
@@ -698,10 +697,8 @@
 			tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE);
 			tp->t_flags |= XFS_TRANS_RESERVE;
 			nres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK);
-			error = xfs_trans_reserve(tp, nres,
-					XFS_WRITE_LOG_RES(mp),
-					0, XFS_TRANS_PERM_LOG_RES,
-					XFS_WRITE_LOG_COUNT);
+			error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
+						  nres, 0);
 			if (error) {
 				xfs_trans_cancel(tp, 0);
 				return XFS_ERROR(error);
@@ -864,10 +861,8 @@
 		sb_start_intwrite(mp->m_super);
 		tp = _xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE, KM_NOFS);
 		tp->t_flags |= XFS_TRANS_RESERVE | XFS_TRANS_FREEZE_PROT;
-		error = xfs_trans_reserve(tp, resblks,
-				XFS_WRITE_LOG_RES(mp), 0,
-				XFS_TRANS_PERM_LOG_RES,
-				XFS_WRITE_LOG_COUNT);
+		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
+					  resblks, 0);
 		if (error) {
 			xfs_trans_cancel(tp, 0);
 			return XFS_ERROR(error);
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 96dda62..2b8952d 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -17,6 +17,7 @@
  */
 #include "xfs.h"
 #include "xfs_fs.h"
+#include "xfs_format.h"
 #include "xfs_acl.h"
 #include "xfs_log.h"
 #include "xfs_trans.h"
@@ -29,16 +30,19 @@
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
 #include "xfs_bmap.h"
+#include "xfs_bmap_util.h"
 #include "xfs_rtalloc.h"
 #include "xfs_error.h"
 #include "xfs_itable.h"
 #include "xfs_attr.h"
 #include "xfs_buf_item.h"
-#include "xfs_utils.h"
-#include "xfs_vnodeops.h"
 #include "xfs_inode_item.h"
 #include "xfs_trace.h"
 #include "xfs_icache.h"
+#include "xfs_symlink.h"
+#include "xfs_da_btree.h"
+#include "xfs_dir2_format.h"
+#include "xfs_dir2_priv.h"
 
 #include <linux/capability.h>
 #include <linux/xattr.h>
@@ -87,10 +91,12 @@
 static void
 xfs_dentry_to_name(
 	struct xfs_name	*namep,
-	struct dentry	*dentry)
+	struct dentry	*dentry,
+	int		mode)
 {
 	namep->name = dentry->d_name.name;
 	namep->len = dentry->d_name.len;
+	namep->type = xfs_mode_to_ftype[(mode & S_IFMT) >> S_SHIFT];
 }
 
 STATIC void
@@ -106,7 +112,7 @@
 	 * xfs_init_security we must back out.
 	 * ENOSPC can hit here, among other things.
 	 */
-	xfs_dentry_to_name(&teardown, dentry);
+	xfs_dentry_to_name(&teardown, dentry, 0);
 
 	xfs_remove(XFS_I(dir), &teardown, XFS_I(inode));
 	iput(inode);
@@ -146,7 +152,7 @@
 			mode &= ~current_umask();
 	}
 
-	xfs_dentry_to_name(&name, dentry);
+	xfs_dentry_to_name(&name, dentry, mode);
 	error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip);
 	if (unlikely(error))
 		goto out_free_acl;
@@ -207,7 +213,7 @@
 	if (dentry->d_name.len >= MAXNAMELEN)
 		return ERR_PTR(-ENAMETOOLONG);
 
-	xfs_dentry_to_name(&name, dentry);
+	xfs_dentry_to_name(&name, dentry, 0);
 	error = xfs_lookup(XFS_I(dir), &name, &cip, NULL);
 	if (unlikely(error)) {
 		if (unlikely(error != ENOENT))
@@ -234,7 +240,7 @@
 	if (dentry->d_name.len >= MAXNAMELEN)
 		return ERR_PTR(-ENAMETOOLONG);
 
-	xfs_dentry_to_name(&xname, dentry);
+	xfs_dentry_to_name(&xname, dentry, 0);
 	error = xfs_lookup(XFS_I(dir), &xname, &ip, &ci_name);
 	if (unlikely(error)) {
 		if (unlikely(error != ENOENT))
@@ -269,7 +275,7 @@
 	struct xfs_name	name;
 	int		error;
 
-	xfs_dentry_to_name(&name, dentry);
+	xfs_dentry_to_name(&name, dentry, inode->i_mode);
 
 	error = xfs_link(XFS_I(dir), XFS_I(inode), &name);
 	if (unlikely(error))
@@ -288,7 +294,7 @@
 	struct xfs_name	name;
 	int		error;
 
-	xfs_dentry_to_name(&name, dentry);
+	xfs_dentry_to_name(&name, dentry, 0);
 
 	error = -xfs_remove(XFS_I(dir), &name, XFS_I(dentry->d_inode));
 	if (error)
@@ -318,7 +324,7 @@
 
 	mode = S_IFLNK |
 		(irix_symlink_mode ? 0777 & ~current_umask() : S_IRWXUGO);
-	xfs_dentry_to_name(&name, dentry);
+	xfs_dentry_to_name(&name, dentry, mode);
 
 	error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip);
 	if (unlikely(error))
@@ -350,12 +356,12 @@
 	struct xfs_name	oname;
 	struct xfs_name	nname;
 
-	xfs_dentry_to_name(&oname, odentry);
-	xfs_dentry_to_name(&nname, ndentry);
+	xfs_dentry_to_name(&oname, odentry, 0);
+	xfs_dentry_to_name(&nname, ndentry, odentry->d_inode->i_mode);
 
 	return -xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode),
 			   XFS_I(ndir), &nname, new_inode ?
-			   			XFS_I(new_inode) : NULL);
+						XFS_I(new_inode) : NULL);
 }
 
 /*
@@ -420,8 +426,8 @@
 	stat->dev = inode->i_sb->s_dev;
 	stat->mode = ip->i_d.di_mode;
 	stat->nlink = ip->i_d.di_nlink;
-	stat->uid = ip->i_d.di_uid;
-	stat->gid = ip->i_d.di_gid;
+	stat->uid = inode->i_uid;
+	stat->gid = inode->i_gid;
 	stat->ino = ip->i_ino;
 	stat->atime = inode->i_atime;
 	stat->mtime = inode->i_mtime;
@@ -485,8 +491,8 @@
 	int			mask = iattr->ia_valid;
 	xfs_trans_t		*tp;
 	int			error;
-	uid_t			uid = 0, iuid = 0;
-	gid_t			gid = 0, igid = 0;
+	kuid_t			uid = GLOBAL_ROOT_UID, iuid = GLOBAL_ROOT_UID;
+	kgid_t			gid = GLOBAL_ROOT_GID, igid = GLOBAL_ROOT_GID;
 	struct xfs_dquot	*udqp = NULL, *gdqp = NULL;
 	struct xfs_dquot	*olddquot1 = NULL, *olddquot2 = NULL;
 
@@ -522,13 +528,13 @@
 			uid = iattr->ia_uid;
 			qflags |= XFS_QMOPT_UQUOTA;
 		} else {
-			uid = ip->i_d.di_uid;
+			uid = inode->i_uid;
 		}
 		if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp)) {
 			gid = iattr->ia_gid;
 			qflags |= XFS_QMOPT_GQUOTA;
 		}  else {
-			gid = ip->i_d.di_gid;
+			gid = inode->i_gid;
 		}
 
 		/*
@@ -538,14 +544,16 @@
 		 */
 		ASSERT(udqp == NULL);
 		ASSERT(gdqp == NULL);
-		error = xfs_qm_vop_dqalloc(ip, uid, gid, xfs_get_projid(ip),
-					 qflags, &udqp, &gdqp, NULL);
+		error = xfs_qm_vop_dqalloc(ip, xfs_kuid_to_uid(uid),
+					   xfs_kgid_to_gid(gid),
+					   xfs_get_projid(ip),
+					   qflags, &udqp, &gdqp, NULL);
 		if (error)
 			return error;
 	}
 
 	tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
-	error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
+	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
 	if (error)
 		goto out_dqrele;
 
@@ -561,8 +569,8 @@
 		 * while we didn't have the inode locked, inode's dquot(s)
 		 * would have changed also.
 		 */
-		iuid = ip->i_d.di_uid;
-		igid = ip->i_d.di_gid;
+		iuid = inode->i_uid;
+		igid = inode->i_gid;
 		gid = (mask & ATTR_GID) ? iattr->ia_gid : igid;
 		uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid;
 
@@ -571,8 +579,8 @@
 		 * going to change.
 		 */
 		if (XFS_IS_QUOTA_RUNNING(mp) &&
-		    ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
-		     (XFS_IS_GQUOTA_ON(mp) && igid != gid))) {
+		    ((XFS_IS_UQUOTA_ON(mp) && !uid_eq(iuid, uid)) ||
+		     (XFS_IS_GQUOTA_ON(mp) && !gid_eq(igid, gid)))) {
 			ASSERT(tp);
 			error = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp,
 						NULL, capable(CAP_FOWNER) ?
@@ -602,17 +610,17 @@
 		 * Change the ownerships and register quota modifications
 		 * in the transaction.
 		 */
-		if (iuid != uid) {
+		if (!uid_eq(iuid, uid)) {
 			if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_UQUOTA_ON(mp)) {
 				ASSERT(mask & ATTR_UID);
 				ASSERT(udqp);
 				olddquot1 = xfs_qm_vop_chown(tp, ip,
 							&ip->i_udquot, udqp);
 			}
-			ip->i_d.di_uid = uid;
+			ip->i_d.di_uid = xfs_kuid_to_uid(uid);
 			inode->i_uid = uid;
 		}
-		if (igid != gid) {
+		if (!gid_eq(igid, gid)) {
 			if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) {
 				ASSERT(!XFS_IS_PQUOTA_ON(mp));
 				ASSERT(mask & ATTR_GID);
@@ -620,7 +628,7 @@
 				olddquot2 = xfs_qm_vop_chown(tp, ip,
 							&ip->i_gdquot, gdqp);
 			}
-			ip->i_d.di_gid = gid;
+			ip->i_d.di_gid = xfs_kgid_to_gid(gid);
 			inode->i_gid = gid;
 		}
 	}
@@ -807,9 +815,7 @@
 		goto out_unlock;
 
 	tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
-	error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
-				 XFS_TRANS_PERM_LOG_RES,
-				 XFS_ITRUNCATE_LOG_COUNT);
+	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
 	if (error)
 		goto out_trans_cancel;
 
@@ -932,7 +938,7 @@
 	trace_xfs_update_time(ip);
 
 	tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
-	error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
+	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0);
 	if (error) {
 		xfs_trans_cancel(tp, 0);
 		return -error;
@@ -1173,8 +1179,8 @@
 
 	inode->i_mode	= ip->i_d.di_mode;
 	set_nlink(inode, ip->i_d.di_nlink);
-	inode->i_uid	= ip->i_d.di_uid;
-	inode->i_gid	= ip->i_d.di_gid;
+	inode->i_uid    = xfs_uid_to_kuid(ip->i_d.di_uid);
+	inode->i_gid    = xfs_gid_to_kgid(ip->i_d.di_gid);
 
 	switch (inode->i_mode & S_IFMT) {
 	case S_IFBLK:
diff --git a/fs/xfs/xfs_iops.h b/fs/xfs/xfs_iops.h
index ef41c92..d81fb41 100644
--- a/fs/xfs/xfs_iops.h
+++ b/fs/xfs/xfs_iops.h
@@ -27,4 +27,17 @@
 
 extern void xfs_setup_inode(struct xfs_inode *);
 
+/*
+ * Internal setattr interfaces.
+ */
+#define	XFS_ATTR_DMI		0x01	/* invocation from a DMI function */
+#define	XFS_ATTR_NONBLOCK	0x02	/* return EAGAIN if op would block */
+#define XFS_ATTR_NOLOCK		0x04	/* Don't grab any conflicting locks */
+#define XFS_ATTR_NOACL		0x08	/* Don't call xfs_acl_chmod */
+#define XFS_ATTR_SYNC		0x10	/* synchronous operation required */
+
+extern int xfs_setattr_nonsize(struct xfs_inode *ip, struct iattr *vap,
+			       int flags);
+extern int xfs_setattr_size(struct xfs_inode *ip, struct iattr *vap, int flags);
+
 #endif /* __XFS_IOPS_H__ */
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index 800f896..f9bb590 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -32,6 +32,38 @@
 # define XFS_BIG_INUMS	0
 #endif
 
+/*
+ * Kernel specific type declarations for XFS
+ */
+typedef signed char		__int8_t;
+typedef unsigned char		__uint8_t;
+typedef signed short int	__int16_t;
+typedef unsigned short int	__uint16_t;
+typedef signed int		__int32_t;
+typedef unsigned int		__uint32_t;
+typedef signed long long int	__int64_t;
+typedef unsigned long long int	__uint64_t;
+
+typedef __uint32_t		inst_t;		/* an instruction */
+
+typedef __s64			xfs_off_t;	/* <file offset> type */
+typedef unsigned long long	xfs_ino_t;	/* <inode> type */
+typedef __s64			xfs_daddr_t;	/* <disk address> type */
+typedef char *			xfs_caddr_t;	/* <core address> type */
+typedef __u32			xfs_dev_t;
+typedef __u32			xfs_nlink_t;
+
+/* __psint_t is the same size as a pointer */
+#if (BITS_PER_LONG == 32)
+typedef __int32_t __psint_t;
+typedef __uint32_t __psunsigned_t;
+#elif (BITS_PER_LONG == 64)
+typedef __int64_t __psint_t;
+typedef __uint64_t __psunsigned_t;
+#else
+#error BITS_PER_LONG must be 32 or 64
+#endif
+
 #include "xfs_types.h"
 
 #include "kmem.h"
@@ -114,8 +146,6 @@
 #define xfs_inherit_sync	xfs_params.inherit_sync.val
 #define xfs_inherit_nodump	xfs_params.inherit_nodump.val
 #define xfs_inherit_noatime	xfs_params.inherit_noatim.val
-#define xfs_buf_timer_centisecs	xfs_params.xfs_buf_timer.val
-#define xfs_buf_age_centisecs	xfs_params.xfs_buf_age.val
 #define xfs_inherit_nosymlinks	xfs_params.inherit_nosym.val
 #define xfs_rotorstep		xfs_params.rotorstep.val
 #define xfs_inherit_nodefrag	xfs_params.inherit_nodfrg.val
@@ -159,6 +189,32 @@
 #define MAX(a,b)	(max(a,b))
 #define howmany(x, y)	(((x)+((y)-1))/(y))
 
+/* Kernel uid/gid conversion. These are used to convert to/from the on disk
+ * uid_t/gid_t types to the kuid_t/kgid_t types that the kernel uses internally.
+ * The conversion here is type only, the value will remain the same since we
+ * are converting to the init_user_ns. The uid is later mapped to a particular
+ * user namespace value when crossing the kernel/user boundary.
+ */
+static inline __uint32_t xfs_kuid_to_uid(kuid_t uid)
+{
+	return from_kuid(&init_user_ns, uid);
+}
+
+static inline kuid_t xfs_uid_to_kuid(__uint32_t uid)
+{
+	return make_kuid(&init_user_ns, uid);
+}
+
+static inline __uint32_t xfs_kgid_to_gid(kgid_t gid)
+{
+	return from_kgid(&init_user_ns, gid);
+}
+
+static inline kgid_t xfs_gid_to_kgid(__uint32_t gid)
+{
+	return make_kgid(&init_user_ns, gid);
+}
+
 /*
  * Various platform dependent calls that don't fit anywhere else
  */
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index d852a2b..5372d58 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -614,7 +614,8 @@
 	xfs_daddr_t	blk_offset,
 	int		num_bblks)
 {
-	int		error;
+	int		error = 0;
+	int		min_logfsbs;
 
 	if (!(mp->m_flags & XFS_MOUNT_NORECOVERY))
 		xfs_notice(mp, "Mounting Filesystem");
@@ -631,6 +632,50 @@
 	}
 
 	/*
+	 * Validate the given log space and drop a critical message via syslog
+	 * if the log size is too small that would lead to some unexpected
+	 * situations in transaction log space reservation stage.
+	 *
+	 * Note: we can't just reject the mount if the validation fails.  This
+	 * would mean that people would have to downgrade their kernel just to
+	 * remedy the situation as there is no way to grow the log (short of
+	 * black magic surgery with xfs_db).
+	 *
+	 * We can, however, reject mounts for CRC format filesystems, as the
+	 * mkfs binary being used to make the filesystem should never create a
+	 * filesystem with a log that is too small.
+	 */
+	min_logfsbs = xfs_log_calc_minimum_size(mp);
+
+	if (mp->m_sb.sb_logblocks < min_logfsbs) {
+		xfs_warn(mp,
+		"Log size %d blocks too small, minimum size is %d blocks",
+			 mp->m_sb.sb_logblocks, min_logfsbs);
+		error = EINVAL;
+	} else if (mp->m_sb.sb_logblocks > XFS_MAX_LOG_BLOCKS) {
+		xfs_warn(mp,
+		"Log size %d blocks too large, maximum size is %lld blocks",
+			 mp->m_sb.sb_logblocks, XFS_MAX_LOG_BLOCKS);
+		error = EINVAL;
+	} else if (XFS_FSB_TO_B(mp, mp->m_sb.sb_logblocks) > XFS_MAX_LOG_BYTES) {
+		xfs_warn(mp,
+		"log size %lld bytes too large, maximum size is %lld bytes",
+			 XFS_FSB_TO_B(mp, mp->m_sb.sb_logblocks),
+			 XFS_MAX_LOG_BYTES);
+		error = EINVAL;
+	}
+	if (error) {
+		if (xfs_sb_version_hascrc(&mp->m_sb)) {
+			xfs_crit(mp, "AAIEEE! Log failed size checks. Abort!");
+			ASSERT(0);
+			goto out_free_log;
+		}
+		xfs_crit(mp,
+"Log size out of supported range. Continuing onwards, but if log hangs are\n"
+"experienced then please report this message in the bug report.");
+	}
+
+	/*
 	 * Initialize the AIL now we have a log.
 	 */
 	error = xfs_trans_ail_init(mp);
@@ -720,7 +765,7 @@
  * Unmount record used to have a string "Unmount filesystem--" in the
  * data section where the "Un" was really a magic number (XLOG_UNMOUNT_TYPE).
  * We just write the magic number now since that particular field isn't
- * currently architecture converted and "nUmount" is a bit foo.
+ * currently architecture converted and "Unmount" is a bit foo.
  * As far as I know, there weren't any dependencies on the old behaviour.
  */
 
@@ -1941,7 +1986,7 @@
 
 	xfs_alert_tag(mp, XFS_PTAG_LOGRES,
 		"xlog_write: reservation ran out. Need to up reservation");
-	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+	xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
 }
 
 /*
@@ -2044,7 +2089,7 @@
  * Set up the parameters of the region copy into the log. This has
  * to handle region write split across multiple log buffers - this
  * state is kept external to this function so that this code can
- * can be written in an obvious, self documenting manner.
+ * be written in an obvious, self documenting manner.
  */
 static int
 xlog_write_setup_copy(
@@ -3391,24 +3436,17 @@
 }
 
 /*
- * Allocate and initialise a new log ticket.
+ * Figure out the total log space unit (in bytes) that would be
+ * required for a log ticket.
  */
-struct xlog_ticket *
-xlog_ticket_alloc(
-	struct xlog	*log,
-	int		unit_bytes,
-	int		cnt,
-	char		client,
-	bool		permanent,
-	xfs_km_flags_t	alloc_flags)
+int
+xfs_log_calc_unit_res(
+	struct xfs_mount	*mp,
+	int			unit_bytes)
 {
-	struct xlog_ticket *tic;
-	uint		num_headers;
-	int		iclog_space;
-
-	tic = kmem_zone_zalloc(xfs_log_ticket_zone, alloc_flags);
-	if (!tic)
-		return NULL;
+	struct xlog		*log = mp->m_log;
+	int			iclog_space;
+	uint			num_headers;
 
 	/*
 	 * Permanent reservations have up to 'cnt'-1 active log operations
@@ -3483,20 +3521,43 @@
 	unit_bytes += log->l_iclog_hsize;
 
 	/* for roundoff padding for transaction data and one for commit record */
-	if (xfs_sb_version_haslogv2(&log->l_mp->m_sb) &&
-	    log->l_mp->m_sb.sb_logsunit > 1) {
+	if (xfs_sb_version_haslogv2(&mp->m_sb) && mp->m_sb.sb_logsunit > 1) {
 		/* log su roundoff */
-		unit_bytes += 2*log->l_mp->m_sb.sb_logsunit;
+		unit_bytes += 2 * mp->m_sb.sb_logsunit;
 	} else {
 		/* BB roundoff */
-		unit_bytes += 2*BBSIZE;
+		unit_bytes += 2 * BBSIZE;
         }
 
+	return unit_bytes;
+}
+
+/*
+ * Allocate and initialise a new log ticket.
+ */
+struct xlog_ticket *
+xlog_ticket_alloc(
+	struct xlog		*log,
+	int			unit_bytes,
+	int			cnt,
+	char			client,
+	bool			permanent,
+	xfs_km_flags_t		alloc_flags)
+{
+	struct xlog_ticket	*tic;
+	int			unit_res;
+
+	tic = kmem_zone_zalloc(xfs_log_ticket_zone, alloc_flags);
+	if (!tic)
+		return NULL;
+
+	unit_res = xfs_log_calc_unit_res(log->l_mp, unit_bytes);
+
 	atomic_set(&tic->t_ref, 1);
 	tic->t_task		= current;
 	INIT_LIST_HEAD(&tic->t_queue);
-	tic->t_unit_res		= unit_bytes;
-	tic->t_curr_res		= unit_bytes;
+	tic->t_unit_res		= unit_res;
+	tic->t_curr_res		= unit_res;
 	tic->t_cnt		= cnt;
 	tic->t_ocnt		= cnt;
 	tic->t_tid		= prandom_u32();
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index fb630e4..1c45848 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -18,14 +18,30 @@
 #ifndef	__XFS_LOG_H__
 #define __XFS_LOG_H__
 
-/* get lsn fields */
-#define CYCLE_LSN(lsn) ((uint)((lsn)>>32))
-#define BLOCK_LSN(lsn) ((uint)(lsn))
+#include "xfs_log_format.h"
 
-/* this is used in a spot where we might otherwise double-endian-flip */
-#define CYCLE_LSN_DISK(lsn) (((__be32 *)&(lsn))[0])
+struct xfs_log_vec {
+	struct xfs_log_vec	*lv_next;	/* next lv in build list */
+	int			lv_niovecs;	/* number of iovecs in lv */
+	struct xfs_log_iovec	*lv_iovecp;	/* iovec array */
+	struct xfs_log_item	*lv_item;	/* owner */
+	char			*lv_buf;	/* formatted buffer */
+	int			lv_buf_len;	/* size of formatted buffer */
+	int			lv_size;	/* size of allocated lv */
+};
 
-#ifdef __KERNEL__
+#define XFS_LOG_VEC_ORDERED	(-1)
+
+/*
+ * Structure used to pass callback function and the function's argument
+ * to the log manager.
+ */
+typedef struct xfs_log_callback {
+	struct xfs_log_callback	*cb_next;
+	void			(*cb_func)(void *, int);
+	void			*cb_arg;
+} xfs_log_callback_t;
+
 /*
  * By comparing each component, we don't have to worry about extra
  * endian issues in treating two 32 bit numbers as one 64 bit number
@@ -59,67 +75,6 @@
  */
 #define XFS_LOG_SYNC		0x1
 
-#endif	/* __KERNEL__ */
-
-
-/* Log Clients */
-#define XFS_TRANSACTION		0x69
-#define XFS_VOLUME		0x2
-#define XFS_LOG			0xaa
-
-
-/* Region types for iovec's i_type */
-#define XLOG_REG_TYPE_BFORMAT		1
-#define XLOG_REG_TYPE_BCHUNK		2
-#define XLOG_REG_TYPE_EFI_FORMAT	3
-#define XLOG_REG_TYPE_EFD_FORMAT	4
-#define XLOG_REG_TYPE_IFORMAT		5
-#define XLOG_REG_TYPE_ICORE		6
-#define XLOG_REG_TYPE_IEXT		7
-#define XLOG_REG_TYPE_IBROOT		8
-#define XLOG_REG_TYPE_ILOCAL		9
-#define XLOG_REG_TYPE_IATTR_EXT		10
-#define XLOG_REG_TYPE_IATTR_BROOT	11
-#define XLOG_REG_TYPE_IATTR_LOCAL	12
-#define XLOG_REG_TYPE_QFORMAT		13
-#define XLOG_REG_TYPE_DQUOT		14
-#define XLOG_REG_TYPE_QUOTAOFF		15
-#define XLOG_REG_TYPE_LRHEADER		16
-#define XLOG_REG_TYPE_UNMOUNT		17
-#define XLOG_REG_TYPE_COMMIT		18
-#define XLOG_REG_TYPE_TRANSHDR		19
-#define XLOG_REG_TYPE_ICREATE		20
-#define XLOG_REG_TYPE_MAX		20
-
-typedef struct xfs_log_iovec {
-	void		*i_addr;	/* beginning address of region */
-	int		i_len;		/* length in bytes of region */
-	uint		i_type;		/* type of region */
-} xfs_log_iovec_t;
-
-struct xfs_log_vec {
-	struct xfs_log_vec	*lv_next;	/* next lv in build list */
-	int			lv_niovecs;	/* number of iovecs in lv */
-	struct xfs_log_iovec	*lv_iovecp;	/* iovec array */
-	struct xfs_log_item	*lv_item;	/* owner */
-	char			*lv_buf;	/* formatted buffer */
-	int			lv_buf_len;	/* size of formatted buffer */
-};
-
-#define XFS_LOG_VEC_ORDERED	(-1)
-
-/*
- * Structure used to pass callback function and the function's argument
- * to the log manager.
- */
-typedef struct xfs_log_callback {
-	struct xfs_log_callback	*cb_next;
-	void			(*cb_func)(void *, int);
-	void			*cb_arg;
-} xfs_log_callback_t;
-
-
-#ifdef __KERNEL__
 /* Log manager interfaces */
 struct xfs_mount;
 struct xlog_in_core;
@@ -188,5 +143,4 @@
 void	xfs_log_worker(struct work_struct *work);
 void	xfs_log_quiesce(struct xfs_mount *mp);
 
-#endif
 #endif	/* __XFS_LOG_H__ */
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 02b9cf3..cfe9797 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -80,6 +80,83 @@
 								log->l_curr_block);
 }
 
+STATIC int
+xlog_cil_lv_item_format(
+	struct xfs_log_item	*lip,
+	struct xfs_log_vec	*lv)
+{
+	int	index;
+	char	*ptr;
+
+	/* format new vectors into array */
+	lip->li_ops->iop_format(lip, lv->lv_iovecp);
+
+	/* copy data into existing array */
+	ptr = lv->lv_buf;
+	for (index = 0; index < lv->lv_niovecs; index++) {
+		struct xfs_log_iovec *vec = &lv->lv_iovecp[index];
+
+		memcpy(ptr, vec->i_addr, vec->i_len);
+		vec->i_addr = ptr;
+		ptr += vec->i_len;
+	}
+
+	/*
+	 * some size calculations for log vectors over-estimate, so the caller
+	 * doesn't know the amount of space actually used by the item. Return
+	 * the byte count to the caller so they can check and store it
+	 * appropriately.
+	 */
+	return ptr - lv->lv_buf;
+}
+
+/*
+ * Prepare the log item for insertion into the CIL. Calculate the difference in
+ * log space and vectors it will consume, and if it is a new item pin it as
+ * well.
+ */
+STATIC void
+xfs_cil_prepare_item(
+	struct xlog		*log,
+	struct xfs_log_vec	*lv,
+	struct xfs_log_vec	*old_lv,
+	int			*diff_len,
+	int			*diff_iovecs)
+{
+	/* Account for the new LV being passed in */
+	if (lv->lv_buf_len != XFS_LOG_VEC_ORDERED) {
+		*diff_len += lv->lv_buf_len;
+		*diff_iovecs += lv->lv_niovecs;
+	}
+
+	/*
+	 * If there is no old LV, this is the first time we've seen the item in
+	 * this CIL context and so we need to pin it. If we are replacing the
+	 * old_lv, then remove the space it accounts for and free it.
+	 */
+	if (!old_lv)
+		lv->lv_item->li_ops->iop_pin(lv->lv_item);
+	else if (old_lv != lv) {
+		ASSERT(lv->lv_buf_len != XFS_LOG_VEC_ORDERED);
+
+		*diff_len -= old_lv->lv_buf_len;
+		*diff_iovecs -= old_lv->lv_niovecs;
+		kmem_free(old_lv);
+	}
+
+	/* attach new log vector to log item */
+	lv->lv_item->li_lv = lv;
+
+	/*
+	 * If this is the first time the item is being committed to the
+	 * CIL, store the sequence number on the log item so we can
+	 * tell in future commits whether this is the first checkpoint
+	 * the item is being committed into.
+	 */
+	if (!lv->lv_item->li_seq)
+		lv->lv_item->li_seq = log->l_cilp->xc_ctx->sequence;
+}
+
 /*
  * Format log item into a flat buffers
  *
@@ -106,35 +183,39 @@
  * format the regions into the iclog as though they are being formatted
  * directly out of the objects themselves.
  */
-static struct xfs_log_vec *
-xlog_cil_prepare_log_vecs(
-	struct xfs_trans	*tp)
+static void
+xlog_cil_insert_format_items(
+	struct xlog		*log,
+	struct xfs_trans	*tp,
+	int			*diff_len,
+	int			*diff_iovecs)
 {
 	struct xfs_log_item_desc *lidp;
-	struct xfs_log_vec	*lv = NULL;
-	struct xfs_log_vec	*ret_lv = NULL;
 
 
 	/* Bail out if we didn't find a log item.  */
 	if (list_empty(&tp->t_items)) {
 		ASSERT(0);
-		return NULL;
+		return;
 	}
 
 	list_for_each_entry(lidp, &tp->t_items, lid_trans) {
-		struct xfs_log_vec *new_lv;
-		void	*ptr;
-		int	index;
-		int	len = 0;
-		uint	niovecs;
+		struct xfs_log_item *lip = lidp->lid_item;
+		struct xfs_log_vec *lv;
+		struct xfs_log_vec *old_lv;
+		int	niovecs = 0;
+		int	nbytes = 0;
+		int	buf_size;
 		bool	ordered = false;
 
 		/* Skip items which aren't dirty in this transaction. */
 		if (!(lidp->lid_flags & XFS_LID_DIRTY))
 			continue;
 
+		/* get number of vecs and size of data to be stored */
+		lip->li_ops->iop_size(lip, &niovecs, &nbytes);
+
 		/* Skip items that do not have any vectors for writing */
-		niovecs = IOP_SIZE(lidp->lid_item);
 		if (!niovecs)
 			continue;
 
@@ -146,109 +227,63 @@
 		if (niovecs == XFS_LOG_VEC_ORDERED) {
 			ordered = true;
 			niovecs = 0;
+			nbytes = 0;
 		}
 
-		new_lv = kmem_zalloc(sizeof(*new_lv) +
-				niovecs * sizeof(struct xfs_log_iovec),
-				KM_SLEEP|KM_NOFS);
+		/* grab the old item if it exists for reservation accounting */
+		old_lv = lip->li_lv;
 
-		new_lv->lv_item = lidp->lid_item;
-		new_lv->lv_niovecs = niovecs;
+		/* calc buffer size */
+		buf_size = sizeof(struct xfs_log_vec) + nbytes +
+				niovecs * sizeof(struct xfs_log_iovec);
+
+		/* compare to existing item size */
+		if (lip->li_lv && buf_size <= lip->li_lv->lv_size) {
+			/* same or smaller, optimise common overwrite case */
+			lv = lip->li_lv;
+			lv->lv_next = NULL;
+
+			if (ordered)
+				goto insert;
+
+			/*
+			 * set the item up as though it is a new insertion so
+			 * that the space reservation accounting is correct.
+			 */
+			*diff_iovecs -= lv->lv_niovecs;
+			*diff_len -= lv->lv_buf_len;
+
+			/* Ensure the lv is set up according to ->iop_size */
+			lv->lv_niovecs = niovecs;
+			lv->lv_buf = (char *)lv + buf_size - nbytes;
+
+			lv->lv_buf_len = xlog_cil_lv_item_format(lip, lv);
+			goto insert;
+		}
+
+		/* allocate new data chunk */
+		lv = kmem_zalloc(buf_size, KM_SLEEP|KM_NOFS);
+		lv->lv_item = lip;
+		lv->lv_size = buf_size;
+		lv->lv_niovecs = niovecs;
 		if (ordered) {
 			/* track as an ordered logvec */
-			new_lv->lv_buf_len = XFS_LOG_VEC_ORDERED;
-			goto next;
+			ASSERT(lip->li_lv == NULL);
+			lv->lv_buf_len = XFS_LOG_VEC_ORDERED;
+			goto insert;
 		}
 
 		/* The allocated iovec region lies beyond the log vector. */
-		new_lv->lv_iovecp = (struct xfs_log_iovec *)&new_lv[1];
+		lv->lv_iovecp = (struct xfs_log_iovec *)&lv[1];
 
-		/* build the vector array and calculate it's length */
-		IOP_FORMAT(new_lv->lv_item, new_lv->lv_iovecp);
-		for (index = 0; index < new_lv->lv_niovecs; index++)
-			len += new_lv->lv_iovecp[index].i_len;
+		/* The allocated data region lies beyond the iovec region */
+		lv->lv_buf = (char *)lv + buf_size - nbytes;
 
-		new_lv->lv_buf_len = len;
-		new_lv->lv_buf = kmem_alloc(new_lv->lv_buf_len,
-				KM_SLEEP|KM_NOFS);
-		ptr = new_lv->lv_buf;
-
-		for (index = 0; index < new_lv->lv_niovecs; index++) {
-			struct xfs_log_iovec *vec = &new_lv->lv_iovecp[index];
-
-			memcpy(ptr, vec->i_addr, vec->i_len);
-			vec->i_addr = ptr;
-			ptr += vec->i_len;
-		}
-		ASSERT(ptr == new_lv->lv_buf + new_lv->lv_buf_len);
-
-next:
-		if (!ret_lv)
-			ret_lv = new_lv;
-		else
-			lv->lv_next = new_lv;
-		lv = new_lv;
+		lv->lv_buf_len = xlog_cil_lv_item_format(lip, lv);
+insert:
+		ASSERT(lv->lv_buf_len <= nbytes);
+		xfs_cil_prepare_item(log, lv, old_lv, diff_len, diff_iovecs);
 	}
-
-	return ret_lv;
-}
-
-/*
- * Prepare the log item for insertion into the CIL. Calculate the difference in
- * log space and vectors it will consume, and if it is a new item pin it as
- * well.
- */
-STATIC void
-xfs_cil_prepare_item(
-	struct xlog		*log,
-	struct xfs_log_vec	*lv,
-	int			*len,
-	int			*diff_iovecs)
-{
-	struct xfs_log_vec	*old = lv->lv_item->li_lv;
-
-	if (old) {
-		/* existing lv on log item, space used is a delta */
-		ASSERT((old->lv_buf && old->lv_buf_len && old->lv_niovecs) ||
-			old->lv_buf_len == XFS_LOG_VEC_ORDERED);
-
-		/*
-		 * If the new item is ordered, keep the old one that is already
-		 * tracking dirty or ordered regions
-		 */
-		if (lv->lv_buf_len == XFS_LOG_VEC_ORDERED) {
-			ASSERT(!lv->lv_buf);
-			kmem_free(lv);
-			return;
-		}
-
-		*len += lv->lv_buf_len - old->lv_buf_len;
-		*diff_iovecs += lv->lv_niovecs - old->lv_niovecs;
-		kmem_free(old->lv_buf);
-		kmem_free(old);
-	} else {
-		/* new lv, must pin the log item */
-		ASSERT(!lv->lv_item->li_lv);
-
-		if (lv->lv_buf_len != XFS_LOG_VEC_ORDERED) {
-			*len += lv->lv_buf_len;
-			*diff_iovecs += lv->lv_niovecs;
-		}
-		IOP_PIN(lv->lv_item);
-
-	}
-
-	/* attach new log vector to log item */
-	lv->lv_item->li_lv = lv;
-
-	/*
-	 * If this is the first time the item is being committed to the
-	 * CIL, store the sequence number on the log item so we can
-	 * tell in future commits whether this is the first checkpoint
-	 * the item is being committed into.
-	 */
-	if (!lv->lv_item->li_seq)
-		lv->lv_item->li_seq = log->l_cilp->xc_ctx->sequence;
 }
 
 /*
@@ -261,53 +296,47 @@
 static void
 xlog_cil_insert_items(
 	struct xlog		*log,
-	struct xfs_log_vec	*log_vector,
-	struct xlog_ticket	*ticket)
+	struct xfs_trans	*tp)
 {
 	struct xfs_cil		*cil = log->l_cilp;
 	struct xfs_cil_ctx	*ctx = cil->xc_ctx;
-	struct xfs_log_vec	*lv;
+	struct xfs_log_item_desc *lidp;
 	int			len = 0;
 	int			diff_iovecs = 0;
 	int			iclog_space;
 
-	ASSERT(log_vector);
+	ASSERT(tp);
 
 	/*
-	 * Do all the accounting aggregation and switching of log vectors
-	 * around in a separate loop to the insertion of items into the CIL.
-	 * Then we can do a separate loop to update the CIL within a single
-	 * lock/unlock pair. This reduces the number of round trips on the CIL
-	 * lock from O(nr_logvectors) to O(1) and greatly reduces the overall
-	 * hold time for the transaction commit.
-	 *
-	 * If this is the first time the item is being placed into the CIL in
-	 * this context, pin it so it can't be written to disk until the CIL is
-	 * flushed to the iclog and the iclog written to disk.
-	 *
 	 * We can do this safely because the context can't checkpoint until we
 	 * are done so it doesn't matter exactly how we update the CIL.
 	 */
+	xlog_cil_insert_format_items(log, tp, &len, &diff_iovecs);
+
+	/*
+	 * Now (re-)position everything modified at the tail of the CIL.
+	 * We do this here so we only need to take the CIL lock once during
+	 * the transaction commit.
+	 */
 	spin_lock(&cil->xc_cil_lock);
-	for (lv = log_vector; lv; ) {
-		struct xfs_log_vec *next = lv->lv_next;
+	list_for_each_entry(lidp, &tp->t_items, lid_trans) {
+		struct xfs_log_item	*lip = lidp->lid_item;
 
-		ASSERT(lv->lv_item->li_lv || list_empty(&lv->lv_item->li_cil));
-		lv->lv_next = NULL;
+		/* Skip items which aren't dirty in this transaction. */
+		if (!(lidp->lid_flags & XFS_LID_DIRTY))
+			continue;
 
-		/*
-		 * xfs_cil_prepare_item() may free the lv, so move the item on
-		 * the CIL first.
-		 */
-		list_move_tail(&lv->lv_item->li_cil, &cil->xc_cil);
-		xfs_cil_prepare_item(log, lv, &len, &diff_iovecs);
-		lv = next;
+		list_move_tail(&lip->li_cil, &cil->xc_cil);
 	}
 
 	/* account for space used by new iovec headers  */
 	len += diff_iovecs * sizeof(xlog_op_header_t);
 	ctx->nvecs += diff_iovecs;
 
+	/* attach the transaction to the CIL if it has any busy extents */
+	if (!list_empty(&tp->t_busy))
+		list_splice_init(&tp->t_busy, &ctx->busy_extents);
+
 	/*
 	 * Now transfer enough transaction reservation to the context ticket
 	 * for the checkpoint. The context ticket is special - the unit
@@ -316,10 +345,8 @@
 	 * during the transaction commit.
 	 */
 	if (ctx->ticket->t_curr_res == 0) {
-		/* first commit in checkpoint, steal the header reservation */
-		ASSERT(ticket->t_curr_res >= ctx->ticket->t_unit_res + len);
 		ctx->ticket->t_curr_res = ctx->ticket->t_unit_res;
-		ticket->t_curr_res -= ctx->ticket->t_unit_res;
+		tp->t_ticket->t_curr_res -= ctx->ticket->t_unit_res;
 	}
 
 	/* do we need space for more log record headers? */
@@ -333,10 +360,10 @@
 		hdrs *= log->l_iclog_hsize + sizeof(struct xlog_op_header);
 		ctx->ticket->t_unit_res += hdrs;
 		ctx->ticket->t_curr_res += hdrs;
-		ticket->t_curr_res -= hdrs;
-		ASSERT(ticket->t_curr_res >= len);
+		tp->t_ticket->t_curr_res -= hdrs;
+		ASSERT(tp->t_ticket->t_curr_res >= len);
 	}
-	ticket->t_curr_res -= len;
+	tp->t_ticket->t_curr_res -= len;
 	ctx->space_used += len;
 
 	spin_unlock(&cil->xc_cil_lock);
@@ -350,7 +377,6 @@
 
 	for (lv = log_vector; lv; ) {
 		struct xfs_log_vec *next = lv->lv_next;
-		kmem_free(lv->lv_buf);
 		kmem_free(lv);
 		lv = next;
 	}
@@ -376,9 +402,9 @@
 	xfs_extent_busy_clear(mp, &ctx->busy_extents,
 			     (mp->m_flags & XFS_MOUNT_DISCARD) && !abort);
 
-	spin_lock(&ctx->cil->xc_cil_lock);
+	spin_lock(&ctx->cil->xc_push_lock);
 	list_del(&ctx->committing);
-	spin_unlock(&ctx->cil->xc_cil_lock);
+	spin_unlock(&ctx->cil->xc_push_lock);
 
 	xlog_cil_free_logvec(ctx->lv_chain);
 
@@ -433,7 +459,7 @@
 	down_write(&cil->xc_ctx_lock);
 	ctx = cil->xc_ctx;
 
-	spin_lock(&cil->xc_cil_lock);
+	spin_lock(&cil->xc_push_lock);
 	push_seq = cil->xc_push_seq;
 	ASSERT(push_seq <= ctx->sequence);
 
@@ -444,10 +470,10 @@
 	 */
 	if (list_empty(&cil->xc_cil)) {
 		cil->xc_push_seq = 0;
-		spin_unlock(&cil->xc_cil_lock);
+		spin_unlock(&cil->xc_push_lock);
 		goto out_skip;
 	}
-	spin_unlock(&cil->xc_cil_lock);
+	spin_unlock(&cil->xc_push_lock);
 
 
 	/* check for a previously pushed seqeunce */
@@ -515,9 +541,9 @@
 	 * that higher sequences will wait for us to write out a commit record
 	 * before they do.
 	 */
-	spin_lock(&cil->xc_cil_lock);
+	spin_lock(&cil->xc_push_lock);
 	list_add(&ctx->committing, &cil->xc_committing);
-	spin_unlock(&cil->xc_cil_lock);
+	spin_unlock(&cil->xc_push_lock);
 	up_write(&cil->xc_ctx_lock);
 
 	/*
@@ -552,7 +578,7 @@
 	 * order the commit records so replay will get them in the right order.
 	 */
 restart:
-	spin_lock(&cil->xc_cil_lock);
+	spin_lock(&cil->xc_push_lock);
 	list_for_each_entry(new_ctx, &cil->xc_committing, committing) {
 		/*
 		 * Higher sequences will wait for this one so skip them.
@@ -565,11 +591,11 @@
 			 * It is still being pushed! Wait for the push to
 			 * complete, then start again from the beginning.
 			 */
-			xlog_wait(&cil->xc_commit_wait, &cil->xc_cil_lock);
+			xlog_wait(&cil->xc_commit_wait, &cil->xc_push_lock);
 			goto restart;
 		}
 	}
-	spin_unlock(&cil->xc_cil_lock);
+	spin_unlock(&cil->xc_push_lock);
 
 	/* xfs_log_done always frees the ticket on error. */
 	commit_lsn = xfs_log_done(log->l_mp, tic, &commit_iclog, 0);
@@ -588,10 +614,10 @@
 	 * callbacks to the iclog we can assign the commit LSN to the context
 	 * and wake up anyone who is waiting for the commit to complete.
 	 */
-	spin_lock(&cil->xc_cil_lock);
+	spin_lock(&cil->xc_push_lock);
 	ctx->commit_lsn = commit_lsn;
 	wake_up_all(&cil->xc_commit_wait);
-	spin_unlock(&cil->xc_cil_lock);
+	spin_unlock(&cil->xc_push_lock);
 
 	/* release the hounds! */
 	return xfs_log_release_iclog(log->l_mp, commit_iclog);
@@ -644,12 +670,12 @@
 	if (cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log))
 		return;
 
-	spin_lock(&cil->xc_cil_lock);
+	spin_lock(&cil->xc_push_lock);
 	if (cil->xc_push_seq < cil->xc_current_sequence) {
 		cil->xc_push_seq = cil->xc_current_sequence;
 		queue_work(log->l_mp->m_cil_workqueue, &cil->xc_push_work);
 	}
-	spin_unlock(&cil->xc_cil_lock);
+	spin_unlock(&cil->xc_push_lock);
 
 }
 
@@ -672,14 +698,14 @@
 	 * If the CIL is empty or we've already pushed the sequence then
 	 * there's no work we need to do.
 	 */
-	spin_lock(&cil->xc_cil_lock);
+	spin_lock(&cil->xc_push_lock);
 	if (list_empty(&cil->xc_cil) || push_seq <= cil->xc_push_seq) {
-		spin_unlock(&cil->xc_cil_lock);
+		spin_unlock(&cil->xc_push_lock);
 		return;
 	}
 
 	cil->xc_push_seq = push_seq;
-	spin_unlock(&cil->xc_cil_lock);
+	spin_unlock(&cil->xc_push_lock);
 
 	/* do the push now */
 	xlog_cil_push(log);
@@ -706,43 +732,25 @@
 	int			flags)
 {
 	struct xlog		*log = mp->m_log;
+	struct xfs_cil		*cil = log->l_cilp;
 	int			log_flags = 0;
-	struct xfs_log_vec	*log_vector;
 
 	if (flags & XFS_TRANS_RELEASE_LOG_RES)
 		log_flags = XFS_LOG_REL_PERM_RESERV;
 
-	/*
-	 * Do all the hard work of formatting items (including memory
-	 * allocation) outside the CIL context lock. This prevents stalling CIL
-	 * pushes when we are low on memory and a transaction commit spends a
-	 * lot of time in memory reclaim.
-	 */
-	log_vector = xlog_cil_prepare_log_vecs(tp);
-	if (!log_vector)
-		return ENOMEM;
-
 	/* lock out background commit */
-	down_read(&log->l_cilp->xc_ctx_lock);
-	if (commit_lsn)
-		*commit_lsn = log->l_cilp->xc_ctx->sequence;
+	down_read(&cil->xc_ctx_lock);
 
-	/* xlog_cil_insert_items() destroys log_vector list */
-	xlog_cil_insert_items(log, log_vector, tp->t_ticket);
+	xlog_cil_insert_items(log, tp);
 
 	/* check we didn't blow the reservation */
 	if (tp->t_ticket->t_curr_res < 0)
-		xlog_print_tic_res(log->l_mp, tp->t_ticket);
+		xlog_print_tic_res(mp, tp->t_ticket);
 
-	/* attach the transaction to the CIL if it has any busy extents */
-	if (!list_empty(&tp->t_busy)) {
-		spin_lock(&log->l_cilp->xc_cil_lock);
-		list_splice_init(&tp->t_busy,
-					&log->l_cilp->xc_ctx->busy_extents);
-		spin_unlock(&log->l_cilp->xc_cil_lock);
-	}
+	tp->t_commit_lsn = cil->xc_ctx->sequence;
+	if (commit_lsn)
+		*commit_lsn = tp->t_commit_lsn;
 
-	tp->t_commit_lsn = *commit_lsn;
 	xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
 	xfs_trans_unreserve_and_mod_sb(tp);
 
@@ -757,11 +765,11 @@
 	 * the log items. This affects (at least) processing of stale buffers,
 	 * inodes and EFIs.
 	 */
-	xfs_trans_free_items(tp, *commit_lsn, 0);
+	xfs_trans_free_items(tp, tp->t_commit_lsn, 0);
 
 	xlog_cil_push_background(log);
 
-	up_read(&log->l_cilp->xc_ctx_lock);
+	up_read(&cil->xc_ctx_lock);
 	return 0;
 }
 
@@ -800,7 +808,7 @@
 	 * on commits for those as well.
 	 */
 restart:
-	spin_lock(&cil->xc_cil_lock);
+	spin_lock(&cil->xc_push_lock);
 	list_for_each_entry(ctx, &cil->xc_committing, committing) {
 		if (ctx->sequence > sequence)
 			continue;
@@ -809,7 +817,7 @@
 			 * It is still being pushed! Wait for the push to
 			 * complete, then start again from the beginning.
 			 */
-			xlog_wait(&cil->xc_commit_wait, &cil->xc_cil_lock);
+			xlog_wait(&cil->xc_commit_wait, &cil->xc_push_lock);
 			goto restart;
 		}
 		if (ctx->sequence != sequence)
@@ -817,7 +825,7 @@
 		/* found it! */
 		commit_lsn = ctx->commit_lsn;
 	}
-	spin_unlock(&cil->xc_cil_lock);
+	spin_unlock(&cil->xc_push_lock);
 	return commit_lsn;
 }
 
@@ -875,6 +883,7 @@
 	INIT_LIST_HEAD(&cil->xc_cil);
 	INIT_LIST_HEAD(&cil->xc_committing);
 	spin_lock_init(&cil->xc_cil_lock);
+	spin_lock_init(&cil->xc_push_lock);
 	init_rwsem(&cil->xc_ctx_lock);
 	init_waitqueue_head(&cil->xc_commit_wait);
 
diff --git a/fs/xfs/xfs_log_format.h b/fs/xfs/xfs_log_format.h
new file mode 100644
index 0000000..31e3a06
--- /dev/null
+++ b/fs/xfs/xfs_log_format.h
@@ -0,0 +1,852 @@
+/*
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef	__XFS_LOG_FORMAT_H__
+#define __XFS_LOG_FORMAT_H__
+
+struct xfs_mount;
+struct xfs_trans_res;
+
+/*
+ * On-disk Log Format definitions.
+ *
+ * This file contains all the on-disk format definitions used within the log. It
+ * includes the physical log structure itself, as well as all the log item
+ * format structures that are written into the log and intepreted by log
+ * recovery. We start with the physical log format definitions, and then work
+ * through all the log items definitions and everything they encode into the
+ * log.
+ */
+typedef __uint32_t xlog_tid_t;
+
+#define XLOG_MIN_ICLOGS		2
+#define XLOG_MAX_ICLOGS		8
+#define XLOG_HEADER_MAGIC_NUM	0xFEEDbabe	/* Invalid cycle number */
+#define XLOG_VERSION_1		1
+#define XLOG_VERSION_2		2		/* Large IClogs, Log sunit */
+#define XLOG_VERSION_OKBITS	(XLOG_VERSION_1 | XLOG_VERSION_2)
+#define XLOG_MIN_RECORD_BSIZE	(16*1024)	/* eventually 32k */
+#define XLOG_BIG_RECORD_BSIZE	(32*1024)	/* 32k buffers */
+#define XLOG_MAX_RECORD_BSIZE	(256*1024)
+#define XLOG_HEADER_CYCLE_SIZE	(32*1024)	/* cycle data in header */
+#define XLOG_MIN_RECORD_BSHIFT	14		/* 16384 == 1 << 14 */
+#define XLOG_BIG_RECORD_BSHIFT	15		/* 32k == 1 << 15 */
+#define XLOG_MAX_RECORD_BSHIFT	18		/* 256k == 1 << 18 */
+#define XLOG_BTOLSUNIT(log, b)  (((b)+(log)->l_mp->m_sb.sb_logsunit-1) / \
+                                 (log)->l_mp->m_sb.sb_logsunit)
+#define XLOG_LSUNITTOB(log, su) ((su) * (log)->l_mp->m_sb.sb_logsunit)
+
+#define XLOG_HEADER_SIZE	512
+
+/* Minimum number of transactions that must fit in the log (defined by mkfs) */
+#define XFS_MIN_LOG_FACTOR	3
+
+#define XLOG_REC_SHIFT(log) \
+	BTOBB(1 << (xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? \
+	 XLOG_MAX_RECORD_BSHIFT : XLOG_BIG_RECORD_BSHIFT))
+#define XLOG_TOTAL_REC_SHIFT(log) \
+	BTOBB(XLOG_MAX_ICLOGS << (xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? \
+	 XLOG_MAX_RECORD_BSHIFT : XLOG_BIG_RECORD_BSHIFT))
+
+/* get lsn fields */
+#define CYCLE_LSN(lsn) ((uint)((lsn)>>32))
+#define BLOCK_LSN(lsn) ((uint)(lsn))
+
+/* this is used in a spot where we might otherwise double-endian-flip */
+#define CYCLE_LSN_DISK(lsn) (((__be32 *)&(lsn))[0])
+
+static inline xfs_lsn_t xlog_assign_lsn(uint cycle, uint block)
+{
+	return ((xfs_lsn_t)cycle << 32) | block;
+}
+
+static inline uint xlog_get_cycle(char *ptr)
+{
+	if (be32_to_cpu(*(__be32 *)ptr) == XLOG_HEADER_MAGIC_NUM)
+		return be32_to_cpu(*((__be32 *)ptr + 1));
+	else
+		return be32_to_cpu(*(__be32 *)ptr);
+}
+
+/* Log Clients */
+#define XFS_TRANSACTION		0x69
+#define XFS_VOLUME		0x2
+#define XFS_LOG			0xaa
+
+#define XLOG_UNMOUNT_TYPE	0x556e	/* Un for Unmount */
+
+/* Region types for iovec's i_type */
+#define XLOG_REG_TYPE_BFORMAT		1
+#define XLOG_REG_TYPE_BCHUNK		2
+#define XLOG_REG_TYPE_EFI_FORMAT	3
+#define XLOG_REG_TYPE_EFD_FORMAT	4
+#define XLOG_REG_TYPE_IFORMAT		5
+#define XLOG_REG_TYPE_ICORE		6
+#define XLOG_REG_TYPE_IEXT		7
+#define XLOG_REG_TYPE_IBROOT		8
+#define XLOG_REG_TYPE_ILOCAL		9
+#define XLOG_REG_TYPE_IATTR_EXT		10
+#define XLOG_REG_TYPE_IATTR_BROOT	11
+#define XLOG_REG_TYPE_IATTR_LOCAL	12
+#define XLOG_REG_TYPE_QFORMAT		13
+#define XLOG_REG_TYPE_DQUOT		14
+#define XLOG_REG_TYPE_QUOTAOFF		15
+#define XLOG_REG_TYPE_LRHEADER		16
+#define XLOG_REG_TYPE_UNMOUNT		17
+#define XLOG_REG_TYPE_COMMIT		18
+#define XLOG_REG_TYPE_TRANSHDR		19
+#define XLOG_REG_TYPE_ICREATE		20
+#define XLOG_REG_TYPE_MAX		20
+
+/*
+ * Flags to log operation header
+ *
+ * The first write of a new transaction will be preceded with a start
+ * record, XLOG_START_TRANS.  Once a transaction is committed, a commit
+ * record is written, XLOG_COMMIT_TRANS.  If a single region can not fit into
+ * the remainder of the current active in-core log, it is split up into
+ * multiple regions.  Each partial region will be marked with a
+ * XLOG_CONTINUE_TRANS until the last one, which gets marked with XLOG_END_TRANS.
+ *
+ */
+#define XLOG_START_TRANS	0x01	/* Start a new transaction */
+#define XLOG_COMMIT_TRANS	0x02	/* Commit this transaction */
+#define XLOG_CONTINUE_TRANS	0x04	/* Cont this trans into new region */
+#define XLOG_WAS_CONT_TRANS	0x08	/* Cont this trans into new region */
+#define XLOG_END_TRANS		0x10	/* End a continued transaction */
+#define XLOG_UNMOUNT_TRANS	0x20	/* Unmount a filesystem transaction */
+
+
+typedef struct xlog_op_header {
+	__be32	   oh_tid;	/* transaction id of operation	:  4 b */
+	__be32	   oh_len;	/* bytes in data region		:  4 b */
+	__u8	   oh_clientid;	/* who sent me this		:  1 b */
+	__u8	   oh_flags;	/*				:  1 b */
+	__u16	   oh_res2;	/* 32 bit align			:  2 b */
+} xlog_op_header_t;
+
+/* valid values for h_fmt */
+#define XLOG_FMT_UNKNOWN  0
+#define XLOG_FMT_LINUX_LE 1
+#define XLOG_FMT_LINUX_BE 2
+#define XLOG_FMT_IRIX_BE  3
+
+/* our fmt */
+#ifdef XFS_NATIVE_HOST
+#define XLOG_FMT XLOG_FMT_LINUX_BE
+#else
+#define XLOG_FMT XLOG_FMT_LINUX_LE
+#endif
+
+typedef struct xlog_rec_header {
+	__be32	  h_magicno;	/* log record (LR) identifier		:  4 */
+	__be32	  h_cycle;	/* write cycle of log			:  4 */
+	__be32	  h_version;	/* LR version				:  4 */
+	__be32	  h_len;	/* len in bytes; should be 64-bit aligned: 4 */
+	__be64	  h_lsn;	/* lsn of this LR			:  8 */
+	__be64	  h_tail_lsn;	/* lsn of 1st LR w/ buffers not committed: 8 */
+	__le32	  h_crc;	/* crc of log record                    :  4 */
+	__be32	  h_prev_block; /* block number to previous LR		:  4 */
+	__be32	  h_num_logops;	/* number of log operations in this LR	:  4 */
+	__be32	  h_cycle_data[XLOG_HEADER_CYCLE_SIZE / BBSIZE];
+	/* new fields */
+	__be32    h_fmt;        /* format of log record                 :  4 */
+	uuid_t	  h_fs_uuid;    /* uuid of FS                           : 16 */
+	__be32	  h_size;	/* iclog size				:  4 */
+} xlog_rec_header_t;
+
+typedef struct xlog_rec_ext_header {
+	__be32	  xh_cycle;	/* write cycle of log			: 4 */
+	__be32	  xh_cycle_data[XLOG_HEADER_CYCLE_SIZE / BBSIZE]; /*	: 256 */
+} xlog_rec_ext_header_t;
+
+/*
+ * Quite misnamed, because this union lays out the actual on-disk log buffer.
+ */
+typedef union xlog_in_core2 {
+	xlog_rec_header_t	hic_header;
+	xlog_rec_ext_header_t	hic_xheader;
+	char			hic_sector[XLOG_HEADER_SIZE];
+} xlog_in_core_2_t;
+
+/* not an on-disk structure, but needed by log recovery in userspace */
+typedef struct xfs_log_iovec {
+	void		*i_addr;	/* beginning address of region */
+	int		i_len;		/* length in bytes of region */
+	uint		i_type;		/* type of region */
+} xfs_log_iovec_t;
+
+
+/*
+ * Transaction Header definitions.
+ *
+ * This is the structure written in the log at the head of every transaction. It
+ * identifies the type and id of the transaction, and contains the number of
+ * items logged by the transaction so we know how many to expect during
+ * recovery.
+ *
+ * Do not change the below structure without redoing the code in
+ * xlog_recover_add_to_trans() and xlog_recover_add_to_cont_trans().
+ */
+typedef struct xfs_trans_header {
+	uint		th_magic;		/* magic number */
+	uint		th_type;		/* transaction type */
+	__int32_t	th_tid;			/* transaction id (unused) */
+	uint		th_num_items;		/* num items logged by trans */
+} xfs_trans_header_t;
+
+#define	XFS_TRANS_HEADER_MAGIC	0x5452414e	/* TRAN */
+
+/*
+ * Log item types.
+ */
+#define	XFS_LI_EFI		0x1236
+#define	XFS_LI_EFD		0x1237
+#define	XFS_LI_IUNLINK		0x1238
+#define	XFS_LI_INODE		0x123b	/* aligned ino chunks, var-size ibufs */
+#define	XFS_LI_BUF		0x123c	/* v2 bufs, variable sized inode bufs */
+#define	XFS_LI_DQUOT		0x123d
+#define	XFS_LI_QUOTAOFF		0x123e
+#define	XFS_LI_ICREATE		0x123f
+
+#define XFS_LI_TYPE_DESC \
+	{ XFS_LI_EFI,		"XFS_LI_EFI" }, \
+	{ XFS_LI_EFD,		"XFS_LI_EFD" }, \
+	{ XFS_LI_IUNLINK,	"XFS_LI_IUNLINK" }, \
+	{ XFS_LI_INODE,		"XFS_LI_INODE" }, \
+	{ XFS_LI_BUF,		"XFS_LI_BUF" }, \
+	{ XFS_LI_DQUOT,		"XFS_LI_DQUOT" }, \
+	{ XFS_LI_QUOTAOFF,	"XFS_LI_QUOTAOFF" }, \
+	{ XFS_LI_ICREATE,	"XFS_LI_ICREATE" }
+
+/*
+ * Transaction types.  Used to distinguish types of buffers.
+ */
+#define XFS_TRANS_SETATTR_NOT_SIZE	1
+#define XFS_TRANS_SETATTR_SIZE		2
+#define XFS_TRANS_INACTIVE		3
+#define XFS_TRANS_CREATE		4
+#define XFS_TRANS_CREATE_TRUNC		5
+#define XFS_TRANS_TRUNCATE_FILE		6
+#define XFS_TRANS_REMOVE		7
+#define XFS_TRANS_LINK			8
+#define XFS_TRANS_RENAME		9
+#define XFS_TRANS_MKDIR			10
+#define XFS_TRANS_RMDIR			11
+#define XFS_TRANS_SYMLINK		12
+#define XFS_TRANS_SET_DMATTRS		13
+#define XFS_TRANS_GROWFS		14
+#define XFS_TRANS_STRAT_WRITE		15
+#define XFS_TRANS_DIOSTRAT		16
+/* 17 was XFS_TRANS_WRITE_SYNC */
+#define	XFS_TRANS_WRITEID		18
+#define	XFS_TRANS_ADDAFORK		19
+#define	XFS_TRANS_ATTRINVAL		20
+#define	XFS_TRANS_ATRUNCATE		21
+#define	XFS_TRANS_ATTR_SET		22
+#define	XFS_TRANS_ATTR_RM		23
+#define	XFS_TRANS_ATTR_FLAG		24
+#define	XFS_TRANS_CLEAR_AGI_BUCKET	25
+#define XFS_TRANS_QM_SBCHANGE		26
+/*
+ * Dummy entries since we use the transaction type to index into the
+ * trans_type[] in xlog_recover_print_trans_head()
+ */
+#define XFS_TRANS_DUMMY1		27
+#define XFS_TRANS_DUMMY2		28
+#define XFS_TRANS_QM_QUOTAOFF		29
+#define XFS_TRANS_QM_DQALLOC		30
+#define XFS_TRANS_QM_SETQLIM		31
+#define XFS_TRANS_QM_DQCLUSTER		32
+#define XFS_TRANS_QM_QINOCREATE		33
+#define XFS_TRANS_QM_QUOTAOFF_END	34
+#define XFS_TRANS_SB_UNIT		35
+#define XFS_TRANS_FSYNC_TS		36
+#define	XFS_TRANS_GROWFSRT_ALLOC	37
+#define	XFS_TRANS_GROWFSRT_ZERO		38
+#define	XFS_TRANS_GROWFSRT_FREE		39
+#define	XFS_TRANS_SWAPEXT		40
+#define	XFS_TRANS_SB_COUNT		41
+#define	XFS_TRANS_CHECKPOINT		42
+#define	XFS_TRANS_ICREATE		43
+#define	XFS_TRANS_TYPE_MAX		43
+/* new transaction types need to be reflected in xfs_logprint(8) */
+
+#define XFS_TRANS_TYPES \
+	{ XFS_TRANS_SETATTR_NOT_SIZE,	"SETATTR_NOT_SIZE" }, \
+	{ XFS_TRANS_SETATTR_SIZE,	"SETATTR_SIZE" }, \
+	{ XFS_TRANS_INACTIVE,		"INACTIVE" }, \
+	{ XFS_TRANS_CREATE,		"CREATE" }, \
+	{ XFS_TRANS_CREATE_TRUNC,	"CREATE_TRUNC" }, \
+	{ XFS_TRANS_TRUNCATE_FILE,	"TRUNCATE_FILE" }, \
+	{ XFS_TRANS_REMOVE,		"REMOVE" }, \
+	{ XFS_TRANS_LINK,		"LINK" }, \
+	{ XFS_TRANS_RENAME,		"RENAME" }, \
+	{ XFS_TRANS_MKDIR,		"MKDIR" }, \
+	{ XFS_TRANS_RMDIR,		"RMDIR" }, \
+	{ XFS_TRANS_SYMLINK,		"SYMLINK" }, \
+	{ XFS_TRANS_SET_DMATTRS,	"SET_DMATTRS" }, \
+	{ XFS_TRANS_GROWFS,		"GROWFS" }, \
+	{ XFS_TRANS_STRAT_WRITE,	"STRAT_WRITE" }, \
+	{ XFS_TRANS_DIOSTRAT,		"DIOSTRAT" }, \
+	{ XFS_TRANS_WRITEID,		"WRITEID" }, \
+	{ XFS_TRANS_ADDAFORK,		"ADDAFORK" }, \
+	{ XFS_TRANS_ATTRINVAL,		"ATTRINVAL" }, \
+	{ XFS_TRANS_ATRUNCATE,		"ATRUNCATE" }, \
+	{ XFS_TRANS_ATTR_SET,		"ATTR_SET" }, \
+	{ XFS_TRANS_ATTR_RM,		"ATTR_RM" }, \
+	{ XFS_TRANS_ATTR_FLAG,		"ATTR_FLAG" }, \
+	{ XFS_TRANS_CLEAR_AGI_BUCKET,	"CLEAR_AGI_BUCKET" }, \
+	{ XFS_TRANS_QM_SBCHANGE,	"QM_SBCHANGE" }, \
+	{ XFS_TRANS_QM_QUOTAOFF,	"QM_QUOTAOFF" }, \
+	{ XFS_TRANS_QM_DQALLOC,		"QM_DQALLOC" }, \
+	{ XFS_TRANS_QM_SETQLIM,		"QM_SETQLIM" }, \
+	{ XFS_TRANS_QM_DQCLUSTER,	"QM_DQCLUSTER" }, \
+	{ XFS_TRANS_QM_QINOCREATE,	"QM_QINOCREATE" }, \
+	{ XFS_TRANS_QM_QUOTAOFF_END,	"QM_QOFF_END" }, \
+	{ XFS_TRANS_SB_UNIT,		"SB_UNIT" }, \
+	{ XFS_TRANS_FSYNC_TS,		"FSYNC_TS" }, \
+	{ XFS_TRANS_GROWFSRT_ALLOC,	"GROWFSRT_ALLOC" }, \
+	{ XFS_TRANS_GROWFSRT_ZERO,	"GROWFSRT_ZERO" }, \
+	{ XFS_TRANS_GROWFSRT_FREE,	"GROWFSRT_FREE" }, \
+	{ XFS_TRANS_SWAPEXT,		"SWAPEXT" }, \
+	{ XFS_TRANS_SB_COUNT,		"SB_COUNT" }, \
+	{ XFS_TRANS_CHECKPOINT,		"CHECKPOINT" }, \
+	{ XFS_TRANS_DUMMY1,		"DUMMY1" }, \
+	{ XFS_TRANS_DUMMY2,		"DUMMY2" }, \
+	{ XLOG_UNMOUNT_REC_TYPE,	"UNMOUNT" }
+
+/*
+ * This structure is used to track log items associated with
+ * a transaction.  It points to the log item and keeps some
+ * flags to track the state of the log item.  It also tracks
+ * the amount of space needed to log the item it describes
+ * once we get to commit processing (see xfs_trans_commit()).
+ */
+struct xfs_log_item_desc {
+	struct xfs_log_item	*lid_item;
+	struct list_head	lid_trans;
+	unsigned char		lid_flags;
+};
+
+#define XFS_LID_DIRTY		0x1
+
+/*
+ * Values for t_flags.
+ */
+#define	XFS_TRANS_DIRTY		0x01	/* something needs to be logged */
+#define	XFS_TRANS_SB_DIRTY	0x02	/* superblock is modified */
+#define	XFS_TRANS_PERM_LOG_RES	0x04	/* xact took a permanent log res */
+#define	XFS_TRANS_SYNC		0x08	/* make commit synchronous */
+#define XFS_TRANS_DQ_DIRTY	0x10	/* at least one dquot in trx dirty */
+#define XFS_TRANS_RESERVE	0x20    /* OK to use reserved data blocks */
+#define XFS_TRANS_FREEZE_PROT	0x40	/* Transaction has elevated writer
+					   count in superblock */
+
+/*
+ * Values for call flags parameter.
+ */
+#define	XFS_TRANS_RELEASE_LOG_RES	0x4
+#define	XFS_TRANS_ABORT			0x8
+
+/*
+ * Field values for xfs_trans_mod_sb.
+ */
+#define	XFS_TRANS_SB_ICOUNT		0x00000001
+#define	XFS_TRANS_SB_IFREE		0x00000002
+#define	XFS_TRANS_SB_FDBLOCKS		0x00000004
+#define	XFS_TRANS_SB_RES_FDBLOCKS	0x00000008
+#define	XFS_TRANS_SB_FREXTENTS		0x00000010
+#define	XFS_TRANS_SB_RES_FREXTENTS	0x00000020
+#define	XFS_TRANS_SB_DBLOCKS		0x00000040
+#define	XFS_TRANS_SB_AGCOUNT		0x00000080
+#define	XFS_TRANS_SB_IMAXPCT		0x00000100
+#define	XFS_TRANS_SB_REXTSIZE		0x00000200
+#define	XFS_TRANS_SB_RBMBLOCKS		0x00000400
+#define	XFS_TRANS_SB_RBLOCKS		0x00000800
+#define	XFS_TRANS_SB_REXTENTS		0x00001000
+#define	XFS_TRANS_SB_REXTSLOG		0x00002000
+
+/*
+ * Here we centralize the specification of XFS meta-data buffer
+ * reference count values.  This determine how hard the buffer
+ * cache tries to hold onto the buffer.
+ */
+#define	XFS_AGF_REF		4
+#define	XFS_AGI_REF		4
+#define	XFS_AGFL_REF		3
+#define	XFS_INO_BTREE_REF	3
+#define	XFS_ALLOC_BTREE_REF	2
+#define	XFS_BMAP_BTREE_REF	2
+#define	XFS_DIR_BTREE_REF	2
+#define	XFS_INO_REF		2
+#define	XFS_ATTR_BTREE_REF	1
+#define	XFS_DQUOT_REF		1
+
+/*
+ * Flags for xfs_trans_ichgtime().
+ */
+#define	XFS_ICHGTIME_MOD	0x1	/* data fork modification timestamp */
+#define	XFS_ICHGTIME_CHG	0x2	/* inode field change timestamp */
+#define	XFS_ICHGTIME_CREATE	0x4	/* inode create timestamp */
+
+
+/*
+ * Inode Log Item Format definitions.
+ *
+ * This is the structure used to lay out an inode log item in the
+ * log.  The size of the inline data/extents/b-tree root to be logged
+ * (if any) is indicated in the ilf_dsize field.  Changes to this structure
+ * must be added on to the end.
+ */
+typedef struct xfs_inode_log_format {
+	__uint16_t		ilf_type;	/* inode log item type */
+	__uint16_t		ilf_size;	/* size of this item */
+	__uint32_t		ilf_fields;	/* flags for fields logged */
+	__uint16_t		ilf_asize;	/* size of attr d/ext/root */
+	__uint16_t		ilf_dsize;	/* size of data/ext/root */
+	__uint64_t		ilf_ino;	/* inode number */
+	union {
+		__uint32_t	ilfu_rdev;	/* rdev value for dev inode*/
+		uuid_t		ilfu_uuid;	/* mount point value */
+	} ilf_u;
+	__int64_t		ilf_blkno;	/* blkno of inode buffer */
+	__int32_t		ilf_len;	/* len of inode buffer */
+	__int32_t		ilf_boffset;	/* off of inode in buffer */
+} xfs_inode_log_format_t;
+
+typedef struct xfs_inode_log_format_32 {
+	__uint16_t		ilf_type;	/* inode log item type */
+	__uint16_t		ilf_size;	/* size of this item */
+	__uint32_t		ilf_fields;	/* flags for fields logged */
+	__uint16_t		ilf_asize;	/* size of attr d/ext/root */
+	__uint16_t		ilf_dsize;	/* size of data/ext/root */
+	__uint64_t		ilf_ino;	/* inode number */
+	union {
+		__uint32_t	ilfu_rdev;	/* rdev value for dev inode*/
+		uuid_t		ilfu_uuid;	/* mount point value */
+	} ilf_u;
+	__int64_t		ilf_blkno;	/* blkno of inode buffer */
+	__int32_t		ilf_len;	/* len of inode buffer */
+	__int32_t		ilf_boffset;	/* off of inode in buffer */
+} __attribute__((packed)) xfs_inode_log_format_32_t;
+
+typedef struct xfs_inode_log_format_64 {
+	__uint16_t		ilf_type;	/* inode log item type */
+	__uint16_t		ilf_size;	/* size of this item */
+	__uint32_t		ilf_fields;	/* flags for fields logged */
+	__uint16_t		ilf_asize;	/* size of attr d/ext/root */
+	__uint16_t		ilf_dsize;	/* size of data/ext/root */
+	__uint32_t		ilf_pad;	/* pad for 64 bit boundary */
+	__uint64_t		ilf_ino;	/* inode number */
+	union {
+		__uint32_t	ilfu_rdev;	/* rdev value for dev inode*/
+		uuid_t		ilfu_uuid;	/* mount point value */
+	} ilf_u;
+	__int64_t		ilf_blkno;	/* blkno of inode buffer */
+	__int32_t		ilf_len;	/* len of inode buffer */
+	__int32_t		ilf_boffset;	/* off of inode in buffer */
+} xfs_inode_log_format_64_t;
+
+/*
+ * Flags for xfs_trans_log_inode flags field.
+ */
+#define	XFS_ILOG_CORE	0x001	/* log standard inode fields */
+#define	XFS_ILOG_DDATA	0x002	/* log i_df.if_data */
+#define	XFS_ILOG_DEXT	0x004	/* log i_df.if_extents */
+#define	XFS_ILOG_DBROOT	0x008	/* log i_df.i_broot */
+#define	XFS_ILOG_DEV	0x010	/* log the dev field */
+#define	XFS_ILOG_UUID	0x020	/* log the uuid field */
+#define	XFS_ILOG_ADATA	0x040	/* log i_af.if_data */
+#define	XFS_ILOG_AEXT	0x080	/* log i_af.if_extents */
+#define	XFS_ILOG_ABROOT	0x100	/* log i_af.i_broot */
+
+
+/*
+ * The timestamps are dirty, but not necessarily anything else in the inode
+ * core.  Unlike the other fields above this one must never make it to disk
+ * in the ilf_fields of the inode_log_format, but is purely store in-memory in
+ * ili_fields in the inode_log_item.
+ */
+#define XFS_ILOG_TIMESTAMP	0x4000
+
+#define	XFS_ILOG_NONCORE	(XFS_ILOG_DDATA | XFS_ILOG_DEXT | \
+				 XFS_ILOG_DBROOT | XFS_ILOG_DEV | \
+				 XFS_ILOG_UUID | XFS_ILOG_ADATA | \
+				 XFS_ILOG_AEXT | XFS_ILOG_ABROOT)
+
+#define	XFS_ILOG_DFORK		(XFS_ILOG_DDATA | XFS_ILOG_DEXT | \
+				 XFS_ILOG_DBROOT)
+
+#define	XFS_ILOG_AFORK		(XFS_ILOG_ADATA | XFS_ILOG_AEXT | \
+				 XFS_ILOG_ABROOT)
+
+#define	XFS_ILOG_ALL		(XFS_ILOG_CORE | XFS_ILOG_DDATA | \
+				 XFS_ILOG_DEXT | XFS_ILOG_DBROOT | \
+				 XFS_ILOG_DEV | XFS_ILOG_UUID | \
+				 XFS_ILOG_ADATA | XFS_ILOG_AEXT | \
+				 XFS_ILOG_ABROOT | XFS_ILOG_TIMESTAMP)
+
+static inline int xfs_ilog_fbroot(int w)
+{
+	return (w == XFS_DATA_FORK ? XFS_ILOG_DBROOT : XFS_ILOG_ABROOT);
+}
+
+static inline int xfs_ilog_fext(int w)
+{
+	return (w == XFS_DATA_FORK ? XFS_ILOG_DEXT : XFS_ILOG_AEXT);
+}
+
+static inline int xfs_ilog_fdata(int w)
+{
+	return (w == XFS_DATA_FORK ? XFS_ILOG_DDATA : XFS_ILOG_ADATA);
+}
+
+/*
+ * Incore version of the on-disk inode core structures. We log this directly
+ * into the journal in host CPU format (for better or worse) and as such
+ * directly mirrors the xfs_dinode structure as it must contain all the same
+ * information.
+ */
+typedef struct xfs_ictimestamp {
+	__int32_t	t_sec;		/* timestamp seconds */
+	__int32_t	t_nsec;		/* timestamp nanoseconds */
+} xfs_ictimestamp_t;
+
+/*
+ * NOTE:  This structure must be kept identical to struct xfs_dinode
+ *	  in xfs_dinode.h except for the endianness annotations.
+ */
+typedef struct xfs_icdinode {
+	__uint16_t	di_magic;	/* inode magic # = XFS_DINODE_MAGIC */
+	__uint16_t	di_mode;	/* mode and type of file */
+	__int8_t	di_version;	/* inode version */
+	__int8_t	di_format;	/* format of di_c data */
+	__uint16_t	di_onlink;	/* old number of links to file */
+	__uint32_t	di_uid;		/* owner's user id */
+	__uint32_t	di_gid;		/* owner's group id */
+	__uint32_t	di_nlink;	/* number of links to file */
+	__uint16_t	di_projid_lo;	/* lower part of owner's project id */
+	__uint16_t	di_projid_hi;	/* higher part of owner's project id */
+	__uint8_t	di_pad[6];	/* unused, zeroed space */
+	__uint16_t	di_flushiter;	/* incremented on flush */
+	xfs_ictimestamp_t di_atime;	/* time last accessed */
+	xfs_ictimestamp_t di_mtime;	/* time last modified */
+	xfs_ictimestamp_t di_ctime;	/* time created/inode modified */
+	xfs_fsize_t	di_size;	/* number of bytes in file */
+	xfs_drfsbno_t	di_nblocks;	/* # of direct & btree blocks used */
+	xfs_extlen_t	di_extsize;	/* basic/minimum extent size for file */
+	xfs_extnum_t	di_nextents;	/* number of extents in data fork */
+	xfs_aextnum_t	di_anextents;	/* number of extents in attribute fork*/
+	__uint8_t	di_forkoff;	/* attr fork offs, <<3 for 64b align */
+	__int8_t	di_aformat;	/* format of attr fork's data */
+	__uint32_t	di_dmevmask;	/* DMIG event mask */
+	__uint16_t	di_dmstate;	/* DMIG state info */
+	__uint16_t	di_flags;	/* random flags, XFS_DIFLAG_... */
+	__uint32_t	di_gen;		/* generation number */
+
+	/* di_next_unlinked is the only non-core field in the old dinode */
+	xfs_agino_t	di_next_unlinked;/* agi unlinked list ptr */
+
+	/* start of the extended dinode, writable fields */
+	__uint32_t	di_crc;		/* CRC of the inode */
+	__uint64_t	di_changecount;	/* number of attribute changes */
+	xfs_lsn_t	di_lsn;		/* flush sequence */
+	__uint64_t	di_flags2;	/* more random flags */
+	__uint8_t	di_pad2[16];	/* more padding for future expansion */
+
+	/* fields only written to during inode creation */
+	xfs_ictimestamp_t di_crtime;	/* time created */
+	xfs_ino_t	di_ino;		/* inode number */
+	uuid_t		di_uuid;	/* UUID of the filesystem */
+
+	/* structure must be padded to 64 bit alignment */
+} xfs_icdinode_t;
+
+static inline uint xfs_icdinode_size(int version)
+{
+	if (version == 3)
+		return sizeof(struct xfs_icdinode);
+	return offsetof(struct xfs_icdinode, di_next_unlinked);
+}
+
+/*
+ * Buffer Log Format defintions
+ *
+ * These are the physical dirty bitmap defintions for the log format structure.
+ */
+#define	XFS_BLF_CHUNK		128
+#define	XFS_BLF_SHIFT		7
+#define	BIT_TO_WORD_SHIFT	5
+#define	NBWORD			(NBBY * sizeof(unsigned int))
+
+/*
+ * This flag indicates that the buffer contains on disk inodes
+ * and requires special recovery handling.
+ */
+#define	XFS_BLF_INODE_BUF	(1<<0)
+
+/*
+ * This flag indicates that the buffer should not be replayed
+ * during recovery because its blocks are being freed.
+ */
+#define	XFS_BLF_CANCEL		(1<<1)
+
+/*
+ * This flag indicates that the buffer contains on disk
+ * user or group dquots and may require special recovery handling.
+ */
+#define	XFS_BLF_UDQUOT_BUF	(1<<2)
+#define XFS_BLF_PDQUOT_BUF	(1<<3)
+#define	XFS_BLF_GDQUOT_BUF	(1<<4)
+
+/*
+ * This is the structure used to lay out a buf log item in the
+ * log.  The data map describes which 128 byte chunks of the buffer
+ * have been logged.
+ */
+#define XFS_BLF_DATAMAP_SIZE	((XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK) / NBWORD)
+
+typedef struct xfs_buf_log_format {
+	unsigned short	blf_type;	/* buf log item type indicator */
+	unsigned short	blf_size;	/* size of this item */
+	ushort		blf_flags;	/* misc state */
+	ushort		blf_len;	/* number of blocks in this buf */
+	__int64_t	blf_blkno;	/* starting blkno of this buf */
+	unsigned int	blf_map_size;	/* used size of data bitmap in words */
+	unsigned int	blf_data_map[XFS_BLF_DATAMAP_SIZE]; /* dirty bitmap */
+} xfs_buf_log_format_t;
+
+/*
+ * All buffers now need to tell recovery where the magic number
+ * is so that it can verify and calculate the CRCs on the buffer correctly
+ * once the changes have been replayed into the buffer.
+ *
+ * The type value is held in the upper 5 bits of the blf_flags field, which is
+ * an unsigned 16 bit field. Hence we need to shift it 11 bits up and down.
+ */
+#define XFS_BLFT_BITS	5
+#define XFS_BLFT_SHIFT	11
+#define XFS_BLFT_MASK	(((1 << XFS_BLFT_BITS) - 1) << XFS_BLFT_SHIFT)
+
+enum xfs_blft {
+	XFS_BLFT_UNKNOWN_BUF = 0,
+	XFS_BLFT_UDQUOT_BUF,
+	XFS_BLFT_PDQUOT_BUF,
+	XFS_BLFT_GDQUOT_BUF,
+	XFS_BLFT_BTREE_BUF,
+	XFS_BLFT_AGF_BUF,
+	XFS_BLFT_AGFL_BUF,
+	XFS_BLFT_AGI_BUF,
+	XFS_BLFT_DINO_BUF,
+	XFS_BLFT_SYMLINK_BUF,
+	XFS_BLFT_DIR_BLOCK_BUF,
+	XFS_BLFT_DIR_DATA_BUF,
+	XFS_BLFT_DIR_FREE_BUF,
+	XFS_BLFT_DIR_LEAF1_BUF,
+	XFS_BLFT_DIR_LEAFN_BUF,
+	XFS_BLFT_DA_NODE_BUF,
+	XFS_BLFT_ATTR_LEAF_BUF,
+	XFS_BLFT_ATTR_RMT_BUF,
+	XFS_BLFT_SB_BUF,
+	XFS_BLFT_MAX_BUF = (1 << XFS_BLFT_BITS),
+};
+
+static inline void
+xfs_blft_to_flags(struct xfs_buf_log_format *blf, enum xfs_blft type)
+{
+	ASSERT(type > XFS_BLFT_UNKNOWN_BUF && type < XFS_BLFT_MAX_BUF);
+	blf->blf_flags &= ~XFS_BLFT_MASK;
+	blf->blf_flags |= ((type << XFS_BLFT_SHIFT) & XFS_BLFT_MASK);
+}
+
+static inline __uint16_t
+xfs_blft_from_flags(struct xfs_buf_log_format *blf)
+{
+	return (blf->blf_flags & XFS_BLFT_MASK) >> XFS_BLFT_SHIFT;
+}
+
+/*
+ * EFI/EFD log format definitions
+ */
+typedef struct xfs_extent {
+	xfs_dfsbno_t	ext_start;
+	xfs_extlen_t	ext_len;
+} xfs_extent_t;
+
+/*
+ * Since an xfs_extent_t has types (start:64, len: 32)
+ * there are different alignments on 32 bit and 64 bit kernels.
+ * So we provide the different variants for use by a
+ * conversion routine.
+ */
+typedef struct xfs_extent_32 {
+	__uint64_t	ext_start;
+	__uint32_t	ext_len;
+} __attribute__((packed)) xfs_extent_32_t;
+
+typedef struct xfs_extent_64 {
+	__uint64_t	ext_start;
+	__uint32_t	ext_len;
+	__uint32_t	ext_pad;
+} xfs_extent_64_t;
+
+/*
+ * This is the structure used to lay out an efi log item in the
+ * log.  The efi_extents field is a variable size array whose
+ * size is given by efi_nextents.
+ */
+typedef struct xfs_efi_log_format {
+	__uint16_t		efi_type;	/* efi log item type */
+	__uint16_t		efi_size;	/* size of this item */
+	__uint32_t		efi_nextents;	/* # extents to free */
+	__uint64_t		efi_id;		/* efi identifier */
+	xfs_extent_t		efi_extents[1];	/* array of extents to free */
+} xfs_efi_log_format_t;
+
+typedef struct xfs_efi_log_format_32 {
+	__uint16_t		efi_type;	/* efi log item type */
+	__uint16_t		efi_size;	/* size of this item */
+	__uint32_t		efi_nextents;	/* # extents to free */
+	__uint64_t		efi_id;		/* efi identifier */
+	xfs_extent_32_t		efi_extents[1];	/* array of extents to free */
+} __attribute__((packed)) xfs_efi_log_format_32_t;
+
+typedef struct xfs_efi_log_format_64 {
+	__uint16_t		efi_type;	/* efi log item type */
+	__uint16_t		efi_size;	/* size of this item */
+	__uint32_t		efi_nextents;	/* # extents to free */
+	__uint64_t		efi_id;		/* efi identifier */
+	xfs_extent_64_t		efi_extents[1];	/* array of extents to free */
+} xfs_efi_log_format_64_t;
+
+/*
+ * This is the structure used to lay out an efd log item in the
+ * log.  The efd_extents array is a variable size array whose
+ * size is given by efd_nextents;
+ */
+typedef struct xfs_efd_log_format {
+	__uint16_t		efd_type;	/* efd log item type */
+	__uint16_t		efd_size;	/* size of this item */
+	__uint32_t		efd_nextents;	/* # of extents freed */
+	__uint64_t		efd_efi_id;	/* id of corresponding efi */
+	xfs_extent_t		efd_extents[1];	/* array of extents freed */
+} xfs_efd_log_format_t;
+
+typedef struct xfs_efd_log_format_32 {
+	__uint16_t		efd_type;	/* efd log item type */
+	__uint16_t		efd_size;	/* size of this item */
+	__uint32_t		efd_nextents;	/* # of extents freed */
+	__uint64_t		efd_efi_id;	/* id of corresponding efi */
+	xfs_extent_32_t		efd_extents[1];	/* array of extents freed */
+} __attribute__((packed)) xfs_efd_log_format_32_t;
+
+typedef struct xfs_efd_log_format_64 {
+	__uint16_t		efd_type;	/* efd log item type */
+	__uint16_t		efd_size;	/* size of this item */
+	__uint32_t		efd_nextents;	/* # of extents freed */
+	__uint64_t		efd_efi_id;	/* id of corresponding efi */
+	xfs_extent_64_t		efd_extents[1];	/* array of extents freed */
+} xfs_efd_log_format_64_t;
+
+/*
+ * Dquot Log format definitions.
+ *
+ * The first two fields must be the type and size fitting into
+ * 32 bits : log_recovery code assumes that.
+ */
+typedef struct xfs_dq_logformat {
+	__uint16_t		qlf_type;      /* dquot log item type */
+	__uint16_t		qlf_size;      /* size of this item */
+	xfs_dqid_t		qlf_id;	       /* usr/grp/proj id : 32 bits */
+	__int64_t		qlf_blkno;     /* blkno of dquot buffer */
+	__int32_t		qlf_len;       /* len of dquot buffer */
+	__uint32_t		qlf_boffset;   /* off of dquot in buffer */
+} xfs_dq_logformat_t;
+
+/*
+ * log format struct for QUOTAOFF records.
+ * The first two fields must be the type and size fitting into
+ * 32 bits : log_recovery code assumes that.
+ * We write two LI_QUOTAOFF logitems per quotaoff, the last one keeps a pointer
+ * to the first and ensures that the first logitem is taken out of the AIL
+ * only when the last one is securely committed.
+ */
+typedef struct xfs_qoff_logformat {
+	unsigned short		qf_type;	/* quotaoff log item type */
+	unsigned short		qf_size;	/* size of this item */
+	unsigned int		qf_flags;	/* USR and/or GRP */
+	char			qf_pad[12];	/* padding for future */
+} xfs_qoff_logformat_t;
+
+
+/*
+ * Disk quotas status in m_qflags, and also sb_qflags. 16 bits.
+ */
+#define XFS_UQUOTA_ACCT	0x0001  /* user quota accounting ON */
+#define XFS_UQUOTA_ENFD	0x0002  /* user quota limits enforced */
+#define XFS_UQUOTA_CHKD	0x0004  /* quotacheck run on usr quotas */
+#define XFS_PQUOTA_ACCT	0x0008  /* project quota accounting ON */
+#define XFS_OQUOTA_ENFD	0x0010  /* other (grp/prj) quota limits enforced */
+#define XFS_OQUOTA_CHKD	0x0020  /* quotacheck run on other (grp/prj) quotas */
+#define XFS_GQUOTA_ACCT	0x0040  /* group quota accounting ON */
+
+/*
+ * Conversion to and from the combined OQUOTA flag (if necessary)
+ * is done only in xfs_sb_qflags_to_disk() and xfs_sb_qflags_from_disk()
+ */
+#define XFS_GQUOTA_ENFD	0x0080  /* group quota limits enforced */
+#define XFS_GQUOTA_CHKD	0x0100  /* quotacheck run on group quotas */
+#define XFS_PQUOTA_ENFD	0x0200  /* project quota limits enforced */
+#define XFS_PQUOTA_CHKD	0x0400  /* quotacheck run on project quotas */
+
+#define XFS_ALL_QUOTA_ACCT	\
+		(XFS_UQUOTA_ACCT | XFS_GQUOTA_ACCT | XFS_PQUOTA_ACCT)
+#define XFS_ALL_QUOTA_ENFD	\
+		(XFS_UQUOTA_ENFD | XFS_GQUOTA_ENFD | XFS_PQUOTA_ENFD)
+#define XFS_ALL_QUOTA_CHKD	\
+		(XFS_UQUOTA_CHKD | XFS_GQUOTA_CHKD | XFS_PQUOTA_CHKD)
+
+#define XFS_MOUNT_QUOTA_ALL	(XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|\
+				 XFS_UQUOTA_CHKD|XFS_GQUOTA_ACCT|\
+				 XFS_GQUOTA_ENFD|XFS_GQUOTA_CHKD|\
+				 XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD|\
+				 XFS_PQUOTA_CHKD)
+
+/*
+ * Inode create log item structure
+ *
+ * Log recovery assumes the first two entries are the type and size and they fit
+ * in 32 bits. Also in host order (ugh) so they have to be 32 bit aligned so
+ * decoding can be done correctly.
+ */
+struct xfs_icreate_log {
+	__uint16_t	icl_type;	/* type of log format structure */
+	__uint16_t	icl_size;	/* size of log format structure */
+	__be32		icl_ag;		/* ag being allocated in */
+	__be32		icl_agbno;	/* start block of inode range */
+	__be32		icl_count;	/* number of inodes to initialise */
+	__be32		icl_isize;	/* size of inodes */
+	__be32		icl_length;	/* length of extent to initialise */
+	__be32		icl_gen;	/* inode generation number to use */
+};
+
+int	xfs_log_calc_unit_res(struct xfs_mount *mp, int unit_bytes);
+int	xfs_log_calc_minimum_size(struct xfs_mount *);
+
+
+#endif /* __XFS_LOG_FORMAT_H__ */
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index b9ea262..136654b 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -24,51 +24,13 @@
 struct xfs_mount;
 
 /*
- * Macros, structures, prototypes for internal log manager use.
+ * Flags for log structure
  */
-
-#define XLOG_MIN_ICLOGS		2
-#define XLOG_MAX_ICLOGS		8
-#define XLOG_HEADER_MAGIC_NUM	0xFEEDbabe	/* Invalid cycle number */
-#define XLOG_VERSION_1		1
-#define XLOG_VERSION_2		2		/* Large IClogs, Log sunit */
-#define XLOG_VERSION_OKBITS	(XLOG_VERSION_1 | XLOG_VERSION_2)
-#define XLOG_MIN_RECORD_BSIZE	(16*1024)	/* eventually 32k */
-#define XLOG_BIG_RECORD_BSIZE	(32*1024)	/* 32k buffers */
-#define XLOG_MAX_RECORD_BSIZE	(256*1024)
-#define XLOG_HEADER_CYCLE_SIZE	(32*1024)	/* cycle data in header */
-#define XLOG_MIN_RECORD_BSHIFT	14		/* 16384 == 1 << 14 */
-#define XLOG_BIG_RECORD_BSHIFT	15		/* 32k == 1 << 15 */
-#define XLOG_MAX_RECORD_BSHIFT	18		/* 256k == 1 << 18 */
-#define XLOG_BTOLSUNIT(log, b)  (((b)+(log)->l_mp->m_sb.sb_logsunit-1) / \
-                                 (log)->l_mp->m_sb.sb_logsunit)
-#define XLOG_LSUNITTOB(log, su) ((su) * (log)->l_mp->m_sb.sb_logsunit)
-
-#define XLOG_HEADER_SIZE	512
-
-#define XLOG_REC_SHIFT(log) \
-	BTOBB(1 << (xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? \
-	 XLOG_MAX_RECORD_BSHIFT : XLOG_BIG_RECORD_BSHIFT))
-#define XLOG_TOTAL_REC_SHIFT(log) \
-	BTOBB(XLOG_MAX_ICLOGS << (xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? \
-	 XLOG_MAX_RECORD_BSHIFT : XLOG_BIG_RECORD_BSHIFT))
-
-static inline xfs_lsn_t xlog_assign_lsn(uint cycle, uint block)
-{
-	return ((xfs_lsn_t)cycle << 32) | block;
-}
-
-static inline uint xlog_get_cycle(char *ptr)
-{
-	if (be32_to_cpu(*(__be32 *)ptr) == XLOG_HEADER_MAGIC_NUM)
-		return be32_to_cpu(*((__be32 *)ptr + 1));
-	else
-		return be32_to_cpu(*(__be32 *)ptr);
-}
-
-#define BLK_AVG(blk1, blk2)	((blk1+blk2) >> 1)
-
-#ifdef __KERNEL__
+#define XLOG_ACTIVE_RECOVERY	0x2	/* in the middle of recovery */
+#define	XLOG_RECOVERY_NEEDED	0x4	/* log was recovered */
+#define XLOG_IO_ERROR		0x8	/* log hit an I/O error, and being
+					   shutdown */
+#define XLOG_TAIL_WARN		0x10	/* log tail verify warning issued */
 
 /*
  * get client id from packed copy.
@@ -101,28 +63,8 @@
 #define XLOG_STATE_IOERROR   0x0080 /* IO error happened in sync'ing log */
 #define XLOG_STATE_ALL	     0x7FFF /* All possible valid flags */
 #define XLOG_STATE_NOTUSED   0x8000 /* This IC log not being used */
-#endif	/* __KERNEL__ */
 
 /*
- * Flags to log operation header
- *
- * The first write of a new transaction will be preceded with a start
- * record, XLOG_START_TRANS.  Once a transaction is committed, a commit
- * record is written, XLOG_COMMIT_TRANS.  If a single region can not fit into
- * the remainder of the current active in-core log, it is split up into
- * multiple regions.  Each partial region will be marked with a
- * XLOG_CONTINUE_TRANS until the last one, which gets marked with XLOG_END_TRANS.
- *
- */
-#define XLOG_START_TRANS	0x01	/* Start a new transaction */
-#define XLOG_COMMIT_TRANS	0x02	/* Commit this transaction */
-#define XLOG_CONTINUE_TRANS	0x04	/* Cont this trans into new region */
-#define XLOG_WAS_CONT_TRANS	0x08	/* Cont this trans into new region */
-#define XLOG_END_TRANS		0x10	/* End a continued transaction */
-#define XLOG_UNMOUNT_TRANS	0x20	/* Unmount a filesystem transaction */
-
-#ifdef __KERNEL__
-/*
  * Flags to log ticket
  */
 #define XLOG_TIC_INITED		0x1	/* has been initialized */
@@ -132,22 +74,6 @@
 	{ XLOG_TIC_INITED,	"XLOG_TIC_INITED" }, \
 	{ XLOG_TIC_PERM_RESERV,	"XLOG_TIC_PERM_RESERV" }
 
-#endif	/* __KERNEL__ */
-
-#define XLOG_UNMOUNT_TYPE	0x556e	/* Un for Unmount */
-
-/*
- * Flags for log structure
- */
-#define XLOG_ACTIVE_RECOVERY	0x2	/* in the middle of recovery */
-#define	XLOG_RECOVERY_NEEDED	0x4	/* log was recovered */
-#define XLOG_IO_ERROR		0x8	/* log hit an I/O error, and being
-					   shutdown */
-#define XLOG_TAIL_WARN		0x10	/* log tail verify warning issued */
-
-typedef __uint32_t xlog_tid_t;
-
-#ifdef __KERNEL__
 /*
  * Below are states for covering allocation transactions.
  * By covering, we mean changing the h_tail_lsn in the last on-disk
@@ -223,7 +149,6 @@
 
 #define XLOG_COVER_OPS		5
 
-
 /* Ticket reservation region accounting */ 
 #define XLOG_TIC_LEN_MAX	15
 
@@ -258,64 +183,6 @@
 	xlog_res_t	   t_res_arr[XLOG_TIC_LEN_MAX];  /* array of res : 8 * 15 */ 
 } xlog_ticket_t;
 
-#endif
-
-
-typedef struct xlog_op_header {
-	__be32	   oh_tid;	/* transaction id of operation	:  4 b */
-	__be32	   oh_len;	/* bytes in data region		:  4 b */
-	__u8	   oh_clientid;	/* who sent me this		:  1 b */
-	__u8	   oh_flags;	/*				:  1 b */
-	__u16	   oh_res2;	/* 32 bit align			:  2 b */
-} xlog_op_header_t;
-
-
-/* valid values for h_fmt */
-#define XLOG_FMT_UNKNOWN  0
-#define XLOG_FMT_LINUX_LE 1
-#define XLOG_FMT_LINUX_BE 2
-#define XLOG_FMT_IRIX_BE  3
-
-/* our fmt */
-#ifdef XFS_NATIVE_HOST
-#define XLOG_FMT XLOG_FMT_LINUX_BE
-#else
-#define XLOG_FMT XLOG_FMT_LINUX_LE
-#endif
-
-typedef struct xlog_rec_header {
-	__be32	  h_magicno;	/* log record (LR) identifier		:  4 */
-	__be32	  h_cycle;	/* write cycle of log			:  4 */
-	__be32	  h_version;	/* LR version				:  4 */
-	__be32	  h_len;	/* len in bytes; should be 64-bit aligned: 4 */
-	__be64	  h_lsn;	/* lsn of this LR			:  8 */
-	__be64	  h_tail_lsn;	/* lsn of 1st LR w/ buffers not committed: 8 */
-	__le32	  h_crc;	/* crc of log record                    :  4 */
-	__be32	  h_prev_block; /* block number to previous LR		:  4 */
-	__be32	  h_num_logops;	/* number of log operations in this LR	:  4 */
-	__be32	  h_cycle_data[XLOG_HEADER_CYCLE_SIZE / BBSIZE];
-	/* new fields */
-	__be32    h_fmt;        /* format of log record                 :  4 */
-	uuid_t	  h_fs_uuid;    /* uuid of FS                           : 16 */
-	__be32	  h_size;	/* iclog size				:  4 */
-} xlog_rec_header_t;
-
-typedef struct xlog_rec_ext_header {
-	__be32	  xh_cycle;	/* write cycle of log			: 4 */
-	__be32	  xh_cycle_data[XLOG_HEADER_CYCLE_SIZE / BBSIZE]; /*	: 256 */
-} xlog_rec_ext_header_t;
-
-#ifdef __KERNEL__
-
-/*
- * Quite misnamed, because this union lays out the actual on-disk log buffer.
- */
-typedef union xlog_in_core2 {
-	xlog_rec_header_t	hic_header;
-	xlog_rec_ext_header_t	hic_xheader;
-	char			hic_sector[XLOG_HEADER_SIZE];
-} xlog_in_core_2_t;
-
 /*
  * - A log record header is 512 bytes.  There is plenty of room to grow the
  *	xlog_rec_header_t into the reserved space.
@@ -411,14 +278,17 @@
 	struct xlog		*xc_log;
 	struct list_head	xc_cil;
 	spinlock_t		xc_cil_lock;
+
+	struct rw_semaphore	xc_ctx_lock ____cacheline_aligned_in_smp;
 	struct xfs_cil_ctx	*xc_ctx;
-	struct rw_semaphore	xc_ctx_lock;
+
+	spinlock_t		xc_push_lock ____cacheline_aligned_in_smp;
+	xfs_lsn_t		xc_push_seq;
 	struct list_head	xc_committing;
 	wait_queue_head_t	xc_commit_wait;
 	xfs_lsn_t		xc_current_sequence;
 	struct work_struct	xc_push_work;
-	xfs_lsn_t		xc_push_seq;
-};
+} ____cacheline_aligned_in_smp;
 
 /*
  * The amount of log space we allow the CIL to aggregate is difficult to size.
@@ -686,6 +556,5 @@
 	schedule();
 	remove_wait_queue(wq, &wait);
 }
-#endif	/* __KERNEL__ */
 
 #endif	/* __XFS_LOG_PRIV_H__ */
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 7681b19..7c0c1fd 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -17,7 +17,7 @@
  */
 #include "xfs.h"
 #include "xfs_fs.h"
-#include "xfs_types.h"
+#include "xfs_format.h"
 #include "xfs_bit.h"
 #include "xfs_log.h"
 #include "xfs_inum.h"
@@ -41,7 +41,6 @@
 #include "xfs_extfree_item.h"
 #include "xfs_trans_priv.h"
 #include "xfs_quota.h"
-#include "xfs_utils.h"
 #include "xfs_cksum.h"
 #include "xfs_trace.h"
 #include "xfs_icache.h"
@@ -51,10 +50,12 @@
 #include "xfs_symlink.h"
 #include "xfs_da_btree.h"
 #include "xfs_dir2_format.h"
-#include "xfs_dir2_priv.h"
+#include "xfs_dir2.h"
 #include "xfs_attr_leaf.h"
 #include "xfs_attr_remote.h"
 
+#define BLK_AVG(blk1, blk2)	((blk1+blk2) >> 1)
+
 STATIC int
 xlog_find_zeroed(
 	struct xlog	*,
@@ -607,7 +608,7 @@
 
 /*
  * Head is defined to be the point of the log where the next log write
- * write could go.  This means that incomplete LR writes at the end are
+ * could go.  This means that incomplete LR writes at the end are
  * eliminated when calculating the head.  We aren't guaranteed that previous
  * LR have complete transactions.  We only know that a cycle number of
  * current cycle number -1 won't be present in the log if we start writing
@@ -963,6 +964,7 @@
 	}
 	if (!found) {
 		xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__);
+		xlog_put_bp(bp);
 		ASSERT(0);
 		return XFS_ERROR(EIO);
 	}
@@ -1144,7 +1146,8 @@
 		 */
 		xfs_warn(log->l_mp,
 			"Log inconsistent or not a log (last==0, first!=1)");
-		return XFS_ERROR(EINVAL);
+		error = XFS_ERROR(EINVAL);
+		goto bp_err;
 	}
 
 	/* we have a partially zeroed log */
@@ -1766,19 +1769,11 @@
 
 /*
  * Check to see whether the buffer being recovered has a corresponding
- * entry in the buffer cancel record table.  If it does then return 1
- * so that it will be cancelled, otherwise return 0.  If the buffer is
- * actually a buffer cancel item (XFS_BLF_CANCEL is set), then decrement
- * the refcount on the entry in the table and remove it from the table
- * if this is the last reference.
- *
- * We remove the cancel record from the table when we encounter its
- * last occurrence in the log so that if the same buffer is re-used
- * again after its last cancellation we actually replay the changes
- * made at that point.
+ * entry in the buffer cancel record table. If it is, return the cancel
+ * buffer structure to the caller.
  */
-STATIC int
-xlog_check_buffer_cancelled(
+STATIC struct xfs_buf_cancel *
+xlog_peek_buffer_cancelled(
 	struct xlog		*log,
 	xfs_daddr_t		blkno,
 	uint			len,
@@ -1787,22 +1782,16 @@
 	struct list_head	*bucket;
 	struct xfs_buf_cancel	*bcp;
 
-	if (log->l_buf_cancel_table == NULL) {
-		/*
-		 * There is nothing in the table built in pass one,
-		 * so this buffer must not be cancelled.
-		 */
+	if (!log->l_buf_cancel_table) {
+		/* empty table means no cancelled buffers in the log */
 		ASSERT(!(flags & XFS_BLF_CANCEL));
-		return 0;
+		return NULL;
 	}
 
-	/*
-	 * Search for an entry in the  cancel table that matches our buffer.
-	 */
 	bucket = XLOG_BUF_CANCEL_BUCKET(log, blkno);
 	list_for_each_entry(bcp, bucket, bc_list) {
 		if (bcp->bc_blkno == blkno && bcp->bc_len == len)
-			goto found;
+			return bcp;
 	}
 
 	/*
@@ -1810,9 +1799,32 @@
 	 * that the buffer is NOT cancelled.
 	 */
 	ASSERT(!(flags & XFS_BLF_CANCEL));
-	return 0;
+	return NULL;
+}
 
-found:
+/*
+ * If the buffer is being cancelled then return 1 so that it will be cancelled,
+ * otherwise return 0.  If the buffer is actually a buffer cancel item
+ * (XFS_BLF_CANCEL is set), then decrement the refcount on the entry in the
+ * table and remove it from the table if this is the last reference.
+ *
+ * We remove the cancel record from the table when we encounter its last
+ * occurrence in the log so that if the same buffer is re-used again after its
+ * last cancellation we actually replay the changes made at that point.
+ */
+STATIC int
+xlog_check_buffer_cancelled(
+	struct xlog		*log,
+	xfs_daddr_t		blkno,
+	uint			len,
+	ushort			flags)
+{
+	struct xfs_buf_cancel	*bcp;
+
+	bcp = xlog_peek_buffer_cancelled(log, blkno, len, flags);
+	if (!bcp)
+		return 0;
+
 	/*
 	 * We've go a match, so return 1 so that the recovery of this buffer
 	 * is cancelled.  If this buffer is actually a buffer cancel log
@@ -1947,6 +1959,104 @@
 }
 
 /*
+ * V5 filesystems know the age of the buffer on disk being recovered. We can
+ * have newer objects on disk than we are replaying, and so for these cases we
+ * don't want to replay the current change as that will make the buffer contents
+ * temporarily invalid on disk.
+ *
+ * The magic number might not match the buffer type we are going to recover
+ * (e.g. reallocated blocks), so we ignore the xfs_buf_log_format flags.  Hence
+ * extract the LSN of the existing object in the buffer based on it's current
+ * magic number.  If we don't recognise the magic number in the buffer, then
+ * return a LSN of -1 so that the caller knows it was an unrecognised block and
+ * so can recover the buffer.
+ */
+static xfs_lsn_t
+xlog_recover_get_buf_lsn(
+	struct xfs_mount	*mp,
+	struct xfs_buf		*bp)
+{
+	__uint32_t		magic32;
+	__uint16_t		magic16;
+	__uint16_t		magicda;
+	void			*blk = bp->b_addr;
+
+	/* v4 filesystems always recover immediately */
+	if (!xfs_sb_version_hascrc(&mp->m_sb))
+		goto recover_immediately;
+
+	magic32 = be32_to_cpu(*(__be32 *)blk);
+	switch (magic32) {
+	case XFS_ABTB_CRC_MAGIC:
+	case XFS_ABTC_CRC_MAGIC:
+	case XFS_ABTB_MAGIC:
+	case XFS_ABTC_MAGIC:
+	case XFS_IBT_CRC_MAGIC:
+	case XFS_IBT_MAGIC:
+		return be64_to_cpu(
+				((struct xfs_btree_block *)blk)->bb_u.s.bb_lsn);
+	case XFS_BMAP_CRC_MAGIC:
+	case XFS_BMAP_MAGIC:
+		return be64_to_cpu(
+				((struct xfs_btree_block *)blk)->bb_u.l.bb_lsn);
+	case XFS_AGF_MAGIC:
+		return be64_to_cpu(((struct xfs_agf *)blk)->agf_lsn);
+	case XFS_AGFL_MAGIC:
+		return be64_to_cpu(((struct xfs_agfl *)blk)->agfl_lsn);
+	case XFS_AGI_MAGIC:
+		return be64_to_cpu(((struct xfs_agi *)blk)->agi_lsn);
+	case XFS_SYMLINK_MAGIC:
+		return be64_to_cpu(((struct xfs_dsymlink_hdr *)blk)->sl_lsn);
+	case XFS_DIR3_BLOCK_MAGIC:
+	case XFS_DIR3_DATA_MAGIC:
+	case XFS_DIR3_FREE_MAGIC:
+		return be64_to_cpu(((struct xfs_dir3_blk_hdr *)blk)->lsn);
+	case XFS_ATTR3_RMT_MAGIC:
+		return be64_to_cpu(((struct xfs_attr3_rmt_hdr *)blk)->rm_lsn);
+	case XFS_SB_MAGIC:
+		return be64_to_cpu(((struct xfs_sb *)blk)->sb_lsn);
+	default:
+		break;
+	}
+
+	magicda = be16_to_cpu(((struct xfs_da_blkinfo *)blk)->magic);
+	switch (magicda) {
+	case XFS_DIR3_LEAF1_MAGIC:
+	case XFS_DIR3_LEAFN_MAGIC:
+	case XFS_DA3_NODE_MAGIC:
+		return be64_to_cpu(((struct xfs_da3_blkinfo *)blk)->lsn);
+	default:
+		break;
+	}
+
+	/*
+	 * We do individual object checks on dquot and inode buffers as they
+	 * have their own individual LSN records. Also, we could have a stale
+	 * buffer here, so we have to at least recognise these buffer types.
+	 *
+	 * A notd complexity here is inode unlinked list processing - it logs
+	 * the inode directly in the buffer, but we don't know which inodes have
+	 * been modified, and there is no global buffer LSN. Hence we need to
+	 * recover all inode buffer types immediately. This problem will be
+	 * fixed by logical logging of the unlinked list modifications.
+	 */
+	magic16 = be16_to_cpu(*(__be16 *)blk);
+	switch (magic16) {
+	case XFS_DQUOT_MAGIC:
+	case XFS_DINODE_MAGIC:
+		goto recover_immediately;
+	default:
+		break;
+	}
+
+	/* unknown buffer contents, recover immediately */
+
+recover_immediately:
+	return (xfs_lsn_t)-1;
+
+}
+
+/*
  * Validate the recovered buffer is of the correct type and attach the
  * appropriate buffer operations to them for writeback. Magic numbers are in a
  * few places:
@@ -1955,7 +2065,7 @@
  *	inside a struct xfs_da_blkinfo at the start of the buffer.
  */
 static void
-xlog_recovery_validate_buf_type(
+xlog_recover_validate_buf_type(
 	struct xfs_mount	*mp,
 	struct xfs_buf		*bp,
 	xfs_buf_log_format_t	*buf_f)
@@ -2234,7 +2344,7 @@
 	 * just avoid the verification stage for non-crc filesystems
 	 */
 	if (xfs_sb_version_hascrc(&mp->m_sb))
-		xlog_recovery_validate_buf_type(mp, bp, buf_f);
+		xlog_recover_validate_buf_type(mp, bp, buf_f);
 }
 
 /*
@@ -2366,7 +2476,7 @@
 
 /*
  * Perform a dquot buffer recovery.
- * Simple algorithm: if we have found a QUOTAOFF logitem of the same type
+ * Simple algorithm: if we have found a QUOTAOFF log item of the same type
  * (ie. USR or GRP), then just toss this buffer away; don't recover it.
  * Else, treat it as a regular buffer and do recovery.
  */
@@ -2425,20 +2535,22 @@
  * over the log during recovery.  During the first we build a table of
  * those buffers which have been cancelled, and during the second we
  * only replay those buffers which do not have corresponding cancel
- * records in the table.  See xlog_recover_do_buffer_pass[1,2] above
+ * records in the table.  See xlog_recover_buffer_pass[1,2] above
  * for more details on the implementation of the table of cancel records.
  */
 STATIC int
 xlog_recover_buffer_pass2(
 	struct xlog			*log,
 	struct list_head		*buffer_list,
-	struct xlog_recover_item	*item)
+	struct xlog_recover_item	*item,
+	xfs_lsn_t			current_lsn)
 {
 	xfs_buf_log_format_t	*buf_f = item->ri_buf[0].i_addr;
 	xfs_mount_t		*mp = log->l_mp;
 	xfs_buf_t		*bp;
 	int			error;
 	uint			buf_flags;
+	xfs_lsn_t		lsn;
 
 	/*
 	 * In this pass we only want to recover all the buffers which have
@@ -2463,10 +2575,17 @@
 	error = bp->b_error;
 	if (error) {
 		xfs_buf_ioerror_alert(bp, "xlog_recover_do..(read#1)");
-		xfs_buf_relse(bp);
-		return error;
+		goto out_release;
 	}
 
+	/*
+	 * recover the buffer only if we get an LSN from it and it's less than
+	 * the lsn of the transaction we are replaying.
+	 */
+	lsn = xlog_recover_get_buf_lsn(mp, bp);
+	if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0)
+		goto out_release;
+
 	if (buf_f->blf_flags & XFS_BLF_INODE_BUF) {
 		error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f);
 	} else if (buf_f->blf_flags &
@@ -2476,7 +2595,7 @@
 		xlog_recover_do_reg_buffer(mp, item, bp, buf_f);
 	}
 	if (error)
-		return XFS_ERROR(error);
+		goto out_release;
 
 	/*
 	 * Perform delayed write on the buffer.  Asynchronous writes will be
@@ -2505,6 +2624,7 @@
 		xfs_buf_delwri_queue(bp, buffer_list);
 	}
 
+out_release:
 	xfs_buf_relse(bp);
 	return error;
 }
@@ -2513,7 +2633,8 @@
 xlog_recover_inode_pass2(
 	struct xlog			*log,
 	struct list_head		*buffer_list,
-	struct xlog_recover_item	*item)
+	struct xlog_recover_item	*item,
+	xfs_lsn_t			current_lsn)
 {
 	xfs_inode_log_format_t	*in_f;
 	xfs_mount_t		*mp = log->l_mp;
@@ -2593,6 +2714,20 @@
 	}
 
 	/*
+	 * If the inode has an LSN in it, recover the inode only if it's less
+	 * than the lsn of the transaction we are replaying.
+	 */
+	if (dip->di_version >= 3) {
+		xfs_lsn_t	lsn = be64_to_cpu(dip->di_lsn);
+
+		if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
+			trace_xfs_log_recover_inode_skip(log, in_f);
+			error = 0;
+			goto out_release;
+		}
+	}
+
+	/*
 	 * di_flushiter is only valid for v1/2 inodes. All changes for v3 inodes
 	 * are transactional and if ordering is necessary we can determine that
 	 * more accurately by the LSN field in the V3 inode core. Don't trust
@@ -2781,6 +2916,8 @@
 	ASSERT(bp->b_target->bt_mount == mp);
 	bp->b_iodone = xlog_recover_iodone;
 	xfs_buf_delwri_queue(bp, buffer_list);
+
+out_release:
 	xfs_buf_relse(bp);
 error:
 	if (need_free)
@@ -2822,7 +2959,8 @@
 xlog_recover_dquot_pass2(
 	struct xlog			*log,
 	struct list_head		*buffer_list,
-	struct xlog_recover_item	*item)
+	struct xlog_recover_item	*item,
+	xfs_lsn_t			current_lsn)
 {
 	xfs_mount_t		*mp = log->l_mp;
 	xfs_buf_t		*bp;
@@ -2896,6 +3034,19 @@
 		return XFS_ERROR(EIO);
 	}
 
+	/*
+	 * If the dquot has an LSN in it, recover the dquot only if it's less
+	 * than the lsn of the transaction we are replaying.
+	 */
+	if (xfs_sb_version_hascrc(&mp->m_sb)) {
+		struct xfs_dqblk *dqb = (struct xfs_dqblk *)ddq;
+		xfs_lsn_t	lsn = be64_to_cpu(dqb->dd_lsn);
+
+		if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
+			goto out_release;
+		}
+	}
+
 	memcpy(ddq, recddq, item->ri_buf[1].i_len);
 	if (xfs_sb_version_hascrc(&mp->m_sb)) {
 		xfs_update_cksum((char *)ddq, sizeof(struct xfs_dqblk),
@@ -2906,9 +3057,10 @@
 	ASSERT(bp->b_target->bt_mount == mp);
 	bp->b_iodone = xlog_recover_iodone;
 	xfs_buf_delwri_queue(bp, buffer_list);
-	xfs_buf_relse(bp);
 
-	return (0);
+out_release:
+	xfs_buf_relse(bp);
+	return 0;
 }
 
 /*
@@ -3116,6 +3268,106 @@
 	kmem_free(trans);
 }
 
+STATIC void
+xlog_recover_buffer_ra_pass2(
+	struct xlog                     *log,
+	struct xlog_recover_item        *item)
+{
+	struct xfs_buf_log_format	*buf_f = item->ri_buf[0].i_addr;
+	struct xfs_mount		*mp = log->l_mp;
+
+	if (xlog_peek_buffer_cancelled(log, buf_f->blf_blkno,
+			buf_f->blf_len, buf_f->blf_flags)) {
+		return;
+	}
+
+	xfs_buf_readahead(mp->m_ddev_targp, buf_f->blf_blkno,
+				buf_f->blf_len, NULL);
+}
+
+STATIC void
+xlog_recover_inode_ra_pass2(
+	struct xlog                     *log,
+	struct xlog_recover_item        *item)
+{
+	struct xfs_inode_log_format	ilf_buf;
+	struct xfs_inode_log_format	*ilfp;
+	struct xfs_mount		*mp = log->l_mp;
+	int			error;
+
+	if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) {
+		ilfp = item->ri_buf[0].i_addr;
+	} else {
+		ilfp = &ilf_buf;
+		memset(ilfp, 0, sizeof(*ilfp));
+		error = xfs_inode_item_format_convert(&item->ri_buf[0], ilfp);
+		if (error)
+			return;
+	}
+
+	if (xlog_peek_buffer_cancelled(log, ilfp->ilf_blkno, ilfp->ilf_len, 0))
+		return;
+
+	xfs_buf_readahead(mp->m_ddev_targp, ilfp->ilf_blkno,
+				ilfp->ilf_len, &xfs_inode_buf_ra_ops);
+}
+
+STATIC void
+xlog_recover_dquot_ra_pass2(
+	struct xlog			*log,
+	struct xlog_recover_item	*item)
+{
+	struct xfs_mount	*mp = log->l_mp;
+	struct xfs_disk_dquot	*recddq;
+	struct xfs_dq_logformat	*dq_f;
+	uint			type;
+
+
+	if (mp->m_qflags == 0)
+		return;
+
+	recddq = item->ri_buf[1].i_addr;
+	if (recddq == NULL)
+		return;
+	if (item->ri_buf[1].i_len < sizeof(struct xfs_disk_dquot))
+		return;
+
+	type = recddq->d_flags & (XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP);
+	ASSERT(type);
+	if (log->l_quotaoffs_flag & type)
+		return;
+
+	dq_f = item->ri_buf[0].i_addr;
+	ASSERT(dq_f);
+	ASSERT(dq_f->qlf_len == 1);
+
+	xfs_buf_readahead(mp->m_ddev_targp, dq_f->qlf_blkno,
+			  XFS_FSB_TO_BB(mp, dq_f->qlf_len), NULL);
+}
+
+STATIC void
+xlog_recover_ra_pass2(
+	struct xlog			*log,
+	struct xlog_recover_item	*item)
+{
+	switch (ITEM_TYPE(item)) {
+	case XFS_LI_BUF:
+		xlog_recover_buffer_ra_pass2(log, item);
+		break;
+	case XFS_LI_INODE:
+		xlog_recover_inode_ra_pass2(log, item);
+		break;
+	case XFS_LI_DQUOT:
+		xlog_recover_dquot_ra_pass2(log, item);
+		break;
+	case XFS_LI_EFI:
+	case XFS_LI_EFD:
+	case XFS_LI_QUOTAOFF:
+	default:
+		break;
+	}
+}
+
 STATIC int
 xlog_recover_commit_pass1(
 	struct xlog			*log,
@@ -3155,15 +3407,18 @@
 
 	switch (ITEM_TYPE(item)) {
 	case XFS_LI_BUF:
-		return xlog_recover_buffer_pass2(log, buffer_list, item);
+		return xlog_recover_buffer_pass2(log, buffer_list, item,
+						 trans->r_lsn);
 	case XFS_LI_INODE:
-		return xlog_recover_inode_pass2(log, buffer_list, item);
+		return xlog_recover_inode_pass2(log, buffer_list, item,
+						 trans->r_lsn);
 	case XFS_LI_EFI:
 		return xlog_recover_efi_pass2(log, item, trans->r_lsn);
 	case XFS_LI_EFD:
 		return xlog_recover_efd_pass2(log, item);
 	case XFS_LI_DQUOT:
-		return xlog_recover_dquot_pass2(log, buffer_list, item);
+		return xlog_recover_dquot_pass2(log, buffer_list, item,
+						trans->r_lsn);
 	case XFS_LI_ICREATE:
 		return xlog_recover_do_icreate_pass2(log, buffer_list, item);
 	case XFS_LI_QUOTAOFF:
@@ -3177,6 +3432,26 @@
 	}
 }
 
+STATIC int
+xlog_recover_items_pass2(
+	struct xlog                     *log,
+	struct xlog_recover             *trans,
+	struct list_head                *buffer_list,
+	struct list_head                *item_list)
+{
+	struct xlog_recover_item	*item;
+	int				error = 0;
+
+	list_for_each_entry(item, item_list, ri_list) {
+		error = xlog_recover_commit_pass2(log, trans,
+					  buffer_list, item);
+		if (error)
+			return error;
+	}
+
+	return error;
+}
+
 /*
  * Perform the transaction.
  *
@@ -3189,9 +3464,16 @@
 	struct xlog_recover	*trans,
 	int			pass)
 {
-	int			error = 0, error2;
-	xlog_recover_item_t	*item;
-	LIST_HEAD		(buffer_list);
+	int				error = 0;
+	int				error2;
+	int				items_queued = 0;
+	struct xlog_recover_item	*item;
+	struct xlog_recover_item	*next;
+	LIST_HEAD			(buffer_list);
+	LIST_HEAD			(ra_list);
+	LIST_HEAD			(done_list);
+
+	#define XLOG_RECOVER_COMMIT_QUEUE_MAX 100
 
 	hlist_del(&trans->r_list);
 
@@ -3199,14 +3481,22 @@
 	if (error)
 		return error;
 
-	list_for_each_entry(item, &trans->r_itemq, ri_list) {
+	list_for_each_entry_safe(item, next, &trans->r_itemq, ri_list) {
 		switch (pass) {
 		case XLOG_RECOVER_PASS1:
 			error = xlog_recover_commit_pass1(log, trans, item);
 			break;
 		case XLOG_RECOVER_PASS2:
-			error = xlog_recover_commit_pass2(log, trans,
-							  &buffer_list, item);
+			xlog_recover_ra_pass2(log, item);
+			list_move_tail(&item->ri_list, &ra_list);
+			items_queued++;
+			if (items_queued >= XLOG_RECOVER_COMMIT_QUEUE_MAX) {
+				error = xlog_recover_items_pass2(log, trans,
+						&buffer_list, &ra_list);
+				list_splice_tail_init(&ra_list, &done_list);
+				items_queued = 0;
+			}
+
 			break;
 		default:
 			ASSERT(0);
@@ -3216,9 +3506,19 @@
 			goto out;
 	}
 
+out:
+	if (!list_empty(&ra_list)) {
+		if (!error)
+			error = xlog_recover_items_pass2(log, trans,
+					&buffer_list, &ra_list);
+		list_splice_tail_init(&ra_list, &done_list);
+	}
+
+	if (!list_empty(&done_list))
+		list_splice_init(&done_list, &trans->r_itemq);
+
 	xlog_recover_free_trans(trans);
 
-out:
 	error2 = xfs_buf_delwri_submit(&buffer_list);
 	return error ? error : error2;
 }
@@ -3376,7 +3676,7 @@
 	}
 
 	tp = xfs_trans_alloc(mp, 0);
-	error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, 0, 0);
+	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
 	if (error)
 		goto abort_error;
 	efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents);
@@ -3482,8 +3782,7 @@
 	int		error;
 
 	tp = xfs_trans_alloc(mp, XFS_TRANS_CLEAR_AGI_BUCKET);
-	error = xfs_trans_reserve(tp, 0, XFS_CLEAR_AGI_BUCKET_LOG_RES(mp),
-				  0, 0, 0);
+	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_clearagi, 0, 0);
 	if (error)
 		goto out_abort;
 
diff --git a/fs/xfs/xfs_log_rlimit.c b/fs/xfs/xfs_log_rlimit.c
new file mode 100644
index 0000000..bbcec0b
--- /dev/null
+++ b/fs/xfs/xfs_log_rlimit.c
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2013 Jie Liu.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
+#include "xfs_ag.h"
+#include "xfs_sb.h"
+#include "xfs_mount.h"
+#include "xfs_trans_space.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_da_btree.h"
+#include "xfs_attr_leaf.h"
+
+/*
+ * Calculate the maximum length in bytes that would be required for a local
+ * attribute value as large attributes out of line are not logged.
+ */
+STATIC int
+xfs_log_calc_max_attrsetm_res(
+	struct xfs_mount	*mp)
+{
+	int			size;
+	int			nblks;
+
+	size = xfs_attr_leaf_entsize_local_max(mp->m_sb.sb_blocksize) -
+	       MAXNAMELEN - 1;
+	nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK);
+	nblks += XFS_B_TO_FSB(mp, size);
+	nblks += XFS_NEXTENTADD_SPACE_RES(mp, size, XFS_ATTR_FORK);
+
+	return  M_RES(mp)->tr_attrsetm.tr_logres +
+		M_RES(mp)->tr_attrsetrt.tr_logres * nblks;
+}
+
+/*
+ * Iterate over the log space reservation table to figure out and return
+ * the maximum one in terms of the pre-calculated values which were done
+ * at mount time.
+ */
+STATIC void
+xfs_log_get_max_trans_res(
+	struct xfs_mount	*mp,
+	struct xfs_trans_res	*max_resp)
+{
+	struct xfs_trans_res	*resp;
+	struct xfs_trans_res	*end_resp;
+	int			log_space = 0;
+	int			attr_space;
+
+	attr_space = xfs_log_calc_max_attrsetm_res(mp);
+
+	resp = (struct xfs_trans_res *)M_RES(mp);
+	end_resp = (struct xfs_trans_res *)(M_RES(mp) + 1);
+	for (; resp < end_resp; resp++) {
+		int		tmp = resp->tr_logcount > 1 ?
+				      resp->tr_logres * resp->tr_logcount :
+				      resp->tr_logres;
+		if (log_space < tmp) {
+			log_space = tmp;
+			*max_resp = *resp;		/* struct copy */
+		}
+	}
+
+	if (attr_space > log_space) {
+		*max_resp = M_RES(mp)->tr_attrsetm;	/* struct copy */
+		max_resp->tr_logres = attr_space;
+	}
+}
+
+/*
+ * Calculate the minimum valid log size for the given superblock configuration.
+ * Used to calculate the minimum log size at mkfs time, and to determine if
+ * the log is large enough or not at mount time. Returns the minimum size in
+ * filesystem block size units.
+ */
+int
+xfs_log_calc_minimum_size(
+	struct xfs_mount	*mp)
+{
+	struct xfs_trans_res	tres = {0};
+	int			max_logres;
+	int			min_logblks = 0;
+	int			lsunit = 0;
+
+	xfs_log_get_max_trans_res(mp, &tres);
+
+	max_logres = xfs_log_calc_unit_res(mp, tres.tr_logres);
+	if (tres.tr_logcount > 1)
+		max_logres *= tres.tr_logcount;
+
+	if (xfs_sb_version_haslogv2(&mp->m_sb) && mp->m_sb.sb_logsunit > 1)
+		lsunit = BTOBB(mp->m_sb.sb_logsunit);
+
+	/*
+	 * Two factors should be taken into account for calculating the minimum
+	 * log space.
+	 * 1) The fundamental limitation is that no single transaction can be
+	 *    larger than half size of the log.
+	 *
+	 *    From mkfs.xfs, this is considered by the XFS_MIN_LOG_FACTOR
+	 *    define, which is set to 3. That means we can definitely fit
+	 *    maximally sized 2 transactions in the log. We'll use this same
+	 *    value here.
+	 *
+	 * 2) If the lsunit option is specified, a transaction requires 2 LSU
+	 *    for the reservation because there are two log writes that can
+	 *    require padding - the transaction data and the commit record which
+	 *    are written separately and both can require padding to the LSU.
+	 *    Consider that we can have an active CIL reservation holding 2*LSU,
+	 *    but the CIL is not over a push threshold, in this case, if we
+	 *    don't have enough log space for at one new transaction, which
+	 *    includes another 2*LSU in the reservation, we will run into dead
+	 *    loop situation in log space grant procedure. i.e.
+	 *    xlog_grant_head_wait().
+	 *
+	 *    Hence the log size needs to be able to contain two maximally sized
+	 *    and padded transactions, which is (2 * (2 * LSU + maxlres)).
+	 *
+	 * Also, the log size should be a multiple of the log stripe unit, round
+	 * it up to lsunit boundary if lsunit is specified.
+	 */
+	if (lsunit) {
+		min_logblks = roundup_64(BTOBB(max_logres), lsunit) +
+			      2 * lsunit;
+	} else
+		min_logblks = BTOBB(max_logres) + 2 * BBSIZE;
+	min_logblks *= XFS_MIN_LOG_FACTOR;
+
+	return XFS_BB_TO_FSB(mp, min_logblks);
+}
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 2b0ba35..5dcc680 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -17,7 +17,7 @@
  */
 #include "xfs.h"
 #include "xfs_fs.h"
-#include "xfs_types.h"
+#include "xfs_format.h"
 #include "xfs_bit.h"
 #include "xfs_log.h"
 #include "xfs_inum.h"
@@ -25,8 +25,10 @@
 #include "xfs_trans_priv.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
-#include "xfs_dir2.h"
 #include "xfs_mount.h"
+#include "xfs_da_btree.h"
+#include "xfs_dir2_format.h"
+#include "xfs_dir2.h"
 #include "xfs_bmap_btree.h"
 #include "xfs_alloc_btree.h"
 #include "xfs_ialloc_btree.h"
@@ -40,7 +42,6 @@
 #include "xfs_error.h"
 #include "xfs_quota.h"
 #include "xfs_fsops.h"
-#include "xfs_utils.h"
 #include "xfs_trace.h"
 #include "xfs_icache.h"
 #include "xfs_cksum.h"
@@ -59,69 +60,6 @@
 #define xfs_icsb_balance_counter_locked(mp, a, b)	do { } while (0)
 #endif
 
-static const struct {
-	short offset;
-	short type;	/* 0 = integer
-			 * 1 = binary / string (no translation)
-			 */
-} xfs_sb_info[] = {
-    { offsetof(xfs_sb_t, sb_magicnum),   0 },
-    { offsetof(xfs_sb_t, sb_blocksize),  0 },
-    { offsetof(xfs_sb_t, sb_dblocks),    0 },
-    { offsetof(xfs_sb_t, sb_rblocks),    0 },
-    { offsetof(xfs_sb_t, sb_rextents),   0 },
-    { offsetof(xfs_sb_t, sb_uuid),       1 },
-    { offsetof(xfs_sb_t, sb_logstart),   0 },
-    { offsetof(xfs_sb_t, sb_rootino),    0 },
-    { offsetof(xfs_sb_t, sb_rbmino),     0 },
-    { offsetof(xfs_sb_t, sb_rsumino),    0 },
-    { offsetof(xfs_sb_t, sb_rextsize),   0 },
-    { offsetof(xfs_sb_t, sb_agblocks),   0 },
-    { offsetof(xfs_sb_t, sb_agcount),    0 },
-    { offsetof(xfs_sb_t, sb_rbmblocks),  0 },
-    { offsetof(xfs_sb_t, sb_logblocks),  0 },
-    { offsetof(xfs_sb_t, sb_versionnum), 0 },
-    { offsetof(xfs_sb_t, sb_sectsize),   0 },
-    { offsetof(xfs_sb_t, sb_inodesize),  0 },
-    { offsetof(xfs_sb_t, sb_inopblock),  0 },
-    { offsetof(xfs_sb_t, sb_fname[0]),   1 },
-    { offsetof(xfs_sb_t, sb_blocklog),   0 },
-    { offsetof(xfs_sb_t, sb_sectlog),    0 },
-    { offsetof(xfs_sb_t, sb_inodelog),   0 },
-    { offsetof(xfs_sb_t, sb_inopblog),   0 },
-    { offsetof(xfs_sb_t, sb_agblklog),   0 },
-    { offsetof(xfs_sb_t, sb_rextslog),   0 },
-    { offsetof(xfs_sb_t, sb_inprogress), 0 },
-    { offsetof(xfs_sb_t, sb_imax_pct),   0 },
-    { offsetof(xfs_sb_t, sb_icount),     0 },
-    { offsetof(xfs_sb_t, sb_ifree),      0 },
-    { offsetof(xfs_sb_t, sb_fdblocks),   0 },
-    { offsetof(xfs_sb_t, sb_frextents),  0 },
-    { offsetof(xfs_sb_t, sb_uquotino),   0 },
-    { offsetof(xfs_sb_t, sb_gquotino),   0 },
-    { offsetof(xfs_sb_t, sb_qflags),     0 },
-    { offsetof(xfs_sb_t, sb_flags),      0 },
-    { offsetof(xfs_sb_t, sb_shared_vn),  0 },
-    { offsetof(xfs_sb_t, sb_inoalignmt), 0 },
-    { offsetof(xfs_sb_t, sb_unit),	 0 },
-    { offsetof(xfs_sb_t, sb_width),	 0 },
-    { offsetof(xfs_sb_t, sb_dirblklog),	 0 },
-    { offsetof(xfs_sb_t, sb_logsectlog), 0 },
-    { offsetof(xfs_sb_t, sb_logsectsize),0 },
-    { offsetof(xfs_sb_t, sb_logsunit),	 0 },
-    { offsetof(xfs_sb_t, sb_features2),	 0 },
-    { offsetof(xfs_sb_t, sb_bad_features2), 0 },
-    { offsetof(xfs_sb_t, sb_features_compat), 0 },
-    { offsetof(xfs_sb_t, sb_features_ro_compat), 0 },
-    { offsetof(xfs_sb_t, sb_features_incompat), 0 },
-    { offsetof(xfs_sb_t, sb_features_log_incompat), 0 },
-    { offsetof(xfs_sb_t, sb_crc),	 0 },
-    { offsetof(xfs_sb_t, sb_pad),	 0 },
-    { offsetof(xfs_sb_t, sb_pquotino),	 0 },
-    { offsetof(xfs_sb_t, sb_lsn),	 0 },
-    { sizeof(xfs_sb_t),			 0 }
-};
-
 static DEFINE_MUTEX(xfs_uuid_table_mutex);
 static int xfs_uuid_table_size;
 static uuid_t *xfs_uuid_table;
@@ -197,64 +135,6 @@
 }
 
 
-/*
- * Reference counting access wrappers to the perag structures.
- * Because we never free per-ag structures, the only thing we
- * have to protect against changes is the tree structure itself.
- */
-struct xfs_perag *
-xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno)
-{
-	struct xfs_perag	*pag;
-	int			ref = 0;
-
-	rcu_read_lock();
-	pag = radix_tree_lookup(&mp->m_perag_tree, agno);
-	if (pag) {
-		ASSERT(atomic_read(&pag->pag_ref) >= 0);
-		ref = atomic_inc_return(&pag->pag_ref);
-	}
-	rcu_read_unlock();
-	trace_xfs_perag_get(mp, agno, ref, _RET_IP_);
-	return pag;
-}
-
-/*
- * search from @first to find the next perag with the given tag set.
- */
-struct xfs_perag *
-xfs_perag_get_tag(
-	struct xfs_mount	*mp,
-	xfs_agnumber_t		first,
-	int			tag)
-{
-	struct xfs_perag	*pag;
-	int			found;
-	int			ref;
-
-	rcu_read_lock();
-	found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
-					(void **)&pag, first, 1, tag);
-	if (found <= 0) {
-		rcu_read_unlock();
-		return NULL;
-	}
-	ref = atomic_inc_return(&pag->pag_ref);
-	rcu_read_unlock();
-	trace_xfs_perag_get_tag(mp, pag->pag_agno, ref, _RET_IP_);
-	return pag;
-}
-
-void
-xfs_perag_put(struct xfs_perag *pag)
-{
-	int	ref;
-
-	ASSERT(atomic_read(&pag->pag_ref) > 0);
-	ref = atomic_dec_return(&pag->pag_ref);
-	trace_xfs_perag_put(pag->pag_mount, pag->pag_agno, ref, _RET_IP_);
-}
-
 STATIC void
 __xfs_free_perag(
 	struct rcu_head	*head)
@@ -307,184 +187,6 @@
 	return 0;
 }
 
-/*
- * Check the validity of the SB found.
- */
-STATIC int
-xfs_mount_validate_sb(
-	xfs_mount_t	*mp,
-	xfs_sb_t	*sbp,
-	bool		check_inprogress,
-	bool		check_version)
-{
-
-	/*
-	 * If the log device and data device have the
-	 * same device number, the log is internal.
-	 * Consequently, the sb_logstart should be non-zero.  If
-	 * we have a zero sb_logstart in this case, we may be trying to mount
-	 * a volume filesystem in a non-volume manner.
-	 */
-	if (sbp->sb_magicnum != XFS_SB_MAGIC) {
-		xfs_warn(mp, "bad magic number");
-		return XFS_ERROR(EWRONGFS);
-	}
-
-
-	if (!xfs_sb_good_version(sbp)) {
-		xfs_warn(mp, "bad version");
-		return XFS_ERROR(EWRONGFS);
-	}
-
-	if ((sbp->sb_qflags & (XFS_OQUOTA_ENFD | XFS_OQUOTA_CHKD)) &&
-			(sbp->sb_qflags & (XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD |
-				XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD))) {
-		xfs_notice(mp,
-"Super block has XFS_OQUOTA bits along with XFS_PQUOTA and/or XFS_GQUOTA bits.\n");
-		return XFS_ERROR(EFSCORRUPTED);
-	}
-
-	/*
-	 * Version 5 superblock feature mask validation. Reject combinations the
-	 * kernel cannot support up front before checking anything else. For
-	 * write validation, we don't need to check feature masks.
-	 */
-	if (check_version && XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) {
-		xfs_alert(mp,
-"Version 5 superblock detected. This kernel has EXPERIMENTAL support enabled!\n"
-"Use of these features in this kernel is at your own risk!");
-
-		if (xfs_sb_has_compat_feature(sbp,
-					XFS_SB_FEAT_COMPAT_UNKNOWN)) {
-			xfs_warn(mp,
-"Superblock has unknown compatible features (0x%x) enabled.\n"
-"Using a more recent kernel is recommended.",
-				(sbp->sb_features_compat &
-						XFS_SB_FEAT_COMPAT_UNKNOWN));
-		}
-
-		if (xfs_sb_has_ro_compat_feature(sbp,
-					XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) {
-			xfs_alert(mp,
-"Superblock has unknown read-only compatible features (0x%x) enabled.",
-				(sbp->sb_features_ro_compat &
-						XFS_SB_FEAT_RO_COMPAT_UNKNOWN));
-			if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
-				xfs_warn(mp,
-"Attempted to mount read-only compatible filesystem read-write.\n"
-"Filesystem can only be safely mounted read only.");
-				return XFS_ERROR(EINVAL);
-			}
-		}
-		if (xfs_sb_has_incompat_feature(sbp,
-					XFS_SB_FEAT_INCOMPAT_UNKNOWN)) {
-			xfs_warn(mp,
-"Superblock has unknown incompatible features (0x%x) enabled.\n"
-"Filesystem can not be safely mounted by this kernel.",
-				(sbp->sb_features_incompat &
-						XFS_SB_FEAT_INCOMPAT_UNKNOWN));
-			return XFS_ERROR(EINVAL);
-		}
-	}
-
-	if (unlikely(
-	    sbp->sb_logstart == 0 && mp->m_logdev_targp == mp->m_ddev_targp)) {
-		xfs_warn(mp,
-		"filesystem is marked as having an external log; "
-		"specify logdev on the mount command line.");
-		return XFS_ERROR(EINVAL);
-	}
-
-	if (unlikely(
-	    sbp->sb_logstart != 0 && mp->m_logdev_targp != mp->m_ddev_targp)) {
-		xfs_warn(mp,
-		"filesystem is marked as having an internal log; "
-		"do not specify logdev on the mount command line.");
-		return XFS_ERROR(EINVAL);
-	}
-
-	/*
-	 * More sanity checking.  Most of these were stolen directly from
-	 * xfs_repair.
-	 */
-	if (unlikely(
-	    sbp->sb_agcount <= 0					||
-	    sbp->sb_sectsize < XFS_MIN_SECTORSIZE			||
-	    sbp->sb_sectsize > XFS_MAX_SECTORSIZE			||
-	    sbp->sb_sectlog < XFS_MIN_SECTORSIZE_LOG			||
-	    sbp->sb_sectlog > XFS_MAX_SECTORSIZE_LOG			||
-	    sbp->sb_sectsize != (1 << sbp->sb_sectlog)			||
-	    sbp->sb_blocksize < XFS_MIN_BLOCKSIZE			||
-	    sbp->sb_blocksize > XFS_MAX_BLOCKSIZE			||
-	    sbp->sb_blocklog < XFS_MIN_BLOCKSIZE_LOG			||
-	    sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG			||
-	    sbp->sb_blocksize != (1 << sbp->sb_blocklog)		||
-	    sbp->sb_inodesize < XFS_DINODE_MIN_SIZE			||
-	    sbp->sb_inodesize > XFS_DINODE_MAX_SIZE			||
-	    sbp->sb_inodelog < XFS_DINODE_MIN_LOG			||
-	    sbp->sb_inodelog > XFS_DINODE_MAX_LOG			||
-	    sbp->sb_inodesize != (1 << sbp->sb_inodelog)		||
-	    (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog)	||
-	    (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE)	||
-	    (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE)	||
-	    (sbp->sb_imax_pct > 100 /* zero sb_imax_pct is valid */)	||
-	    sbp->sb_dblocks == 0					||
-	    sbp->sb_dblocks > XFS_MAX_DBLOCKS(sbp)			||
-	    sbp->sb_dblocks < XFS_MIN_DBLOCKS(sbp))) {
-		XFS_CORRUPTION_ERROR("SB sanity check failed",
-				XFS_ERRLEVEL_LOW, mp, sbp);
-		return XFS_ERROR(EFSCORRUPTED);
-	}
-
-	/*
-	 * Until this is fixed only page-sized or smaller data blocks work.
-	 */
-	if (unlikely(sbp->sb_blocksize > PAGE_SIZE)) {
-		xfs_warn(mp,
-		"File system with blocksize %d bytes. "
-		"Only pagesize (%ld) or less will currently work.",
-				sbp->sb_blocksize, PAGE_SIZE);
-		return XFS_ERROR(ENOSYS);
-	}
-
-	/*
-	 * Currently only very few inode sizes are supported.
-	 */
-	switch (sbp->sb_inodesize) {
-	case 256:
-	case 512:
-	case 1024:
-	case 2048:
-		break;
-	default:
-		xfs_warn(mp, "inode size of %d bytes not supported",
-				sbp->sb_inodesize);
-		return XFS_ERROR(ENOSYS);
-	}
-
-	if (xfs_sb_validate_fsb_count(sbp, sbp->sb_dblocks) ||
-	    xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) {
-		xfs_warn(mp,
-		"file system too large to be mounted on this system.");
-		return XFS_ERROR(EFBIG);
-	}
-
-	if (check_inprogress && sbp->sb_inprogress) {
-		xfs_warn(mp, "Offline file system operation in progress!");
-		return XFS_ERROR(EFSCORRUPTED);
-	}
-
-	/*
-	 * Version 1 directory format has never worked on Linux.
-	 */
-	if (unlikely(!xfs_sb_version_hasdirv2(sbp))) {
-		xfs_warn(mp, "file system using version 1 directory format");
-		return XFS_ERROR(ENOSYS);
-	}
-
-	return 0;
-}
-
 int
 xfs_initialize_perag(
 	xfs_mount_t	*mp,
@@ -569,283 +271,15 @@
 	return error;
 }
 
-static void
-xfs_sb_quota_from_disk(struct xfs_sb *sbp)
-{
-	if (sbp->sb_qflags & XFS_OQUOTA_ENFD)
-		sbp->sb_qflags |= (sbp->sb_qflags & XFS_PQUOTA_ACCT) ?
-					XFS_PQUOTA_ENFD : XFS_GQUOTA_ENFD;
-	if (sbp->sb_qflags & XFS_OQUOTA_CHKD)
-		sbp->sb_qflags |= (sbp->sb_qflags & XFS_PQUOTA_ACCT) ?
-					XFS_PQUOTA_CHKD : XFS_GQUOTA_CHKD;
-	sbp->sb_qflags &= ~(XFS_OQUOTA_ENFD | XFS_OQUOTA_CHKD);
-}
-
-void
-xfs_sb_from_disk(
-	struct xfs_sb	*to,
-	xfs_dsb_t	*from)
-{
-	to->sb_magicnum = be32_to_cpu(from->sb_magicnum);
-	to->sb_blocksize = be32_to_cpu(from->sb_blocksize);
-	to->sb_dblocks = be64_to_cpu(from->sb_dblocks);
-	to->sb_rblocks = be64_to_cpu(from->sb_rblocks);
-	to->sb_rextents = be64_to_cpu(from->sb_rextents);
-	memcpy(&to->sb_uuid, &from->sb_uuid, sizeof(to->sb_uuid));
-	to->sb_logstart = be64_to_cpu(from->sb_logstart);
-	to->sb_rootino = be64_to_cpu(from->sb_rootino);
-	to->sb_rbmino = be64_to_cpu(from->sb_rbmino);
-	to->sb_rsumino = be64_to_cpu(from->sb_rsumino);
-	to->sb_rextsize = be32_to_cpu(from->sb_rextsize);
-	to->sb_agblocks = be32_to_cpu(from->sb_agblocks);
-	to->sb_agcount = be32_to_cpu(from->sb_agcount);
-	to->sb_rbmblocks = be32_to_cpu(from->sb_rbmblocks);
-	to->sb_logblocks = be32_to_cpu(from->sb_logblocks);
-	to->sb_versionnum = be16_to_cpu(from->sb_versionnum);
-	to->sb_sectsize = be16_to_cpu(from->sb_sectsize);
-	to->sb_inodesize = be16_to_cpu(from->sb_inodesize);
-	to->sb_inopblock = be16_to_cpu(from->sb_inopblock);
-	memcpy(&to->sb_fname, &from->sb_fname, sizeof(to->sb_fname));
-	to->sb_blocklog = from->sb_blocklog;
-	to->sb_sectlog = from->sb_sectlog;
-	to->sb_inodelog = from->sb_inodelog;
-	to->sb_inopblog = from->sb_inopblog;
-	to->sb_agblklog = from->sb_agblklog;
-	to->sb_rextslog = from->sb_rextslog;
-	to->sb_inprogress = from->sb_inprogress;
-	to->sb_imax_pct = from->sb_imax_pct;
-	to->sb_icount = be64_to_cpu(from->sb_icount);
-	to->sb_ifree = be64_to_cpu(from->sb_ifree);
-	to->sb_fdblocks = be64_to_cpu(from->sb_fdblocks);
-	to->sb_frextents = be64_to_cpu(from->sb_frextents);
-	to->sb_uquotino = be64_to_cpu(from->sb_uquotino);
-	to->sb_gquotino = be64_to_cpu(from->sb_gquotino);
-	to->sb_qflags = be16_to_cpu(from->sb_qflags);
-	to->sb_flags = from->sb_flags;
-	to->sb_shared_vn = from->sb_shared_vn;
-	to->sb_inoalignmt = be32_to_cpu(from->sb_inoalignmt);
-	to->sb_unit = be32_to_cpu(from->sb_unit);
-	to->sb_width = be32_to_cpu(from->sb_width);
-	to->sb_dirblklog = from->sb_dirblklog;
-	to->sb_logsectlog = from->sb_logsectlog;
-	to->sb_logsectsize = be16_to_cpu(from->sb_logsectsize);
-	to->sb_logsunit = be32_to_cpu(from->sb_logsunit);
-	to->sb_features2 = be32_to_cpu(from->sb_features2);
-	to->sb_bad_features2 = be32_to_cpu(from->sb_bad_features2);
-	to->sb_features_compat = be32_to_cpu(from->sb_features_compat);
-	to->sb_features_ro_compat = be32_to_cpu(from->sb_features_ro_compat);
-	to->sb_features_incompat = be32_to_cpu(from->sb_features_incompat);
-	to->sb_features_log_incompat =
-				be32_to_cpu(from->sb_features_log_incompat);
-	to->sb_pad = 0;
-	to->sb_pquotino = be64_to_cpu(from->sb_pquotino);
-	to->sb_lsn = be64_to_cpu(from->sb_lsn);
-}
-
-static inline void
-xfs_sb_quota_to_disk(
-	xfs_dsb_t	*to,
-	xfs_sb_t	*from,
-	__int64_t	*fields)
-{
-	__uint16_t	qflags = from->sb_qflags;
-
-	if (*fields & XFS_SB_QFLAGS) {
-		/*
-		 * The in-core version of sb_qflags do not have
-		 * XFS_OQUOTA_* flags, whereas the on-disk version
-		 * does.  So, convert incore XFS_{PG}QUOTA_* flags
-		 * to on-disk XFS_OQUOTA_* flags.
-		 */
-		qflags &= ~(XFS_PQUOTA_ENFD | XFS_PQUOTA_CHKD |
-				XFS_GQUOTA_ENFD | XFS_GQUOTA_CHKD);
-
-		if (from->sb_qflags &
-				(XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD))
-			qflags |= XFS_OQUOTA_ENFD;
-		if (from->sb_qflags &
-				(XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD))
-			qflags |= XFS_OQUOTA_CHKD;
-		to->sb_qflags = cpu_to_be16(qflags);
-		*fields &= ~XFS_SB_QFLAGS;
-	}
-}
-
-/*
- * Copy in core superblock to ondisk one.
- *
- * The fields argument is mask of superblock fields to copy.
- */
-void
-xfs_sb_to_disk(
-	xfs_dsb_t	*to,
-	xfs_sb_t	*from,
-	__int64_t	fields)
-{
-	xfs_caddr_t	to_ptr = (xfs_caddr_t)to;
-	xfs_caddr_t	from_ptr = (xfs_caddr_t)from;
-	xfs_sb_field_t	f;
-	int		first;
-	int		size;
-
-	ASSERT(fields);
-	if (!fields)
-		return;
-
-	xfs_sb_quota_to_disk(to, from, &fields);
-	while (fields) {
-		f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields);
-		first = xfs_sb_info[f].offset;
-		size = xfs_sb_info[f + 1].offset - first;
-
-		ASSERT(xfs_sb_info[f].type == 0 || xfs_sb_info[f].type == 1);
-
-		if (size == 1 || xfs_sb_info[f].type == 1) {
-			memcpy(to_ptr + first, from_ptr + first, size);
-		} else {
-			switch (size) {
-			case 2:
-				*(__be16 *)(to_ptr + first) =
-					cpu_to_be16(*(__u16 *)(from_ptr + first));
-				break;
-			case 4:
-				*(__be32 *)(to_ptr + first) =
-					cpu_to_be32(*(__u32 *)(from_ptr + first));
-				break;
-			case 8:
-				*(__be64 *)(to_ptr + first) =
-					cpu_to_be64(*(__u64 *)(from_ptr + first));
-				break;
-			default:
-				ASSERT(0);
-			}
-		}
-
-		fields &= ~(1LL << f);
-	}
-}
-
-static int
-xfs_sb_verify(
-	struct xfs_buf	*bp,
-	bool		check_version)
-{
-	struct xfs_mount *mp = bp->b_target->bt_mount;
-	struct xfs_sb	sb;
-
-	xfs_sb_from_disk(&sb, XFS_BUF_TO_SBP(bp));
-
-	/*
-	 * Only check the in progress field for the primary superblock as
-	 * mkfs.xfs doesn't clear it from secondary superblocks.
-	 */
-	return xfs_mount_validate_sb(mp, &sb, bp->b_bn == XFS_SB_DADDR,
-				     check_version);
-}
-
-/*
- * If the superblock has the CRC feature bit set or the CRC field is non-null,
- * check that the CRC is valid.  We check the CRC field is non-null because a
- * single bit error could clear the feature bit and unused parts of the
- * superblock are supposed to be zero. Hence a non-null crc field indicates that
- * we've potentially lost a feature bit and we should check it anyway.
- */
-static void
-xfs_sb_read_verify(
-	struct xfs_buf	*bp)
-{
-	struct xfs_mount *mp = bp->b_target->bt_mount;
-	struct xfs_dsb	*dsb = XFS_BUF_TO_SBP(bp);
-	int		error;
-
-	/*
-	 * open code the version check to avoid needing to convert the entire
-	 * superblock from disk order just to check the version number
-	 */
-	if (dsb->sb_magicnum == cpu_to_be32(XFS_SB_MAGIC) &&
-	    (((be16_to_cpu(dsb->sb_versionnum) & XFS_SB_VERSION_NUMBITS) ==
-						XFS_SB_VERSION_5) ||
-	     dsb->sb_crc != 0)) {
-
-		if (!xfs_verify_cksum(bp->b_addr, be16_to_cpu(dsb->sb_sectsize),
-				      offsetof(struct xfs_sb, sb_crc))) {
-			error = EFSCORRUPTED;
-			goto out_error;
-		}
-	}
-	error = xfs_sb_verify(bp, true);
-
-out_error:
-	if (error) {
-		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
-		xfs_buf_ioerror(bp, error);
-	}
-}
-
-/*
- * We may be probed for a filesystem match, so we may not want to emit
- * messages when the superblock buffer is not actually an XFS superblock.
- * If we find an XFS superblock, the run a normal, noisy mount because we are
- * really going to mount it and want to know about errors.
- */
-static void
-xfs_sb_quiet_read_verify(
-	struct xfs_buf	*bp)
-{
-	struct xfs_dsb	*dsb = XFS_BUF_TO_SBP(bp);
-
-
-	if (dsb->sb_magicnum == cpu_to_be32(XFS_SB_MAGIC)) {
-		/* XFS filesystem, verify noisily! */
-		xfs_sb_read_verify(bp);
-		return;
-	}
-	/* quietly fail */
-	xfs_buf_ioerror(bp, EWRONGFS);
-}
-
-static void
-xfs_sb_write_verify(
-	struct xfs_buf		*bp)
-{
-	struct xfs_mount	*mp = bp->b_target->bt_mount;
-	struct xfs_buf_log_item	*bip = bp->b_fspriv;
-	int			error;
-
-	error = xfs_sb_verify(bp, false);
-	if (error) {
-		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
-		xfs_buf_ioerror(bp, error);
-		return;
-	}
-
-	if (!xfs_sb_version_hascrc(&mp->m_sb))
-		return;
-
-	if (bip)
-		XFS_BUF_TO_SBP(bp)->sb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
-
-	xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
-			 offsetof(struct xfs_sb, sb_crc));
-}
-
-const struct xfs_buf_ops xfs_sb_buf_ops = {
-	.verify_read = xfs_sb_read_verify,
-	.verify_write = xfs_sb_write_verify,
-};
-
-static const struct xfs_buf_ops xfs_sb_quiet_buf_ops = {
-	.verify_read = xfs_sb_quiet_read_verify,
-	.verify_write = xfs_sb_write_verify,
-};
-
 /*
  * xfs_readsb
  *
  * Does the initial read of the superblock.
  */
 int
-xfs_readsb(xfs_mount_t *mp, int flags)
+xfs_readsb(
+	struct xfs_mount *mp,
+	int		flags)
 {
 	unsigned int	sector_size;
 	struct xfs_buf	*bp;
@@ -884,8 +318,8 @@
 	 * Initialize the mount structure from the superblock.
 	 */
 	xfs_sb_from_disk(&mp->m_sb, XFS_BUF_TO_SBP(bp));
-
 	xfs_sb_quota_from_disk(&mp->m_sb);
+
 	/*
 	 * We must be able to do sector-sized and sector-aligned IO.
 	 */
@@ -922,107 +356,6 @@
 	return error;
 }
 
-
-/*
- * xfs_mount_common
- *
- * Mount initialization code establishing various mount
- * fields from the superblock associated with the given
- * mount structure
- */
-STATIC void
-xfs_mount_common(xfs_mount_t *mp, xfs_sb_t *sbp)
-{
-	mp->m_agfrotor = mp->m_agirotor = 0;
-	spin_lock_init(&mp->m_agirotor_lock);
-	mp->m_maxagi = mp->m_sb.sb_agcount;
-	mp->m_blkbit_log = sbp->sb_blocklog + XFS_NBBYLOG;
-	mp->m_blkbb_log = sbp->sb_blocklog - BBSHIFT;
-	mp->m_sectbb_log = sbp->sb_sectlog - BBSHIFT;
-	mp->m_agno_log = xfs_highbit32(sbp->sb_agcount - 1) + 1;
-	mp->m_agino_log = sbp->sb_inopblog + sbp->sb_agblklog;
-	mp->m_blockmask = sbp->sb_blocksize - 1;
-	mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG;
-	mp->m_blockwmask = mp->m_blockwsize - 1;
-
-	mp->m_alloc_mxr[0] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, 1);
-	mp->m_alloc_mxr[1] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, 0);
-	mp->m_alloc_mnr[0] = mp->m_alloc_mxr[0] / 2;
-	mp->m_alloc_mnr[1] = mp->m_alloc_mxr[1] / 2;
-
-	mp->m_inobt_mxr[0] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 1);
-	mp->m_inobt_mxr[1] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 0);
-	mp->m_inobt_mnr[0] = mp->m_inobt_mxr[0] / 2;
-	mp->m_inobt_mnr[1] = mp->m_inobt_mxr[1] / 2;
-
-	mp->m_bmap_dmxr[0] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, 1);
-	mp->m_bmap_dmxr[1] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, 0);
-	mp->m_bmap_dmnr[0] = mp->m_bmap_dmxr[0] / 2;
-	mp->m_bmap_dmnr[1] = mp->m_bmap_dmxr[1] / 2;
-
-	mp->m_bsize = XFS_FSB_TO_BB(mp, 1);
-	mp->m_ialloc_inos = (int)MAX((__uint16_t)XFS_INODES_PER_CHUNK,
-					sbp->sb_inopblock);
-	mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog;
-}
-
-/*
- * xfs_initialize_perag_data
- *
- * Read in each per-ag structure so we can count up the number of
- * allocated inodes, free inodes and used filesystem blocks as this
- * information is no longer persistent in the superblock. Once we have
- * this information, write it into the in-core superblock structure.
- */
-STATIC int
-xfs_initialize_perag_data(xfs_mount_t *mp, xfs_agnumber_t agcount)
-{
-	xfs_agnumber_t	index;
-	xfs_perag_t	*pag;
-	xfs_sb_t	*sbp = &mp->m_sb;
-	uint64_t	ifree = 0;
-	uint64_t	ialloc = 0;
-	uint64_t	bfree = 0;
-	uint64_t	bfreelst = 0;
-	uint64_t	btree = 0;
-	int		error;
-
-	for (index = 0; index < agcount; index++) {
-		/*
-		 * read the agf, then the agi. This gets us
-		 * all the information we need and populates the
-		 * per-ag structures for us.
-		 */
-		error = xfs_alloc_pagf_init(mp, NULL, index, 0);
-		if (error)
-			return error;
-
-		error = xfs_ialloc_pagi_init(mp, NULL, index);
-		if (error)
-			return error;
-		pag = xfs_perag_get(mp, index);
-		ifree += pag->pagi_freecount;
-		ialloc += pag->pagi_count;
-		bfree += pag->pagf_freeblks;
-		bfreelst += pag->pagf_flcount;
-		btree += pag->pagf_btreeblks;
-		xfs_perag_put(pag);
-	}
-	/*
-	 * Overwrite incore superblock counters with just-read data
-	 */
-	spin_lock(&mp->m_sb_lock);
-	sbp->sb_ifree = ifree;
-	sbp->sb_icount = ialloc;
-	sbp->sb_fdblocks = bfree + bfreelst + btree;
-	spin_unlock(&mp->m_sb_lock);
-
-	/* Fixup the per-cpu counters as well. */
-	xfs_icsb_reinit_counters(mp);
-
-	return 0;
-}
-
 /*
  * Update alignment values based on mount options and sb values
  */
@@ -1194,7 +527,7 @@
 }
 
 /*
- * Check that the data (and log if separate) are an ok size.
+ * Check that the data (and log if separate) is an ok size.
  */
 STATIC int
 xfs_check_sizes(xfs_mount_t *mp)
@@ -1264,8 +597,7 @@
 		return 0;
 
 	tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
-	error = xfs_trans_reserve(tp, 0, XFS_QM_SBCHANGE_LOG_RES(mp),
-				  0, 0, XFS_DEFAULT_LOG_COUNT);
+	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_sbchange, 0, 0);
 	if (error) {
 		xfs_trans_cancel(tp, 0);
 		xfs_alert(mp, "%s: Superblock update failed!", __func__);
@@ -1315,7 +647,7 @@
 	uint		quotaflags = 0;
 	int		error = 0;
 
-	xfs_mount_common(mp, sbp);
+	xfs_sb_mount_common(mp, sbp);
 
 	/*
 	 * Check for a mismatched features2 values.  Older kernels
@@ -1400,7 +732,7 @@
 	xfs_set_inoalignment(mp);
 
 	/*
-	 * Check that the data (and log if separate) are an ok size.
+	 * Check that the data (and log if separate) is an ok size.
 	 */
 	error = xfs_check_sizes(mp);
 	if (error)
@@ -1738,8 +1070,7 @@
 		return 0;
 
 	tp = _xfs_trans_alloc(mp, XFS_TRANS_SB_COUNT, KM_SLEEP);
-	error = xfs_trans_reserve(tp, 0, XFS_SB_LOG_RES(mp), 0, 0,
-				  XFS_DEFAULT_LOG_COUNT);
+	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_sb, 0, 0);
 	if (error) {
 		xfs_trans_cancel(tp, 0);
 		return error;
@@ -1752,49 +1083,7 @@
 }
 
 /*
- * xfs_mod_sb() can be used to copy arbitrary changes to the
- * in-core superblock into the superblock buffer to be logged.
- * It does not provide the higher level of locking that is
- * needed to protect the in-core superblock from concurrent
- * access.
- */
-void
-xfs_mod_sb(xfs_trans_t *tp, __int64_t fields)
-{
-	xfs_buf_t	*bp;
-	int		first;
-	int		last;
-	xfs_mount_t	*mp;
-	xfs_sb_field_t	f;
-
-	ASSERT(fields);
-	if (!fields)
-		return;
-	mp = tp->t_mountp;
-	bp = xfs_trans_getsb(tp, mp, 0);
-	first = sizeof(xfs_sb_t);
-	last = 0;
-
-	/* translate/copy */
-
-	xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb, fields);
-
-	/* find modified range */
-	f = (xfs_sb_field_t)xfs_highbit64((__uint64_t)fields);
-	ASSERT((1LL << f) & XFS_SB_MOD_BITS);
-	last = xfs_sb_info[f + 1].offset - 1;
-
-	f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields);
-	ASSERT((1LL << f) & XFS_SB_MOD_BITS);
-	first = xfs_sb_info[f].offset;
-
-	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF);
-	xfs_trans_log_buf(tp, bp, first, last);
-}
-
-
-/*
- * xfs_mod_incore_sb_unlocked() is a utility routine common used to apply
+ * xfs_mod_incore_sb_unlocked() is a utility routine commonly used to apply
  * a delta to a specified field in the in-core superblock.  Simply
  * switch on the field indicated and apply the delta to that field.
  * Fields are not allowed to dip below zero, so if the delta would
@@ -2101,8 +1390,7 @@
 			 XFS_SB_VERSIONNUM));
 
 	tp = xfs_trans_alloc(mp, XFS_TRANS_SB_UNIT);
-	error = xfs_trans_reserve(tp, 0, XFS_SB_LOG_RES(mp), 0, 0,
-				  XFS_DEFAULT_LOG_COUNT);
+	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_sb, 0, 0);
 	if (error) {
 		xfs_trans_cancel(tp, 0);
 		return error;
@@ -2260,12 +1548,6 @@
 	if (mp->m_sb_cnts == NULL)
 		return -ENOMEM;
 
-#ifdef CONFIG_HOTPLUG_CPU
-	mp->m_icsb_notifier.notifier_call = xfs_icsb_cpu_notify;
-	mp->m_icsb_notifier.priority = 0;
-	register_hotcpu_notifier(&mp->m_icsb_notifier);
-#endif /* CONFIG_HOTPLUG_CPU */
-
 	for_each_online_cpu(i) {
 		cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
 		memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
@@ -2278,6 +1560,13 @@
 	 * initial balance kicks us off correctly
 	 */
 	mp->m_icsb_counters = -1;
+
+#ifdef CONFIG_HOTPLUG_CPU
+	mp->m_icsb_notifier.notifier_call = xfs_icsb_cpu_notify;
+	mp->m_icsb_notifier.priority = 0;
+	register_hotcpu_notifier(&mp->m_icsb_notifier);
+#endif /* CONFIG_HOTPLUG_CPU */
+
 	return 0;
 }
 
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 4e374d4..1fa0584 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -18,45 +18,7 @@
 #ifndef __XFS_MOUNT_H__
 #define	__XFS_MOUNT_H__
 
-typedef struct xfs_trans_reservations {
-	uint	tr_write;	/* extent alloc trans */
-	uint	tr_itruncate;	/* truncate trans */
-	uint	tr_rename;	/* rename trans */
-	uint	tr_link;	/* link trans */
-	uint	tr_remove;	/* unlink trans */
-	uint	tr_symlink;	/* symlink trans */
-	uint	tr_create;	/* create trans */
-	uint	tr_mkdir;	/* mkdir trans */
-	uint	tr_ifree;	/* inode free trans */
-	uint	tr_ichange;	/* inode update trans */
-	uint	tr_growdata;	/* fs data section grow trans */
-	uint	tr_swrite;	/* sync write inode trans */
-	uint	tr_addafork;	/* cvt inode to attributed trans */
-	uint	tr_writeid;	/* write setuid/setgid file */
-	uint	tr_attrinval;	/* attr fork buffer invalidation */
-	uint	tr_attrsetm;	/* set/create an attribute at mount time */
-	uint	tr_attrsetrt;	/* set/create an attribute at runtime */
-	uint	tr_attrrm;	/* remove an attribute */
-	uint	tr_clearagi;	/* clear bad agi unlinked ino bucket */
-	uint	tr_growrtalloc;	/* grow realtime allocations */
-	uint	tr_growrtzero;	/* grow realtime zeroing */
-	uint	tr_growrtfree;	/* grow realtime freeing */
-	uint	tr_qm_sbchange;	/* change quota flags */
-	uint	tr_qm_setqlim;	/* adjust quota limits */
-	uint	tr_qm_dqalloc;	/* allocate quota on disk */
-	uint	tr_qm_quotaoff;	/* turn quota off */
-	uint	tr_qm_equotaoff;/* end of turn quota off */
-	uint	tr_sb;		/* modify superblock */
-} xfs_trans_reservations_t;
-
-#ifndef __KERNEL__
-
-#define xfs_daddr_to_agno(mp,d) \
-	((xfs_agnumber_t)(XFS_BB_TO_FSBT(mp, d) / (mp)->m_sb.sb_agblocks))
-#define xfs_daddr_to_agbno(mp,d) \
-	((xfs_agblock_t)(XFS_BB_TO_FSBT(mp, d) % (mp)->m_sb.sb_agblocks))
-
-#else /* __KERNEL__ */
+#ifdef __KERNEL__
 
 struct xlog;
 struct xfs_inode;
@@ -174,7 +136,7 @@
 	int			m_ialloc_blks;	/* blocks in inode allocation */
 	int			m_inoalign_mask;/* mask sb_inoalignmt if used */
 	uint			m_qflags;	/* quota status flags */
-	xfs_trans_reservations_t m_reservations;/* precomputed res values */
+	struct xfs_trans_resv	m_resv;		/* precomputed res values */
 	__uint64_t		m_maxicount;	/* maximum inode count */
 	__uint64_t		m_resblks;	/* total reserved blocks */
 	__uint64_t		m_resblks_avail;/* available reserved blocks */
@@ -330,14 +292,6 @@
 }
 
 /*
- * perag get/put wrappers for ref counting
- */
-struct xfs_perag *xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno);
-struct xfs_perag *xfs_perag_get_tag(struct xfs_mount *mp, xfs_agnumber_t agno,
-					int tag);
-void	xfs_perag_put(struct xfs_perag *pag);
-
-/*
  * Per-cpu superblock locking functions
  */
 #ifdef HAVE_PERCPU_SB
@@ -366,9 +320,63 @@
 	int64_t		msb_delta;	/* Change to make to specified field */
 } xfs_mod_sb_t;
 
+/*
+ * Per-ag incore structure, copies of information in agf and agi, to improve the
+ * performance of allocation group selection. This is defined for the kernel
+ * only, and hence is defined here instead of in xfs_ag.h. You need the struct
+ * xfs_mount to be defined to look up a xfs_perag anyway (via mp->m_perag_tree),
+ * so this doesn't introduce any strange header file dependencies.
+ */
+typedef struct xfs_perag {
+	struct xfs_mount *pag_mount;	/* owner filesystem */
+	xfs_agnumber_t	pag_agno;	/* AG this structure belongs to */
+	atomic_t	pag_ref;	/* perag reference count */
+	char		pagf_init;	/* this agf's entry is initialized */
+	char		pagi_init;	/* this agi's entry is initialized */
+	char		pagf_metadata;	/* the agf is preferred to be metadata */
+	char		pagi_inodeok;	/* The agi is ok for inodes */
+	__uint8_t	pagf_levels[XFS_BTNUM_AGF];
+					/* # of levels in bno & cnt btree */
+	__uint32_t	pagf_flcount;	/* count of blocks in freelist */
+	xfs_extlen_t	pagf_freeblks;	/* total free blocks */
+	xfs_extlen_t	pagf_longest;	/* longest free space */
+	__uint32_t	pagf_btreeblks;	/* # of blocks held in AGF btrees */
+	xfs_agino_t	pagi_freecount;	/* number of free inodes */
+	xfs_agino_t	pagi_count;	/* number of allocated inodes */
+
+	/*
+	 * Inode allocation search lookup optimisation.
+	 * If the pagino matches, the search for new inodes
+	 * doesn't need to search the near ones again straight away
+	 */
+	xfs_agino_t	pagl_pagino;
+	xfs_agino_t	pagl_leftrec;
+	xfs_agino_t	pagl_rightrec;
+	spinlock_t	pagb_lock;	/* lock for pagb_tree */
+	struct rb_root	pagb_tree;	/* ordered tree of busy extents */
+
+	atomic_t        pagf_fstrms;    /* # of filestreams active in this AG */
+
+	spinlock_t	pag_ici_lock;	/* incore inode cache lock */
+	struct radix_tree_root pag_ici_root;	/* incore inode cache root */
+	int		pag_ici_reclaimable;	/* reclaimable inodes */
+	struct mutex	pag_ici_reclaim_lock;	/* serialisation point */
+	unsigned long	pag_ici_reclaim_cursor;	/* reclaim restart point */
+
+	/* buffer cache index */
+	spinlock_t	pag_buf_lock;	/* lock for pag_buf_tree */
+	struct rb_root	pag_buf_tree;	/* ordered tree of active buffers */
+
+	/* for rcu-safe freeing */
+	struct rcu_head	rcu_head;
+	int		pagb_count;	/* pagb slots in use */
+} xfs_perag_t;
+
 extern int	xfs_log_sbcount(xfs_mount_t *);
 extern __uint64_t xfs_default_resblks(xfs_mount_t *mp);
 extern int	xfs_mountfs(xfs_mount_t *mp);
+extern int	xfs_initialize_perag(xfs_mount_t *mp, xfs_agnumber_t agcount,
+				     xfs_agnumber_t *maxagi);
 
 extern void	xfs_unmountfs(xfs_mount_t *);
 extern int	xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int);
@@ -387,13 +395,4 @@
 
 #endif	/* __KERNEL__ */
 
-extern void	xfs_sb_calc_crc(struct xfs_buf	*);
-extern void	xfs_mod_sb(struct xfs_trans *, __int64_t);
-extern int	xfs_initialize_perag(struct xfs_mount *, xfs_agnumber_t,
-					xfs_agnumber_t *);
-extern void	xfs_sb_from_disk(struct xfs_sb *, struct xfs_dsb *);
-extern void	xfs_sb_to_disk(struct xfs_dsb *, struct xfs_sb *, __int64_t);
-
-extern const struct xfs_buf_ops xfs_sb_buf_ops;
-
 #endif	/* __XFS_MOUNT_H__ */
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index d320794..6218a0a 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -17,6 +17,7 @@
  */
 #include "xfs.h"
 #include "xfs_fs.h"
+#include "xfs_format.h"
 #include "xfs_bit.h"
 #include "xfs_log.h"
 #include "xfs_trans.h"
@@ -37,7 +38,6 @@
 #include "xfs_attr.h"
 #include "xfs_buf_item.h"
 #include "xfs_trans_space.h"
-#include "xfs_utils.h"
 #include "xfs_qm.h"
 #include "xfs_trace.h"
 #include "xfs_icache.h"
@@ -834,21 +834,52 @@
 	int		error;
 	int		committed;
 
+	*ip = NULL;
+	/*
+	 * With superblock that doesn't have separate pquotino, we
+	 * share an inode between gquota and pquota. If the on-disk
+	 * superblock has GQUOTA and the filesystem is now mounted
+	 * with PQUOTA, just use sb_gquotino for sb_pquotino and
+	 * vice-versa.
+	 */
+	if (!xfs_sb_version_has_pquotino(&mp->m_sb) &&
+			(flags & (XFS_QMOPT_PQUOTA|XFS_QMOPT_GQUOTA))) {
+		xfs_ino_t ino = NULLFSINO;
+
+		if ((flags & XFS_QMOPT_PQUOTA) &&
+			     (mp->m_sb.sb_gquotino != NULLFSINO)) {
+			ino = mp->m_sb.sb_gquotino;
+			ASSERT(mp->m_sb.sb_pquotino == NULLFSINO);
+		} else if ((flags & XFS_QMOPT_GQUOTA) &&
+			     (mp->m_sb.sb_pquotino != NULLFSINO)) {
+			ino = mp->m_sb.sb_pquotino;
+			ASSERT(mp->m_sb.sb_gquotino == NULLFSINO);
+		}
+		if (ino != NULLFSINO) {
+			error = xfs_iget(mp, NULL, ino, 0, 0, ip);
+			if (error)
+				return error;
+			mp->m_sb.sb_gquotino = NULLFSINO;
+			mp->m_sb.sb_pquotino = NULLFSINO;
+		}
+	}
+
 	tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QINOCREATE);
-	if ((error = xfs_trans_reserve(tp,
-				      XFS_QM_QINOCREATE_SPACE_RES(mp),
-				      XFS_CREATE_LOG_RES(mp), 0,
-				      XFS_TRANS_PERM_LOG_RES,
-				      XFS_CREATE_LOG_COUNT))) {
+	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_create,
+				  XFS_QM_QINOCREATE_SPACE_RES(mp), 0);
+	if (error) {
 		xfs_trans_cancel(tp, 0);
 		return error;
 	}
 
-	error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip, &committed);
-	if (error) {
-		xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
-				 XFS_TRANS_ABORT);
-		return error;
+	if (!*ip) {
+		error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip,
+								&committed);
+		if (error) {
+			xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
+					 XFS_TRANS_ABORT);
+			return error;
+		}
 	}
 
 	/*
@@ -860,21 +891,25 @@
 	if (flags & XFS_QMOPT_SBVERSION) {
 		ASSERT(!xfs_sb_version_hasquota(&mp->m_sb));
 		ASSERT((sbfields & (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
-				   XFS_SB_GQUOTINO | XFS_SB_QFLAGS)) ==
-		       (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
-			XFS_SB_GQUOTINO | XFS_SB_QFLAGS));
+			XFS_SB_GQUOTINO | XFS_SB_PQUOTINO | XFS_SB_QFLAGS)) ==
+				(XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
+				 XFS_SB_GQUOTINO | XFS_SB_PQUOTINO |
+				 XFS_SB_QFLAGS));
 
 		xfs_sb_version_addquota(&mp->m_sb);
 		mp->m_sb.sb_uquotino = NULLFSINO;
 		mp->m_sb.sb_gquotino = NULLFSINO;
+		mp->m_sb.sb_pquotino = NULLFSINO;
 
-		/* qflags will get updated _after_ quotacheck */
-		mp->m_sb.sb_qflags = 0;
+		/* qflags will get updated fully _after_ quotacheck */
+		mp->m_sb.sb_qflags = mp->m_qflags & XFS_ALL_QUOTA_ACCT;
 	}
 	if (flags & XFS_QMOPT_UQUOTA)
 		mp->m_sb.sb_uquotino = (*ip)->i_ino;
-	else
+	else if (flags & XFS_QMOPT_GQUOTA)
 		mp->m_sb.sb_gquotino = (*ip)->i_ino;
+	else
+		mp->m_sb.sb_pquotino = (*ip)->i_ino;
 	spin_unlock(&mp->m_sb_lock);
 	xfs_mod_sb(tp, sbfields);
 
@@ -1484,11 +1519,10 @@
 			if (error)
 				goto error_rele;
 		}
-		/* XXX: Use gquotino for now */
 		if (XFS_IS_PQUOTA_ON(mp) &&
-		    mp->m_sb.sb_gquotino != NULLFSINO) {
-			ASSERT(mp->m_sb.sb_gquotino > 0);
-			error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
+		    mp->m_sb.sb_pquotino != NULLFSINO) {
+			ASSERT(mp->m_sb.sb_pquotino > 0);
+			error = xfs_iget(mp, NULL, mp->m_sb.sb_pquotino,
 					     0, 0, &pip);
 			if (error)
 				goto error_rele;
@@ -1496,7 +1530,8 @@
 	} else {
 		flags |= XFS_QMOPT_SBVERSION;
 		sbflags |= (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
-			    XFS_SB_GQUOTINO | XFS_SB_QFLAGS);
+			    XFS_SB_GQUOTINO | XFS_SB_PQUOTINO |
+			    XFS_SB_QFLAGS);
 	}
 
 	/*
@@ -1524,9 +1559,8 @@
 		flags &= ~XFS_QMOPT_SBVERSION;
 	}
 	if (XFS_IS_PQUOTA_ON(mp) && pip == NULL) {
-		/* XXX: Use XFS_SB_GQUOTINO for now */
 		error = xfs_qm_qino_alloc(mp, &pip,
-					  sbflags | XFS_SB_GQUOTINO,
+					  sbflags | XFS_SB_PQUOTINO,
 					  flags | XFS_QMOPT_PQUOTA);
 		if (error)
 			goto error_rele;
@@ -1704,8 +1738,7 @@
 	int		error;
 
 	tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
-	error = xfs_trans_reserve(tp, 0, XFS_QM_SBCHANGE_LOG_RES(mp),
-				  0, 0, XFS_DEFAULT_LOG_COUNT);
+	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_sbchange, 0, 0);
 	if (error) {
 		xfs_trans_cancel(tp, 0);
 		return error;
@@ -1734,8 +1767,8 @@
 int
 xfs_qm_vop_dqalloc(
 	struct xfs_inode	*ip,
-	uid_t			uid,
-	gid_t			gid,
+	xfs_dqid_t		uid,
+	xfs_dqid_t		gid,
 	prid_t			prid,
 	uint			flags,
 	struct xfs_dquot	**O_udqpp,
@@ -1782,7 +1815,7 @@
 			 * holding ilock.
 			 */
 			xfs_iunlock(ip, lockflags);
-			error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t) uid,
+			error = xfs_qm_dqget(mp, NULL, uid,
 						 XFS_DQ_USER,
 						 XFS_QMOPT_DQALLOC |
 						 XFS_QMOPT_DOWARN,
@@ -1809,7 +1842,7 @@
 	if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) {
 		if (ip->i_d.di_gid != gid) {
 			xfs_iunlock(ip, lockflags);
-			error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)gid,
+			error = xfs_qm_dqget(mp, NULL, gid,
 						 XFS_DQ_GROUP,
 						 XFS_QMOPT_DQALLOC |
 						 XFS_QMOPT_DOWARN,
@@ -1943,7 +1976,7 @@
 			XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS;
 
 	if (XFS_IS_UQUOTA_ON(mp) && udqp &&
-	    ip->i_d.di_uid != (uid_t)be32_to_cpu(udqp->q_core.d_id)) {
+	    ip->i_d.di_uid != be32_to_cpu(udqp->q_core.d_id)) {
 		udq_delblks = udqp;
 		/*
 		 * If there are delayed allocation blocks, then we have to
diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h
index 579d6a0..670cd44 100644
--- a/fs/xfs/xfs_qm.h
+++ b/fs/xfs/xfs_qm.h
@@ -160,6 +160,8 @@
 					struct fs_disk_quota *);
 extern int		xfs_qm_scall_getqstat(struct xfs_mount *,
 					struct fs_quota_stat *);
+extern int		xfs_qm_scall_getqstatv(struct xfs_mount *,
+					struct fs_quota_statv *);
 extern int		xfs_qm_scall_quotaon(struct xfs_mount *, uint);
 extern int		xfs_qm_scall_quotaoff(struct xfs_mount *, uint);
 
diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c
index 437a52d..3af50cc 100644
--- a/fs/xfs/xfs_qm_bhv.c
+++ b/fs/xfs/xfs_qm_bhv.c
@@ -17,6 +17,7 @@
  */
 #include "xfs.h"
 #include "xfs_fs.h"
+#include "xfs_format.h"
 #include "xfs_log.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index e4f8b2d..8174aad 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -20,6 +20,7 @@
 
 #include "xfs.h"
 #include "xfs_fs.h"
+#include "xfs_format.h"
 #include "xfs_bit.h"
 #include "xfs_log.h"
 #include "xfs_trans.h"
@@ -37,7 +38,6 @@
 #include "xfs_error.h"
 #include "xfs_attr.h"
 #include "xfs_buf_item.h"
-#include "xfs_utils.h"
 #include "xfs_qm.h"
 #include "xfs_trace.h"
 #include "xfs_icache.h"
@@ -247,9 +247,7 @@
 	xfs_ilock(ip, XFS_IOLOCK_EXCL);
 
 	tp = xfs_trans_alloc(mp, XFS_TRANS_TRUNCATE_FILE);
-	error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
-				  XFS_TRANS_PERM_LOG_RES,
-				  XFS_ITRUNCATE_LOG_COUNT);
+	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
 	if (error) {
 		xfs_trans_cancel(tp, 0);
 		xfs_iunlock(ip, XFS_IOLOCK_EXCL);
@@ -296,8 +294,10 @@
 
 	if (flags & XFS_DQ_USER)
 		error = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_uquotino);
-	if (flags & (XFS_DQ_GROUP|XFS_DQ_PROJ))
+	if (flags & XFS_DQ_GROUP)
 		error2 = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_gquotino);
+	if (flags & XFS_DQ_PROJ)
+		error2 = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_pquotino);
 
 	return error ? error : error2;
 }
@@ -404,6 +404,7 @@
 
 /*
  * Return quota status information, such as uquota-off, enforcements, etc.
+ * for Q_XGETQSTAT command.
  */
 int
 xfs_qm_scall_getqstat(
@@ -413,8 +414,10 @@
 	struct xfs_quotainfo	*q = mp->m_quotainfo;
 	struct xfs_inode	*uip = NULL;
 	struct xfs_inode	*gip = NULL;
+	struct xfs_inode	*pip = NULL;
 	bool                    tempuqip = false;
 	bool                    tempgqip = false;
+	bool                    temppqip = false;
 
 	memset(out, 0, sizeof(fs_quota_stat_t));
 
@@ -424,16 +427,14 @@
 		out->qs_gquota.qfs_ino = NULLFSINO;
 		return (0);
 	}
+
 	out->qs_flags = (__uint16_t) xfs_qm_export_flags(mp->m_qflags &
 							(XFS_ALL_QUOTA_ACCT|
 							 XFS_ALL_QUOTA_ENFD));
-	out->qs_pad = 0;
-	out->qs_uquota.qfs_ino = mp->m_sb.sb_uquotino;
-	out->qs_gquota.qfs_ino = mp->m_sb.sb_gquotino;
-
 	if (q) {
 		uip = q->qi_uquotaip;
 		gip = q->qi_gquotaip;
+		pip = q->qi_pquotaip;
 	}
 	if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) {
 		if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
@@ -445,18 +446,122 @@
 					0, 0, &gip) == 0)
 			tempgqip = true;
 	}
+	/*
+	 * Q_XGETQSTAT doesn't have room for both group and project quotas.
+	 * So, allow the project quota values to be copied out only if
+	 * there is no group quota information available.
+	 */
+	if (!gip) {
+		if (!pip && mp->m_sb.sb_pquotino != NULLFSINO) {
+			if (xfs_iget(mp, NULL, mp->m_sb.sb_pquotino,
+						0, 0, &pip) == 0)
+				temppqip = true;
+		}
+	} else
+		pip = NULL;
+	if (uip) {
+		out->qs_uquota.qfs_ino = mp->m_sb.sb_uquotino;
+		out->qs_uquota.qfs_nblks = uip->i_d.di_nblocks;
+		out->qs_uquota.qfs_nextents = uip->i_d.di_nextents;
+		if (tempuqip)
+			IRELE(uip);
+	}
+
+	if (gip) {
+		out->qs_gquota.qfs_ino = mp->m_sb.sb_gquotino;
+		out->qs_gquota.qfs_nblks = gip->i_d.di_nblocks;
+		out->qs_gquota.qfs_nextents = gip->i_d.di_nextents;
+		if (tempgqip)
+			IRELE(gip);
+	}
+	if (pip) {
+		out->qs_gquota.qfs_ino = mp->m_sb.sb_gquotino;
+		out->qs_gquota.qfs_nblks = pip->i_d.di_nblocks;
+		out->qs_gquota.qfs_nextents = pip->i_d.di_nextents;
+		if (temppqip)
+			IRELE(pip);
+	}
+	if (q) {
+		out->qs_incoredqs = q->qi_dquots;
+		out->qs_btimelimit = q->qi_btimelimit;
+		out->qs_itimelimit = q->qi_itimelimit;
+		out->qs_rtbtimelimit = q->qi_rtbtimelimit;
+		out->qs_bwarnlimit = q->qi_bwarnlimit;
+		out->qs_iwarnlimit = q->qi_iwarnlimit;
+	}
+	return 0;
+}
+
+/*
+ * Return quota status information, such as uquota-off, enforcements, etc.
+ * for Q_XGETQSTATV command, to support separate project quota field.
+ */
+int
+xfs_qm_scall_getqstatv(
+	struct xfs_mount	*mp,
+	struct fs_quota_statv	*out)
+{
+	struct xfs_quotainfo	*q = mp->m_quotainfo;
+	struct xfs_inode	*uip = NULL;
+	struct xfs_inode	*gip = NULL;
+	struct xfs_inode	*pip = NULL;
+	bool                    tempuqip = false;
+	bool                    tempgqip = false;
+	bool                    temppqip = false;
+
+	if (!xfs_sb_version_hasquota(&mp->m_sb)) {
+		out->qs_uquota.qfs_ino = NULLFSINO;
+		out->qs_gquota.qfs_ino = NULLFSINO;
+		out->qs_pquota.qfs_ino = NULLFSINO;
+		return (0);
+	}
+
+	out->qs_flags = (__uint16_t) xfs_qm_export_flags(mp->m_qflags &
+							(XFS_ALL_QUOTA_ACCT|
+							 XFS_ALL_QUOTA_ENFD));
+	out->qs_uquota.qfs_ino = mp->m_sb.sb_uquotino;
+	out->qs_gquota.qfs_ino = mp->m_sb.sb_gquotino;
+	out->qs_pquota.qfs_ino = mp->m_sb.sb_pquotino;
+
+	if (q) {
+		uip = q->qi_uquotaip;
+		gip = q->qi_gquotaip;
+		pip = q->qi_pquotaip;
+	}
+	if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) {
+		if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
+					0, 0, &uip) == 0)
+			tempuqip = true;
+	}
+	if (!gip && mp->m_sb.sb_gquotino != NULLFSINO) {
+		if (xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
+					0, 0, &gip) == 0)
+			tempgqip = true;
+	}
+	if (!pip && mp->m_sb.sb_pquotino != NULLFSINO) {
+		if (xfs_iget(mp, NULL, mp->m_sb.sb_pquotino,
+					0, 0, &pip) == 0)
+			temppqip = true;
+	}
 	if (uip) {
 		out->qs_uquota.qfs_nblks = uip->i_d.di_nblocks;
 		out->qs_uquota.qfs_nextents = uip->i_d.di_nextents;
 		if (tempuqip)
 			IRELE(uip);
 	}
+
 	if (gip) {
 		out->qs_gquota.qfs_nblks = gip->i_d.di_nblocks;
 		out->qs_gquota.qfs_nextents = gip->i_d.di_nextents;
 		if (tempgqip)
 			IRELE(gip);
 	}
+	if (pip) {
+		out->qs_pquota.qfs_nblks = pip->i_d.di_nblocks;
+		out->qs_pquota.qfs_nextents = pip->i_d.di_nextents;
+		if (temppqip)
+			IRELE(pip);
+	}
 	if (q) {
 		out->qs_incoredqs = q->qi_dquots;
 		out->qs_btimelimit = q->qi_btimelimit;
@@ -515,8 +620,7 @@
 	xfs_dqunlock(dqp);
 
 	tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM);
-	error = xfs_trans_reserve(tp, 0, XFS_QM_SETQLIM_LOG_RES(mp),
-				  0, 0, XFS_DEFAULT_LOG_COUNT);
+	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_setqlim, 0, 0);
 	if (error) {
 		xfs_trans_cancel(tp, 0);
 		goto out_rele;
@@ -650,8 +754,7 @@
 
 	tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF_END);
 
-	error = xfs_trans_reserve(tp, 0, XFS_QM_QUOTAOFF_END_LOG_RES(mp),
-				  0, 0, XFS_DEFAULT_LOG_COUNT);
+	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_equotaoff, 0, 0);
 	if (error) {
 		xfs_trans_cancel(tp, 0);
 		return (error);
@@ -684,8 +787,7 @@
 	uint			oldsbqflag=0;
 
 	tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF);
-	error = xfs_trans_reserve(tp, 0, XFS_QM_QUOTAOFF_LOG_RES(mp),
-				  0, 0, XFS_DEFAULT_LOG_COUNT);
+	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_quotaoff, 0, 0);
 	if (error)
 		goto error0;
 
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index b14f42c..e7d84d2 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -18,268 +18,15 @@
 #ifndef __XFS_QUOTA_H__
 #define __XFS_QUOTA_H__
 
+#include "xfs_quota_defs.h"
+
+/*
+ * Kernel only quota definitions and functions
+ */
+
 struct xfs_trans;
 
 /*
- * The ondisk form of a dquot structure.
- */
-#define XFS_DQUOT_MAGIC		0x4451		/* 'DQ' */
-#define XFS_DQUOT_VERSION	(u_int8_t)0x01	/* latest version number */
-
-/*
- * uid_t and gid_t are hard-coded to 32 bits in the inode.
- * Hence, an 'id' in a dquot is 32 bits..
- */
-typedef __uint32_t	xfs_dqid_t;
-
-/*
- * Even though users may not have quota limits occupying all 64-bits,
- * they may need 64-bit accounting. Hence, 64-bit quota-counters,
- * and quota-limits. This is a waste in the common case, but hey ...
- */
-typedef __uint64_t	xfs_qcnt_t;
-typedef __uint16_t	xfs_qwarncnt_t;
-
-/*
- * This is the main portion of the on-disk representation of quota
- * information for a user. This is the q_core of the xfs_dquot_t that
- * is kept in kernel memory. We pad this with some more expansion room
- * to construct the on disk structure.
- */
-typedef struct	xfs_disk_dquot {
-	__be16		d_magic;	/* dquot magic = XFS_DQUOT_MAGIC */
-	__u8		d_version;	/* dquot version */
-	__u8		d_flags;	/* XFS_DQ_USER/PROJ/GROUP */
-	__be32		d_id;		/* user,project,group id */
-	__be64		d_blk_hardlimit;/* absolute limit on disk blks */
-	__be64		d_blk_softlimit;/* preferred limit on disk blks */
-	__be64		d_ino_hardlimit;/* maximum # allocated inodes */
-	__be64		d_ino_softlimit;/* preferred inode limit */
-	__be64		d_bcount;	/* disk blocks owned by the user */
-	__be64		d_icount;	/* inodes owned by the user */
-	__be32		d_itimer;	/* zero if within inode limits if not,
-					   this is when we refuse service */
-	__be32		d_btimer;	/* similar to above; for disk blocks */
-	__be16		d_iwarns;	/* warnings issued wrt num inodes */
-	__be16		d_bwarns;	/* warnings issued wrt disk blocks */
-	__be32		d_pad0;		/* 64 bit align */
-	__be64		d_rtb_hardlimit;/* absolute limit on realtime blks */
-	__be64		d_rtb_softlimit;/* preferred limit on RT disk blks */
-	__be64		d_rtbcount;	/* realtime blocks owned */
-	__be32		d_rtbtimer;	/* similar to above; for RT disk blocks */
-	__be16		d_rtbwarns;	/* warnings issued wrt RT disk blocks */
-	__be16		d_pad;
-} xfs_disk_dquot_t;
-
-/*
- * This is what goes on disk. This is separated from the xfs_disk_dquot because
- * carrying the unnecessary padding would be a waste of memory.
- */
-typedef struct xfs_dqblk {
-	xfs_disk_dquot_t  dd_diskdq;	/* portion that lives incore as well */
-	char		  dd_fill[4];	/* filling for posterity */
-
-	/*
-	 * These two are only present on filesystems with the CRC bits set.
-	 */
-	__be32		  dd_crc;	/* checksum */
-	__be64		  dd_lsn;	/* last modification in log */
-	uuid_t		  dd_uuid;	/* location information */
-} xfs_dqblk_t;
-
-#define XFS_DQUOT_CRC_OFF	offsetof(struct xfs_dqblk, dd_crc)
-
-/*
- * flags for q_flags field in the dquot.
- */
-#define XFS_DQ_USER		0x0001		/* a user quota */
-#define XFS_DQ_PROJ		0x0002		/* project quota */
-#define XFS_DQ_GROUP		0x0004		/* a group quota */
-#define XFS_DQ_DIRTY		0x0008		/* dquot is dirty */
-#define XFS_DQ_FREEING		0x0010		/* dquot is beeing torn down */
-
-#define XFS_DQ_ALLTYPES		(XFS_DQ_USER|XFS_DQ_PROJ|XFS_DQ_GROUP)
-
-#define XFS_DQ_FLAGS \
-	{ XFS_DQ_USER,		"USER" }, \
-	{ XFS_DQ_PROJ,		"PROJ" }, \
-	{ XFS_DQ_GROUP,		"GROUP" }, \
-	{ XFS_DQ_DIRTY,		"DIRTY" }, \
-	{ XFS_DQ_FREEING,	"FREEING" }
-
-/*
- * We have the possibility of all three quota types being active at once, and
- * hence free space modification requires modification of all three current
- * dquots in a single transaction. For this case we need to have a reservation
- * of at least 3 dquots.
- *
- * However, a chmod operation can change both UID and GID in a single
- * transaction, resulting in requiring {old, new} x {uid, gid} dquots to be
- * modified. Hence for this case we need to reserve space for at least 4 dquots.
- *
- * And in the worst case, there's a rename operation that can be modifying up to
- * 4 inodes with dquots attached to them. In reality, the only inodes that can
- * have their dquots modified are the source and destination directory inodes
- * due to directory name creation and removal. That can require space allocation
- * and/or freeing on both directory inodes, and hence all three dquots on each
- * inode can be modified. And if the directories are world writeable, all the
- * dquots can be unique and so 6 dquots can be modified....
- *
- * And, of course, we also need to take into account the dquot log format item
- * used to describe each dquot.
- */
-#define XFS_DQUOT_LOGRES(mp)	\
-	((sizeof(struct xfs_dq_logformat) + sizeof(struct xfs_disk_dquot)) * 6)
-
-/*
- * These are the structures used to lay out dquots and quotaoff
- * records on the log. Quite similar to those of inodes.
- */
-
-/*
- * log format struct for dquots.
- * The first two fields must be the type and size fitting into
- * 32 bits : log_recovery code assumes that.
- */
-typedef struct xfs_dq_logformat {
-	__uint16_t		qlf_type;      /* dquot log item type */
-	__uint16_t		qlf_size;      /* size of this item */
-	xfs_dqid_t		qlf_id;	       /* usr/grp/proj id : 32 bits */
-	__int64_t		qlf_blkno;     /* blkno of dquot buffer */
-	__int32_t		qlf_len;       /* len of dquot buffer */
-	__uint32_t		qlf_boffset;   /* off of dquot in buffer */
-} xfs_dq_logformat_t;
-
-/*
- * log format struct for QUOTAOFF records.
- * The first two fields must be the type and size fitting into
- * 32 bits : log_recovery code assumes that.
- * We write two LI_QUOTAOFF logitems per quotaoff, the last one keeps a pointer
- * to the first and ensures that the first logitem is taken out of the AIL
- * only when the last one is securely committed.
- */
-typedef struct xfs_qoff_logformat {
-	unsigned short		qf_type;	/* quotaoff log item type */
-	unsigned short		qf_size;	/* size of this item */
-	unsigned int		qf_flags;	/* USR and/or GRP */
-	char			qf_pad[12];	/* padding for future */
-} xfs_qoff_logformat_t;
-
-
-/*
- * Disk quotas status in m_qflags, and also sb_qflags. 16 bits.
- */
-#define XFS_UQUOTA_ACCT	0x0001  /* user quota accounting ON */
-#define XFS_UQUOTA_ENFD	0x0002  /* user quota limits enforced */
-#define XFS_UQUOTA_CHKD	0x0004  /* quotacheck run on usr quotas */
-#define XFS_PQUOTA_ACCT	0x0008  /* project quota accounting ON */
-#define XFS_OQUOTA_ENFD	0x0010  /* other (grp/prj) quota limits enforced */
-#define XFS_OQUOTA_CHKD	0x0020  /* quotacheck run on other (grp/prj) quotas */
-#define XFS_GQUOTA_ACCT	0x0040  /* group quota accounting ON */
-
-/*
- * Conversion to and from the combined OQUOTA flag (if necessary)
- * is done only in xfs_sb_qflags_to_disk() and xfs_sb_qflags_from_disk()
- */
-#define XFS_GQUOTA_ENFD	0x0080  /* group quota limits enforced */
-#define XFS_GQUOTA_CHKD	0x0100  /* quotacheck run on group quotas */
-#define XFS_PQUOTA_ENFD	0x0200  /* project quota limits enforced */
-#define XFS_PQUOTA_CHKD	0x0400  /* quotacheck run on project quotas */
-
-/*
- * Quota Accounting/Enforcement flags
- */
-#define XFS_ALL_QUOTA_ACCT	\
-		(XFS_UQUOTA_ACCT | XFS_GQUOTA_ACCT | XFS_PQUOTA_ACCT)
-#define XFS_ALL_QUOTA_ENFD	\
-		(XFS_UQUOTA_ENFD | XFS_GQUOTA_ENFD | XFS_PQUOTA_ENFD)
-#define XFS_ALL_QUOTA_CHKD	\
-		(XFS_UQUOTA_CHKD | XFS_GQUOTA_CHKD | XFS_PQUOTA_CHKD)
-
-#define XFS_IS_QUOTA_RUNNING(mp)	((mp)->m_qflags & XFS_ALL_QUOTA_ACCT)
-#define XFS_IS_UQUOTA_RUNNING(mp)	((mp)->m_qflags & XFS_UQUOTA_ACCT)
-#define XFS_IS_PQUOTA_RUNNING(mp)	((mp)->m_qflags & XFS_PQUOTA_ACCT)
-#define XFS_IS_GQUOTA_RUNNING(mp)	((mp)->m_qflags & XFS_GQUOTA_ACCT)
-#define XFS_IS_UQUOTA_ENFORCED(mp)	((mp)->m_qflags & XFS_UQUOTA_ENFD)
-#define XFS_IS_GQUOTA_ENFORCED(mp)	((mp)->m_qflags & XFS_GQUOTA_ENFD)
-#define XFS_IS_PQUOTA_ENFORCED(mp)	((mp)->m_qflags & XFS_PQUOTA_ENFD)
-
-/*
- * Incore only flags for quotaoff - these bits get cleared when quota(s)
- * are in the process of getting turned off. These flags are in m_qflags but
- * never in sb_qflags.
- */
-#define XFS_UQUOTA_ACTIVE	0x1000  /* uquotas are being turned off */
-#define XFS_GQUOTA_ACTIVE	0x2000  /* gquotas are being turned off */
-#define XFS_PQUOTA_ACTIVE	0x4000  /* pquotas are being turned off */
-#define XFS_ALL_QUOTA_ACTIVE	\
-	(XFS_UQUOTA_ACTIVE | XFS_GQUOTA_ACTIVE | XFS_PQUOTA_ACTIVE)
-
-/*
- * Checking XFS_IS_*QUOTA_ON() while holding any inode lock guarantees
- * quota will be not be switched off as long as that inode lock is held.
- */
-#define XFS_IS_QUOTA_ON(mp)	((mp)->m_qflags & (XFS_UQUOTA_ACTIVE | \
-						   XFS_GQUOTA_ACTIVE | \
-						   XFS_PQUOTA_ACTIVE))
-#define XFS_IS_OQUOTA_ON(mp)	((mp)->m_qflags & (XFS_GQUOTA_ACTIVE | \
-						   XFS_PQUOTA_ACTIVE))
-#define XFS_IS_UQUOTA_ON(mp)	((mp)->m_qflags & XFS_UQUOTA_ACTIVE)
-#define XFS_IS_GQUOTA_ON(mp)	((mp)->m_qflags & XFS_GQUOTA_ACTIVE)
-#define XFS_IS_PQUOTA_ON(mp)	((mp)->m_qflags & XFS_PQUOTA_ACTIVE)
-
-/*
- * Flags to tell various functions what to do. Not all of these are meaningful
- * to a single function. None of these XFS_QMOPT_* flags are meant to have
- * persistent values (ie. their values can and will change between versions)
- */
-#define XFS_QMOPT_DQALLOC	0x0000002 /* alloc dquot ondisk if needed */
-#define XFS_QMOPT_UQUOTA	0x0000004 /* user dquot requested */
-#define XFS_QMOPT_PQUOTA	0x0000008 /* project dquot requested */
-#define XFS_QMOPT_FORCE_RES	0x0000010 /* ignore quota limits */
-#define XFS_QMOPT_SBVERSION	0x0000040 /* change superblock version num */
-#define XFS_QMOPT_DOWARN        0x0000400 /* increase warning cnt if needed */
-#define XFS_QMOPT_DQREPAIR	0x0001000 /* repair dquot if damaged */
-#define XFS_QMOPT_GQUOTA	0x0002000 /* group dquot requested */
-#define XFS_QMOPT_ENOSPC	0x0004000 /* enospc instead of edquot (prj) */
-
-/*
- * flags to xfs_trans_mod_dquot to indicate which field needs to be
- * modified.
- */
-#define XFS_QMOPT_RES_REGBLKS	0x0010000
-#define XFS_QMOPT_RES_RTBLKS	0x0020000
-#define XFS_QMOPT_BCOUNT	0x0040000
-#define XFS_QMOPT_ICOUNT	0x0080000
-#define XFS_QMOPT_RTBCOUNT	0x0100000
-#define XFS_QMOPT_DELBCOUNT	0x0200000
-#define XFS_QMOPT_DELRTBCOUNT	0x0400000
-#define XFS_QMOPT_RES_INOS	0x0800000
-
-/*
- * flags for dqalloc.
- */
-#define XFS_QMOPT_INHERIT	0x1000000
-
-/*
- * flags to xfs_trans_mod_dquot.
- */
-#define XFS_TRANS_DQ_RES_BLKS	XFS_QMOPT_RES_REGBLKS
-#define XFS_TRANS_DQ_RES_RTBLKS	XFS_QMOPT_RES_RTBLKS
-#define XFS_TRANS_DQ_RES_INOS	XFS_QMOPT_RES_INOS
-#define XFS_TRANS_DQ_BCOUNT	XFS_QMOPT_BCOUNT
-#define XFS_TRANS_DQ_DELBCOUNT	XFS_QMOPT_DELBCOUNT
-#define XFS_TRANS_DQ_ICOUNT	XFS_QMOPT_ICOUNT
-#define XFS_TRANS_DQ_RTBCOUNT	XFS_QMOPT_RTBCOUNT
-#define XFS_TRANS_DQ_DELRTBCOUNT XFS_QMOPT_DELRTBCOUNT
-
-
-#define XFS_QMOPT_QUOTALL	\
-		(XFS_QMOPT_UQUOTA | XFS_QMOPT_PQUOTA | XFS_QMOPT_GQUOTA)
-#define XFS_QMOPT_RESBLK_MASK	(XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_RES_RTBLKS)
-
-#ifdef __KERNEL__
-/*
  * This check is done typically without holding the inode lock;
  * that may seem racy, but it is harmless in the context that it is used.
  * The inode cannot go inactive as long a reference is kept, and
@@ -301,13 +48,6 @@
 	 (XFS_IS_PQUOTA_ON(mp) && \
 		(mp->m_sb.sb_qflags & XFS_PQUOTA_CHKD) == 0))
 
-#define XFS_MOUNT_QUOTA_ALL	(XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|\
-				 XFS_UQUOTA_CHKD|XFS_GQUOTA_ACCT|\
-				 XFS_GQUOTA_ENFD|XFS_GQUOTA_CHKD|\
-				 XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD|\
-				 XFS_PQUOTA_CHKD)
-
-
 /*
  * The structure kept inside the xfs_trans_t keep track of dquot changes
  * within a transaction and apply them later.
@@ -340,8 +80,9 @@
 		struct xfs_mount *, struct xfs_dquot *,
 		struct xfs_dquot *, struct xfs_dquot *, long, long, uint);
 
-extern int xfs_qm_vop_dqalloc(struct xfs_inode *, uid_t, gid_t, prid_t, uint,
-		struct xfs_dquot **, struct xfs_dquot **, struct xfs_dquot **);
+extern int xfs_qm_vop_dqalloc(struct xfs_inode *, xfs_dqid_t, xfs_dqid_t,
+		prid_t, uint, struct xfs_dquot **, struct xfs_dquot **,
+		struct xfs_dquot **);
 extern void xfs_qm_vop_create_dqattach(struct xfs_trans *, struct xfs_inode *,
 		struct xfs_dquot *, struct xfs_dquot *, struct xfs_dquot *);
 extern int xfs_qm_vop_rename_dqattach(struct xfs_inode **);
@@ -362,9 +103,9 @@
 
 #else
 static inline int
-xfs_qm_vop_dqalloc(struct xfs_inode *ip, uid_t uid, gid_t gid, prid_t prid,
-		uint flags, struct xfs_dquot **udqp, struct xfs_dquot **gdqp,
-		struct xfs_dquot **pdqp)
+xfs_qm_vop_dqalloc(struct xfs_inode *ip, xfs_dqid_t uid, xfs_dqid_t gid,
+		prid_t prid, uint flags, struct xfs_dquot **udqp,
+		struct xfs_dquot **gdqp, struct xfs_dquot **pdqp)
 {
 	*udqp = NULL;
 	*gdqp = NULL;
@@ -415,5 +156,4 @@
 
 extern const struct xfs_buf_ops xfs_dquot_buf_ops;
 
-#endif	/* __KERNEL__ */
 #endif	/* __XFS_QUOTA_H__ */
diff --git a/fs/xfs/xfs_quota_defs.h b/fs/xfs/xfs_quota_defs.h
new file mode 100644
index 0000000..e6b0d6e
--- /dev/null
+++ b/fs/xfs/xfs_quota_defs.h
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_QUOTA_DEFS_H__
+#define __XFS_QUOTA_DEFS_H__
+
+/*
+ * Quota definitions shared between user and kernel source trees.
+ */
+
+/*
+ * Even though users may not have quota limits occupying all 64-bits,
+ * they may need 64-bit accounting. Hence, 64-bit quota-counters,
+ * and quota-limits. This is a waste in the common case, but hey ...
+ */
+typedef __uint64_t	xfs_qcnt_t;
+typedef __uint16_t	xfs_qwarncnt_t;
+
+/*
+ * flags for q_flags field in the dquot.
+ */
+#define XFS_DQ_USER		0x0001		/* a user quota */
+#define XFS_DQ_PROJ		0x0002		/* project quota */
+#define XFS_DQ_GROUP		0x0004		/* a group quota */
+#define XFS_DQ_DIRTY		0x0008		/* dquot is dirty */
+#define XFS_DQ_FREEING		0x0010		/* dquot is beeing torn down */
+
+#define XFS_DQ_ALLTYPES		(XFS_DQ_USER|XFS_DQ_PROJ|XFS_DQ_GROUP)
+
+#define XFS_DQ_FLAGS \
+	{ XFS_DQ_USER,		"USER" }, \
+	{ XFS_DQ_PROJ,		"PROJ" }, \
+	{ XFS_DQ_GROUP,		"GROUP" }, \
+	{ XFS_DQ_DIRTY,		"DIRTY" }, \
+	{ XFS_DQ_FREEING,	"FREEING" }
+
+/*
+ * We have the possibility of all three quota types being active at once, and
+ * hence free space modification requires modification of all three current
+ * dquots in a single transaction. For this case we need to have a reservation
+ * of at least 3 dquots.
+ *
+ * However, a chmod operation can change both UID and GID in a single
+ * transaction, resulting in requiring {old, new} x {uid, gid} dquots to be
+ * modified. Hence for this case we need to reserve space for at least 4 dquots.
+ *
+ * And in the worst case, there's a rename operation that can be modifying up to
+ * 4 inodes with dquots attached to them. In reality, the only inodes that can
+ * have their dquots modified are the source and destination directory inodes
+ * due to directory name creation and removal. That can require space allocation
+ * and/or freeing on both directory inodes, and hence all three dquots on each
+ * inode can be modified. And if the directories are world writeable, all the
+ * dquots can be unique and so 6 dquots can be modified....
+ *
+ * And, of course, we also need to take into account the dquot log format item
+ * used to describe each dquot.
+ */
+#define XFS_DQUOT_LOGRES(mp)	\
+	((sizeof(struct xfs_dq_logformat) + sizeof(struct xfs_disk_dquot)) * 6)
+
+#define XFS_IS_QUOTA_RUNNING(mp)	((mp)->m_qflags & XFS_ALL_QUOTA_ACCT)
+#define XFS_IS_UQUOTA_RUNNING(mp)	((mp)->m_qflags & XFS_UQUOTA_ACCT)
+#define XFS_IS_PQUOTA_RUNNING(mp)	((mp)->m_qflags & XFS_PQUOTA_ACCT)
+#define XFS_IS_GQUOTA_RUNNING(mp)	((mp)->m_qflags & XFS_GQUOTA_ACCT)
+#define XFS_IS_UQUOTA_ENFORCED(mp)	((mp)->m_qflags & XFS_UQUOTA_ENFD)
+#define XFS_IS_GQUOTA_ENFORCED(mp)	((mp)->m_qflags & XFS_GQUOTA_ENFD)
+#define XFS_IS_PQUOTA_ENFORCED(mp)	((mp)->m_qflags & XFS_PQUOTA_ENFD)
+
+/*
+ * Incore only flags for quotaoff - these bits get cleared when quota(s)
+ * are in the process of getting turned off. These flags are in m_qflags but
+ * never in sb_qflags.
+ */
+#define XFS_UQUOTA_ACTIVE	0x1000  /* uquotas are being turned off */
+#define XFS_GQUOTA_ACTIVE	0x2000  /* gquotas are being turned off */
+#define XFS_PQUOTA_ACTIVE	0x4000  /* pquotas are being turned off */
+#define XFS_ALL_QUOTA_ACTIVE	\
+	(XFS_UQUOTA_ACTIVE | XFS_GQUOTA_ACTIVE | XFS_PQUOTA_ACTIVE)
+
+/*
+ * Checking XFS_IS_*QUOTA_ON() while holding any inode lock guarantees
+ * quota will be not be switched off as long as that inode lock is held.
+ */
+#define XFS_IS_QUOTA_ON(mp)	((mp)->m_qflags & (XFS_UQUOTA_ACTIVE | \
+						   XFS_GQUOTA_ACTIVE | \
+						   XFS_PQUOTA_ACTIVE))
+#define XFS_IS_OQUOTA_ON(mp)	((mp)->m_qflags & (XFS_GQUOTA_ACTIVE | \
+						   XFS_PQUOTA_ACTIVE))
+#define XFS_IS_UQUOTA_ON(mp)	((mp)->m_qflags & XFS_UQUOTA_ACTIVE)
+#define XFS_IS_GQUOTA_ON(mp)	((mp)->m_qflags & XFS_GQUOTA_ACTIVE)
+#define XFS_IS_PQUOTA_ON(mp)	((mp)->m_qflags & XFS_PQUOTA_ACTIVE)
+
+/*
+ * Flags to tell various functions what to do. Not all of these are meaningful
+ * to a single function. None of these XFS_QMOPT_* flags are meant to have
+ * persistent values (ie. their values can and will change between versions)
+ */
+#define XFS_QMOPT_DQALLOC	0x0000002 /* alloc dquot ondisk if needed */
+#define XFS_QMOPT_UQUOTA	0x0000004 /* user dquot requested */
+#define XFS_QMOPT_PQUOTA	0x0000008 /* project dquot requested */
+#define XFS_QMOPT_FORCE_RES	0x0000010 /* ignore quota limits */
+#define XFS_QMOPT_SBVERSION	0x0000040 /* change superblock version num */
+#define XFS_QMOPT_DOWARN        0x0000400 /* increase warning cnt if needed */
+#define XFS_QMOPT_DQREPAIR	0x0001000 /* repair dquot if damaged */
+#define XFS_QMOPT_GQUOTA	0x0002000 /* group dquot requested */
+#define XFS_QMOPT_ENOSPC	0x0004000 /* enospc instead of edquot (prj) */
+
+/*
+ * flags to xfs_trans_mod_dquot to indicate which field needs to be
+ * modified.
+ */
+#define XFS_QMOPT_RES_REGBLKS	0x0010000
+#define XFS_QMOPT_RES_RTBLKS	0x0020000
+#define XFS_QMOPT_BCOUNT	0x0040000
+#define XFS_QMOPT_ICOUNT	0x0080000
+#define XFS_QMOPT_RTBCOUNT	0x0100000
+#define XFS_QMOPT_DELBCOUNT	0x0200000
+#define XFS_QMOPT_DELRTBCOUNT	0x0400000
+#define XFS_QMOPT_RES_INOS	0x0800000
+
+/*
+ * flags for dqalloc.
+ */
+#define XFS_QMOPT_INHERIT	0x1000000
+
+/*
+ * flags to xfs_trans_mod_dquot.
+ */
+#define XFS_TRANS_DQ_RES_BLKS	XFS_QMOPT_RES_REGBLKS
+#define XFS_TRANS_DQ_RES_RTBLKS	XFS_QMOPT_RES_RTBLKS
+#define XFS_TRANS_DQ_RES_INOS	XFS_QMOPT_RES_INOS
+#define XFS_TRANS_DQ_BCOUNT	XFS_QMOPT_BCOUNT
+#define XFS_TRANS_DQ_DELBCOUNT	XFS_QMOPT_DELBCOUNT
+#define XFS_TRANS_DQ_ICOUNT	XFS_QMOPT_ICOUNT
+#define XFS_TRANS_DQ_RTBCOUNT	XFS_QMOPT_RTBCOUNT
+#define XFS_TRANS_DQ_DELRTBCOUNT XFS_QMOPT_DELRTBCOUNT
+
+
+#define XFS_QMOPT_QUOTALL	\
+		(XFS_QMOPT_UQUOTA | XFS_QMOPT_PQUOTA | XFS_QMOPT_GQUOTA)
+#define XFS_QMOPT_RESBLK_MASK	(XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_RES_RTBLKS)
+
+#endif	/* __XFS_QUOTA_H__ */
diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c
index 20e30f9..1326d81 100644
--- a/fs/xfs/xfs_quotaops.c
+++ b/fs/xfs/xfs_quotaops.c
@@ -16,8 +16,10 @@
  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #include "xfs.h"
-#include "xfs_sb.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
 #include "xfs_log.h"
+#include "xfs_sb.h"
 #include "xfs_ag.h"
 #include "xfs_mount.h"
 #include "xfs_quota.h"
@@ -54,6 +56,18 @@
 }
 
 STATIC int
+xfs_fs_get_xstatev(
+	struct super_block	*sb,
+	struct fs_quota_statv	*fqs)
+{
+	struct xfs_mount	*mp = XFS_M(sb);
+
+	if (!XFS_IS_QUOTA_RUNNING(mp))
+		return -ENOSYS;
+	return -xfs_qm_scall_getqstatv(mp, fqs);
+}
+
+STATIC int
 xfs_fs_set_xstate(
 	struct super_block	*sb,
 	unsigned int		uflags,
@@ -133,6 +147,7 @@
 }
 
 const struct quotactl_ops xfs_quotactl_operations = {
+	.get_xstatev		= xfs_fs_get_xstatev,
 	.get_xstate		= xfs_fs_get_xstate,
 	.set_xstate		= xfs_fs_set_xstate,
 	.get_dqblk		= xfs_fs_get_dqblk,
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
deleted file mode 100644
index 30ff5f4..0000000
--- a/fs/xfs/xfs_rename.c
+++ /dev/null
@@ -1,346 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_types.h"
-#include "xfs_log.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_dir2.h"
-#include "xfs_mount.h"
-#include "xfs_da_btree.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_inode_item.h"
-#include "xfs_bmap.h"
-#include "xfs_error.h"
-#include "xfs_quota.h"
-#include "xfs_utils.h"
-#include "xfs_trans_space.h"
-#include "xfs_vnodeops.h"
-#include "xfs_trace.h"
-
-
-/*
- * Enter all inodes for a rename transaction into a sorted array.
- */
-STATIC void
-xfs_sort_for_rename(
-	xfs_inode_t	*dp1,	/* in: old (source) directory inode */
-	xfs_inode_t	*dp2,	/* in: new (target) directory inode */
-	xfs_inode_t	*ip1,	/* in: inode of old entry */
-	xfs_inode_t	*ip2,	/* in: inode of new entry, if it
-				   already exists, NULL otherwise. */
-	xfs_inode_t	**i_tab,/* out: array of inode returned, sorted */
-	int		*num_inodes)  /* out: number of inodes in array */
-{
-	xfs_inode_t		*temp;
-	int			i, j;
-
-	/*
-	 * i_tab contains a list of pointers to inodes.  We initialize
-	 * the table here & we'll sort it.  We will then use it to
-	 * order the acquisition of the inode locks.
-	 *
-	 * Note that the table may contain duplicates.  e.g., dp1 == dp2.
-	 */
-	i_tab[0] = dp1;
-	i_tab[1] = dp2;
-	i_tab[2] = ip1;
-	if (ip2) {
-		*num_inodes = 4;
-		i_tab[3] = ip2;
-	} else {
-		*num_inodes = 3;
-		i_tab[3] = NULL;
-	}
-
-	/*
-	 * Sort the elements via bubble sort.  (Remember, there are at
-	 * most 4 elements to sort, so this is adequate.)
-	 */
-	for (i = 0; i < *num_inodes; i++) {
-		for (j = 1; j < *num_inodes; j++) {
-			if (i_tab[j]->i_ino < i_tab[j-1]->i_ino) {
-				temp = i_tab[j];
-				i_tab[j] = i_tab[j-1];
-				i_tab[j-1] = temp;
-			}
-		}
-	}
-}
-
-/*
- * xfs_rename
- */
-int
-xfs_rename(
-	xfs_inode_t	*src_dp,
-	struct xfs_name	*src_name,
-	xfs_inode_t	*src_ip,
-	xfs_inode_t	*target_dp,
-	struct xfs_name	*target_name,
-	xfs_inode_t	*target_ip)
-{
-	xfs_trans_t	*tp = NULL;
-	xfs_mount_t	*mp = src_dp->i_mount;
-	int		new_parent;		/* moving to a new dir */
-	int		src_is_directory;	/* src_name is a directory */
-	int		error;
-	xfs_bmap_free_t free_list;
-	xfs_fsblock_t   first_block;
-	int		cancel_flags;
-	int		committed;
-	xfs_inode_t	*inodes[4];
-	int		spaceres;
-	int		num_inodes;
-
-	trace_xfs_rename(src_dp, target_dp, src_name, target_name);
-
-	new_parent = (src_dp != target_dp);
-	src_is_directory = S_ISDIR(src_ip->i_d.di_mode);
-
-	xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip,
-				inodes, &num_inodes);
-
-	xfs_bmap_init(&free_list, &first_block);
-	tp = xfs_trans_alloc(mp, XFS_TRANS_RENAME);
-	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
-	spaceres = XFS_RENAME_SPACE_RES(mp, target_name->len);
-	error = xfs_trans_reserve(tp, spaceres, XFS_RENAME_LOG_RES(mp), 0,
-			XFS_TRANS_PERM_LOG_RES, XFS_RENAME_LOG_COUNT);
-	if (error == ENOSPC) {
-		spaceres = 0;
-		error = xfs_trans_reserve(tp, 0, XFS_RENAME_LOG_RES(mp), 0,
-				XFS_TRANS_PERM_LOG_RES, XFS_RENAME_LOG_COUNT);
-	}
-	if (error) {
-		xfs_trans_cancel(tp, 0);
-		goto std_return;
-	}
-
-	/*
-	 * Attach the dquots to the inodes
-	 */
-	error = xfs_qm_vop_rename_dqattach(inodes);
-	if (error) {
-		xfs_trans_cancel(tp, cancel_flags);
-		goto std_return;
-	}
-
-	/*
-	 * Lock all the participating inodes. Depending upon whether
-	 * the target_name exists in the target directory, and
-	 * whether the target directory is the same as the source
-	 * directory, we can lock from 2 to 4 inodes.
-	 */
-	xfs_lock_inodes(inodes, num_inodes, XFS_ILOCK_EXCL);
-
-	/*
-	 * Join all the inodes to the transaction. From this point on,
-	 * we can rely on either trans_commit or trans_cancel to unlock
-	 * them.
-	 */
-	xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL);
-	if (new_parent)
-		xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL);
-	xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL);
-	if (target_ip)
-		xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL);
-
-	/*
-	 * If we are using project inheritance, we only allow renames
-	 * into our tree when the project IDs are the same; else the
-	 * tree quota mechanism would be circumvented.
-	 */
-	if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
-		     (xfs_get_projid(target_dp) != xfs_get_projid(src_ip)))) {
-		error = XFS_ERROR(EXDEV);
-		goto error_return;
-	}
-
-	/*
-	 * Set up the target.
-	 */
-	if (target_ip == NULL) {
-		/*
-		 * If there's no space reservation, check the entry will
-		 * fit before actually inserting it.
-		 */
-		error = xfs_dir_canenter(tp, target_dp, target_name, spaceres);
-		if (error)
-			goto error_return;
-		/*
-		 * If target does not exist and the rename crosses
-		 * directories, adjust the target directory link count
-		 * to account for the ".." reference from the new entry.
-		 */
-		error = xfs_dir_createname(tp, target_dp, target_name,
-						src_ip->i_ino, &first_block,
-						&free_list, spaceres);
-		if (error == ENOSPC)
-			goto error_return;
-		if (error)
-			goto abort_return;
-
-		xfs_trans_ichgtime(tp, target_dp,
-					XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
-
-		if (new_parent && src_is_directory) {
-			error = xfs_bumplink(tp, target_dp);
-			if (error)
-				goto abort_return;
-		}
-	} else { /* target_ip != NULL */
-		/*
-		 * If target exists and it's a directory, check that both
-		 * target and source are directories and that target can be
-		 * destroyed, or that neither is a directory.
-		 */
-		if (S_ISDIR(target_ip->i_d.di_mode)) {
-			/*
-			 * Make sure target dir is empty.
-			 */
-			if (!(xfs_dir_isempty(target_ip)) ||
-			    (target_ip->i_d.di_nlink > 2)) {
-				error = XFS_ERROR(EEXIST);
-				goto error_return;
-			}
-		}
-
-		/*
-		 * Link the source inode under the target name.
-		 * If the source inode is a directory and we are moving
-		 * it across directories, its ".." entry will be
-		 * inconsistent until we replace that down below.
-		 *
-		 * In case there is already an entry with the same
-		 * name at the destination directory, remove it first.
-		 */
-		error = xfs_dir_replace(tp, target_dp, target_name,
-					src_ip->i_ino,
-					&first_block, &free_list, spaceres);
-		if (error)
-			goto abort_return;
-
-		xfs_trans_ichgtime(tp, target_dp,
-					XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
-
-		/*
-		 * Decrement the link count on the target since the target
-		 * dir no longer points to it.
-		 */
-		error = xfs_droplink(tp, target_ip);
-		if (error)
-			goto abort_return;
-
-		if (src_is_directory) {
-			/*
-			 * Drop the link from the old "." entry.
-			 */
-			error = xfs_droplink(tp, target_ip);
-			if (error)
-				goto abort_return;
-		}
-	} /* target_ip != NULL */
-
-	/*
-	 * Remove the source.
-	 */
-	if (new_parent && src_is_directory) {
-		/*
-		 * Rewrite the ".." entry to point to the new
-		 * directory.
-		 */
-		error = xfs_dir_replace(tp, src_ip, &xfs_name_dotdot,
-					target_dp->i_ino,
-					&first_block, &free_list, spaceres);
-		ASSERT(error != EEXIST);
-		if (error)
-			goto abort_return;
-	}
-
-	/*
-	 * We always want to hit the ctime on the source inode.
-	 *
-	 * This isn't strictly required by the standards since the source
-	 * inode isn't really being changed, but old unix file systems did
-	 * it and some incremental backup programs won't work without it.
-	 */
-	xfs_trans_ichgtime(tp, src_ip, XFS_ICHGTIME_CHG);
-	xfs_trans_log_inode(tp, src_ip, XFS_ILOG_CORE);
-
-	/*
-	 * Adjust the link count on src_dp.  This is necessary when
-	 * renaming a directory, either within one parent when
-	 * the target existed, or across two parent directories.
-	 */
-	if (src_is_directory && (new_parent || target_ip != NULL)) {
-
-		/*
-		 * Decrement link count on src_directory since the
-		 * entry that's moved no longer points to it.
-		 */
-		error = xfs_droplink(tp, src_dp);
-		if (error)
-			goto abort_return;
-	}
-
-	error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino,
-					&first_block, &free_list, spaceres);
-	if (error)
-		goto abort_return;
-
-	xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
-	xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE);
-	if (new_parent)
-		xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE);
-
-	/*
-	 * If this is a synchronous mount, make sure that the
-	 * rename transaction goes to disk before returning to
-	 * the user.
-	 */
-	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
-		xfs_trans_set_sync(tp);
-	}
-
-	error = xfs_bmap_finish(&tp, &free_list, &committed);
-	if (error) {
-		xfs_bmap_cancel(&free_list);
-		xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES |
-				 XFS_TRANS_ABORT));
-		goto std_return;
-	}
-
-	/*
-	 * trans_commit will unlock src_ip, target_ip & decrement
-	 * the vnode references.
-	 */
-	return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
-
- abort_return:
-	cancel_flags |= XFS_TRANS_ABORT;
- error_return:
-	xfs_bmap_cancel(&free_list);
-	xfs_trans_cancel(tp, cancel_flags);
- std_return:
-	return error;
-}
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 98dc670..6f9e63c 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -17,25 +17,24 @@
  */
 #include "xfs.h"
 #include "xfs_fs.h"
-#include "xfs_types.h"
+#include "xfs_format.h"
 #include "xfs_bit.h"
 #include "xfs_log.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
-#include "xfs_dir2.h"
 #include "xfs_mount.h"
 #include "xfs_bmap_btree.h"
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
 #include "xfs_alloc.h"
 #include "xfs_bmap.h"
+#include "xfs_bmap_util.h"
 #include "xfs_rtalloc.h"
 #include "xfs_fsops.h"
 #include "xfs_error.h"
 #include "xfs_inode_item.h"
 #include "xfs_trans_space.h"
-#include "xfs_utils.h"
 #include "xfs_trace.h"
 #include "xfs_buf.h"
 #include "xfs_icache.h"
@@ -101,10 +100,9 @@
 		/*
 		 * Reserve space & log for one extent added to the file.
 		 */
-		if ((error = xfs_trans_reserve(tp, resblks,
-				XFS_GROWRTALLOC_LOG_RES(mp), 0,
-				XFS_TRANS_PERM_LOG_RES,
-				XFS_DEFAULT_PERM_LOG_COUNT)))
+		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growdata,
+					  resblks, 0);
+		if (error)
 			goto error_cancel;
 		cancelflags = XFS_TRANS_RELEASE_LOG_RES;
 		/*
@@ -147,8 +145,9 @@
 			/*
 			 * Reserve log for one block zeroing.
 			 */
-			if ((error = xfs_trans_reserve(tp, 0,
-					XFS_GROWRTZERO_LOG_RES(mp), 0, 0, 0)))
+			error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growrtzero,
+						  0, 0);
+			if (error)
 				goto error_cancel;
 			/*
 			 * Lock the bitmap inode.
@@ -736,8 +735,8 @@
 {
 	xfs_rtblock_t	end;		/* end of the allocated extent */
 	int		error;		/* error value */
-	xfs_rtblock_t	postblock;	/* first block allocated > end */
-	xfs_rtblock_t	preblock;	/* first block allocated < start */
+	xfs_rtblock_t	postblock = 0;	/* first block allocated > end */
+	xfs_rtblock_t	preblock = 0;	/* first block allocated < start */
 
 	end = start + len - 1;
 	/*
@@ -1958,8 +1957,9 @@
 		 * Start a transaction, get the log reservation.
 		 */
 		tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFSRT_FREE);
-		if ((error = xfs_trans_reserve(tp, 0,
-				XFS_GROWRTFREE_LOG_RES(nmp), 0, 0, 0)))
+		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growrtfree,
+					  0, 0);
+		if (error)
 			goto error_cancel;
 		/*
 		 * Lock out other callers by grabbing the bitmap inode lock.
@@ -2148,7 +2148,7 @@
 	ASSERT(mp->m_rbmip->i_itemp != NULL);
 	ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL));
 
-#if defined(__KERNEL__) && defined(DEBUG)
+#ifdef DEBUG
 	/*
 	 * Check to see that this whole range is currently allocated.
 	 */
diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h
index f7f3a35..b2a1a24 100644
--- a/fs/xfs/xfs_rtalloc.h
+++ b/fs/xfs/xfs_rtalloc.h
@@ -18,58 +18,11 @@
 #ifndef __XFS_RTALLOC_H__
 #define	__XFS_RTALLOC_H__
 
+/* kernel only definitions and functions */
+
 struct xfs_mount;
 struct xfs_trans;
 
-/* Min and max rt extent sizes, specified in bytes */
-#define	XFS_MAX_RTEXTSIZE	(1024 * 1024 * 1024)	/* 1GB */
-#define	XFS_DFL_RTEXTSIZE	(64 * 1024)	        /* 64kB */
-#define	XFS_MIN_RTEXTSIZE	(4 * 1024)		/* 4kB */
-
-/*
- * Constants for bit manipulations.
- */
-#define	XFS_NBBYLOG	3		/* log2(NBBY) */
-#define	XFS_WORDLOG	2		/* log2(sizeof(xfs_rtword_t)) */
-#define	XFS_NBWORDLOG	(XFS_NBBYLOG + XFS_WORDLOG)
-#define	XFS_NBWORD	(1 << XFS_NBWORDLOG)
-#define	XFS_WORDMASK	((1 << XFS_WORDLOG) - 1)
-
-#define	XFS_BLOCKSIZE(mp)	((mp)->m_sb.sb_blocksize)
-#define	XFS_BLOCKMASK(mp)	((mp)->m_blockmask)
-#define	XFS_BLOCKWSIZE(mp)	((mp)->m_blockwsize)
-#define	XFS_BLOCKWMASK(mp)	((mp)->m_blockwmask)
-
-/*
- * Summary and bit manipulation macros.
- */
-#define	XFS_SUMOFFS(mp,ls,bb)	((int)((ls) * (mp)->m_sb.sb_rbmblocks + (bb)))
-#define	XFS_SUMOFFSTOBLOCK(mp,s)	\
-	(((s) * (uint)sizeof(xfs_suminfo_t)) >> (mp)->m_sb.sb_blocklog)
-#define	XFS_SUMPTR(mp,bp,so)	\
-	((xfs_suminfo_t *)((bp)->b_addr + \
-		(((so) * (uint)sizeof(xfs_suminfo_t)) & XFS_BLOCKMASK(mp))))
-
-#define	XFS_BITTOBLOCK(mp,bi)	((bi) >> (mp)->m_blkbit_log)
-#define	XFS_BLOCKTOBIT(mp,bb)	((bb) << (mp)->m_blkbit_log)
-#define	XFS_BITTOWORD(mp,bi)	\
-	((int)(((bi) >> XFS_NBWORDLOG) & XFS_BLOCKWMASK(mp)))
-
-#define	XFS_RTMIN(a,b)	((a) < (b) ? (a) : (b))
-#define	XFS_RTMAX(a,b)	((a) > (b) ? (a) : (b))
-
-#define	XFS_RTLOBIT(w)	xfs_lowbit32(w)
-#define	XFS_RTHIBIT(w)	xfs_highbit32(w)
-
-#if XFS_BIG_BLKNOS
-#define	XFS_RTBLOCKLOG(b)	xfs_highbit64(b)
-#else
-#define	XFS_RTBLOCKLOG(b)	xfs_highbit32(b)
-#endif
-
-
-#ifdef __KERNEL__
-
 #ifdef CONFIG_XFS_RT
 /*
  * Function prototypes for exported functions.
@@ -161,6 +114,4 @@
 # define xfs_rtunmount_inodes(m)
 #endif	/* CONFIG_XFS_RT */
 
-#endif	/* __KERNEL__ */
-
 #endif	/* __XFS_RTALLOC_H__ */
diff --git a/fs/xfs/xfs_sb.c b/fs/xfs/xfs_sb.c
new file mode 100644
index 0000000..a5b59d9
--- /dev/null
+++ b/fs/xfs/xfs_sb.c
@@ -0,0 +1,834 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_format.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_trans_priv.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+#include "xfs_da_btree.h"
+#include "xfs_dir2_format.h"
+#include "xfs_dir2.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_btree.h"
+#include "xfs_ialloc.h"
+#include "xfs_alloc.h"
+#include "xfs_rtalloc.h"
+#include "xfs_bmap.h"
+#include "xfs_error.h"
+#include "xfs_quota.h"
+#include "xfs_fsops.h"
+#include "xfs_trace.h"
+#include "xfs_cksum.h"
+#include "xfs_buf_item.h"
+
+/*
+ * Physical superblock buffer manipulations. Shared with libxfs in userspace.
+ */
+
+static const struct {
+	short offset;
+	short type;	/* 0 = integer
+			 * 1 = binary / string (no translation)
+			 */
+} xfs_sb_info[] = {
+	{ offsetof(xfs_sb_t, sb_magicnum),	0 },
+	{ offsetof(xfs_sb_t, sb_blocksize),	0 },
+	{ offsetof(xfs_sb_t, sb_dblocks),	0 },
+	{ offsetof(xfs_sb_t, sb_rblocks),	0 },
+	{ offsetof(xfs_sb_t, sb_rextents),	0 },
+	{ offsetof(xfs_sb_t, sb_uuid),		1 },
+	{ offsetof(xfs_sb_t, sb_logstart),	0 },
+	{ offsetof(xfs_sb_t, sb_rootino),	0 },
+	{ offsetof(xfs_sb_t, sb_rbmino),	0 },
+	{ offsetof(xfs_sb_t, sb_rsumino),	0 },
+	{ offsetof(xfs_sb_t, sb_rextsize),	0 },
+	{ offsetof(xfs_sb_t, sb_agblocks),	0 },
+	{ offsetof(xfs_sb_t, sb_agcount),	0 },
+	{ offsetof(xfs_sb_t, sb_rbmblocks),	0 },
+	{ offsetof(xfs_sb_t, sb_logblocks),	0 },
+	{ offsetof(xfs_sb_t, sb_versionnum),	0 },
+	{ offsetof(xfs_sb_t, sb_sectsize),	0 },
+	{ offsetof(xfs_sb_t, sb_inodesize),	0 },
+	{ offsetof(xfs_sb_t, sb_inopblock),	0 },
+	{ offsetof(xfs_sb_t, sb_fname[0]),	1 },
+	{ offsetof(xfs_sb_t, sb_blocklog),	0 },
+	{ offsetof(xfs_sb_t, sb_sectlog),	0 },
+	{ offsetof(xfs_sb_t, sb_inodelog),	0 },
+	{ offsetof(xfs_sb_t, sb_inopblog),	0 },
+	{ offsetof(xfs_sb_t, sb_agblklog),	0 },
+	{ offsetof(xfs_sb_t, sb_rextslog),	0 },
+	{ offsetof(xfs_sb_t, sb_inprogress),	0 },
+	{ offsetof(xfs_sb_t, sb_imax_pct),	0 },
+	{ offsetof(xfs_sb_t, sb_icount),	0 },
+	{ offsetof(xfs_sb_t, sb_ifree),		0 },
+	{ offsetof(xfs_sb_t, sb_fdblocks),	0 },
+	{ offsetof(xfs_sb_t, sb_frextents),	0 },
+	{ offsetof(xfs_sb_t, sb_uquotino),	0 },
+	{ offsetof(xfs_sb_t, sb_gquotino),	0 },
+	{ offsetof(xfs_sb_t, sb_qflags),	0 },
+	{ offsetof(xfs_sb_t, sb_flags),		0 },
+	{ offsetof(xfs_sb_t, sb_shared_vn),	0 },
+	{ offsetof(xfs_sb_t, sb_inoalignmt),	0 },
+	{ offsetof(xfs_sb_t, sb_unit),		0 },
+	{ offsetof(xfs_sb_t, sb_width),		0 },
+	{ offsetof(xfs_sb_t, sb_dirblklog),	0 },
+	{ offsetof(xfs_sb_t, sb_logsectlog),	0 },
+	{ offsetof(xfs_sb_t, sb_logsectsize),	0 },
+	{ offsetof(xfs_sb_t, sb_logsunit),	0 },
+	{ offsetof(xfs_sb_t, sb_features2),	0 },
+	{ offsetof(xfs_sb_t, sb_bad_features2),	0 },
+	{ offsetof(xfs_sb_t, sb_features_compat),	0 },
+	{ offsetof(xfs_sb_t, sb_features_ro_compat),	0 },
+	{ offsetof(xfs_sb_t, sb_features_incompat),	0 },
+	{ offsetof(xfs_sb_t, sb_features_log_incompat),	0 },
+	{ offsetof(xfs_sb_t, sb_crc),		0 },
+	{ offsetof(xfs_sb_t, sb_pad),		0 },
+	{ offsetof(xfs_sb_t, sb_pquotino),	0 },
+	{ offsetof(xfs_sb_t, sb_lsn),		0 },
+	{ sizeof(xfs_sb_t),			0 }
+};
+
+/*
+ * Reference counting access wrappers to the perag structures.
+ * Because we never free per-ag structures, the only thing we
+ * have to protect against changes is the tree structure itself.
+ */
+struct xfs_perag *
+xfs_perag_get(
+	struct xfs_mount	*mp,
+	xfs_agnumber_t		agno)
+{
+	struct xfs_perag	*pag;
+	int			ref = 0;
+
+	rcu_read_lock();
+	pag = radix_tree_lookup(&mp->m_perag_tree, agno);
+	if (pag) {
+		ASSERT(atomic_read(&pag->pag_ref) >= 0);
+		ref = atomic_inc_return(&pag->pag_ref);
+	}
+	rcu_read_unlock();
+	trace_xfs_perag_get(mp, agno, ref, _RET_IP_);
+	return pag;
+}
+
+/*
+ * search from @first to find the next perag with the given tag set.
+ */
+struct xfs_perag *
+xfs_perag_get_tag(
+	struct xfs_mount	*mp,
+	xfs_agnumber_t		first,
+	int			tag)
+{
+	struct xfs_perag	*pag;
+	int			found;
+	int			ref;
+
+	rcu_read_lock();
+	found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
+					(void **)&pag, first, 1, tag);
+	if (found <= 0) {
+		rcu_read_unlock();
+		return NULL;
+	}
+	ref = atomic_inc_return(&pag->pag_ref);
+	rcu_read_unlock();
+	trace_xfs_perag_get_tag(mp, pag->pag_agno, ref, _RET_IP_);
+	return pag;
+}
+
+void
+xfs_perag_put(
+	struct xfs_perag	*pag)
+{
+	int	ref;
+
+	ASSERT(atomic_read(&pag->pag_ref) > 0);
+	ref = atomic_dec_return(&pag->pag_ref);
+	trace_xfs_perag_put(pag->pag_mount, pag->pag_agno, ref, _RET_IP_);
+}
+
+/*
+ * Check the validity of the SB found.
+ */
+STATIC int
+xfs_mount_validate_sb(
+	xfs_mount_t	*mp,
+	xfs_sb_t	*sbp,
+	bool		check_inprogress,
+	bool		check_version)
+{
+
+	/*
+	 * If the log device and data device have the
+	 * same device number, the log is internal.
+	 * Consequently, the sb_logstart should be non-zero.  If
+	 * we have a zero sb_logstart in this case, we may be trying to mount
+	 * a volume filesystem in a non-volume manner.
+	 */
+	if (sbp->sb_magicnum != XFS_SB_MAGIC) {
+		xfs_warn(mp, "bad magic number");
+		return XFS_ERROR(EWRONGFS);
+	}
+
+
+	if (!xfs_sb_good_version(sbp)) {
+		xfs_warn(mp, "bad version");
+		return XFS_ERROR(EWRONGFS);
+	}
+
+	/*
+	 * Version 5 superblock feature mask validation. Reject combinations the
+	 * kernel cannot support up front before checking anything else. For
+	 * write validation, we don't need to check feature masks.
+	 */
+	if (check_version && XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) {
+		xfs_alert(mp,
+"Version 5 superblock detected. This kernel has EXPERIMENTAL support enabled!\n"
+"Use of these features in this kernel is at your own risk!");
+
+		if (xfs_sb_has_compat_feature(sbp,
+					XFS_SB_FEAT_COMPAT_UNKNOWN)) {
+			xfs_warn(mp,
+"Superblock has unknown compatible features (0x%x) enabled.\n"
+"Using a more recent kernel is recommended.",
+				(sbp->sb_features_compat &
+						XFS_SB_FEAT_COMPAT_UNKNOWN));
+		}
+
+		if (xfs_sb_has_ro_compat_feature(sbp,
+					XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) {
+			xfs_alert(mp,
+"Superblock has unknown read-only compatible features (0x%x) enabled.",
+				(sbp->sb_features_ro_compat &
+						XFS_SB_FEAT_RO_COMPAT_UNKNOWN));
+			if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
+				xfs_warn(mp,
+"Attempted to mount read-only compatible filesystem read-write.\n"
+"Filesystem can only be safely mounted read only.");
+				return XFS_ERROR(EINVAL);
+			}
+		}
+		if (xfs_sb_has_incompat_feature(sbp,
+					XFS_SB_FEAT_INCOMPAT_UNKNOWN)) {
+			xfs_warn(mp,
+"Superblock has unknown incompatible features (0x%x) enabled.\n"
+"Filesystem can not be safely mounted by this kernel.",
+				(sbp->sb_features_incompat &
+						XFS_SB_FEAT_INCOMPAT_UNKNOWN));
+			return XFS_ERROR(EINVAL);
+		}
+	}
+
+	if (xfs_sb_version_has_pquotino(sbp)) {
+		if (sbp->sb_qflags & (XFS_OQUOTA_ENFD | XFS_OQUOTA_CHKD)) {
+			xfs_notice(mp,
+			   "Version 5 of Super block has XFS_OQUOTA bits.\n");
+			return XFS_ERROR(EFSCORRUPTED);
+		}
+	} else if (sbp->sb_qflags & (XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD |
+				XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD)) {
+			xfs_notice(mp,
+"Superblock earlier than Version 5 has XFS_[PQ]UOTA_{ENFD|CHKD} bits.\n");
+			return XFS_ERROR(EFSCORRUPTED);
+	}
+
+	if (unlikely(
+	    sbp->sb_logstart == 0 && mp->m_logdev_targp == mp->m_ddev_targp)) {
+		xfs_warn(mp,
+		"filesystem is marked as having an external log; "
+		"specify logdev on the mount command line.");
+		return XFS_ERROR(EINVAL);
+	}
+
+	if (unlikely(
+	    sbp->sb_logstart != 0 && mp->m_logdev_targp != mp->m_ddev_targp)) {
+		xfs_warn(mp,
+		"filesystem is marked as having an internal log; "
+		"do not specify logdev on the mount command line.");
+		return XFS_ERROR(EINVAL);
+	}
+
+	/*
+	 * More sanity checking.  Most of these were stolen directly from
+	 * xfs_repair.
+	 */
+	if (unlikely(
+	    sbp->sb_agcount <= 0					||
+	    sbp->sb_sectsize < XFS_MIN_SECTORSIZE			||
+	    sbp->sb_sectsize > XFS_MAX_SECTORSIZE			||
+	    sbp->sb_sectlog < XFS_MIN_SECTORSIZE_LOG			||
+	    sbp->sb_sectlog > XFS_MAX_SECTORSIZE_LOG			||
+	    sbp->sb_sectsize != (1 << sbp->sb_sectlog)			||
+	    sbp->sb_blocksize < XFS_MIN_BLOCKSIZE			||
+	    sbp->sb_blocksize > XFS_MAX_BLOCKSIZE			||
+	    sbp->sb_blocklog < XFS_MIN_BLOCKSIZE_LOG			||
+	    sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG			||
+	    sbp->sb_blocksize != (1 << sbp->sb_blocklog)		||
+	    sbp->sb_inodesize < XFS_DINODE_MIN_SIZE			||
+	    sbp->sb_inodesize > XFS_DINODE_MAX_SIZE			||
+	    sbp->sb_inodelog < XFS_DINODE_MIN_LOG			||
+	    sbp->sb_inodelog > XFS_DINODE_MAX_LOG			||
+	    sbp->sb_inodesize != (1 << sbp->sb_inodelog)		||
+	    (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog)	||
+	    (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE)	||
+	    (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE)	||
+	    (sbp->sb_imax_pct > 100 /* zero sb_imax_pct is valid */)	||
+	    sbp->sb_dblocks == 0					||
+	    sbp->sb_dblocks > XFS_MAX_DBLOCKS(sbp)			||
+	    sbp->sb_dblocks < XFS_MIN_DBLOCKS(sbp))) {
+		XFS_CORRUPTION_ERROR("SB sanity check failed",
+				XFS_ERRLEVEL_LOW, mp, sbp);
+		return XFS_ERROR(EFSCORRUPTED);
+	}
+
+	/*
+	 * Until this is fixed only page-sized or smaller data blocks work.
+	 */
+	if (unlikely(sbp->sb_blocksize > PAGE_SIZE)) {
+		xfs_warn(mp,
+		"File system with blocksize %d bytes. "
+		"Only pagesize (%ld) or less will currently work.",
+				sbp->sb_blocksize, PAGE_SIZE);
+		return XFS_ERROR(ENOSYS);
+	}
+
+	/*
+	 * Currently only very few inode sizes are supported.
+	 */
+	switch (sbp->sb_inodesize) {
+	case 256:
+	case 512:
+	case 1024:
+	case 2048:
+		break;
+	default:
+		xfs_warn(mp, "inode size of %d bytes not supported",
+				sbp->sb_inodesize);
+		return XFS_ERROR(ENOSYS);
+	}
+
+	if (xfs_sb_validate_fsb_count(sbp, sbp->sb_dblocks) ||
+	    xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) {
+		xfs_warn(mp,
+		"file system too large to be mounted on this system.");
+		return XFS_ERROR(EFBIG);
+	}
+
+	if (check_inprogress && sbp->sb_inprogress) {
+		xfs_warn(mp, "Offline file system operation in progress!");
+		return XFS_ERROR(EFSCORRUPTED);
+	}
+
+	/*
+	 * Version 1 directory format has never worked on Linux.
+	 */
+	if (unlikely(!xfs_sb_version_hasdirv2(sbp))) {
+		xfs_warn(mp, "file system using version 1 directory format");
+		return XFS_ERROR(ENOSYS);
+	}
+
+	return 0;
+}
+
+void
+xfs_sb_quota_from_disk(struct xfs_sb *sbp)
+{
+	/*
+	 * older mkfs doesn't initialize quota inodes to NULLFSINO. This
+	 * leads to in-core values having two different values for a quota
+	 * inode to be invalid: 0 and NULLFSINO. Change it to a single value
+	 * NULLFSINO.
+	 *
+	 * Note that this change affect only the in-core values. These
+	 * values are not written back to disk unless any quota information
+	 * is written to the disk. Even in that case, sb_pquotino field is
+	 * not written to disk unless the superblock supports pquotino.
+	 */
+	if (sbp->sb_uquotino == 0)
+		sbp->sb_uquotino = NULLFSINO;
+	if (sbp->sb_gquotino == 0)
+		sbp->sb_gquotino = NULLFSINO;
+	if (sbp->sb_pquotino == 0)
+		sbp->sb_pquotino = NULLFSINO;
+
+	/*
+	 * We need to do these manipilations only if we are working
+	 * with an older version of on-disk superblock.
+	 */
+	if (xfs_sb_version_has_pquotino(sbp))
+		return;
+
+	if (sbp->sb_qflags & XFS_OQUOTA_ENFD)
+		sbp->sb_qflags |= (sbp->sb_qflags & XFS_PQUOTA_ACCT) ?
+					XFS_PQUOTA_ENFD : XFS_GQUOTA_ENFD;
+	if (sbp->sb_qflags & XFS_OQUOTA_CHKD)
+		sbp->sb_qflags |= (sbp->sb_qflags & XFS_PQUOTA_ACCT) ?
+					XFS_PQUOTA_CHKD : XFS_GQUOTA_CHKD;
+	sbp->sb_qflags &= ~(XFS_OQUOTA_ENFD | XFS_OQUOTA_CHKD);
+
+	if (sbp->sb_qflags & XFS_PQUOTA_ACCT)  {
+		/*
+		 * In older version of superblock, on-disk superblock only
+		 * has sb_gquotino, and in-core superblock has both sb_gquotino
+		 * and sb_pquotino. But, only one of them is supported at any
+		 * point of time. So, if PQUOTA is set in disk superblock,
+		 * copy over sb_gquotino to sb_pquotino.
+		 */
+		sbp->sb_pquotino = sbp->sb_gquotino;
+		sbp->sb_gquotino = NULLFSINO;
+	}
+}
+
+void
+xfs_sb_from_disk(
+	struct xfs_sb	*to,
+	xfs_dsb_t	*from)
+{
+	to->sb_magicnum = be32_to_cpu(from->sb_magicnum);
+	to->sb_blocksize = be32_to_cpu(from->sb_blocksize);
+	to->sb_dblocks = be64_to_cpu(from->sb_dblocks);
+	to->sb_rblocks = be64_to_cpu(from->sb_rblocks);
+	to->sb_rextents = be64_to_cpu(from->sb_rextents);
+	memcpy(&to->sb_uuid, &from->sb_uuid, sizeof(to->sb_uuid));
+	to->sb_logstart = be64_to_cpu(from->sb_logstart);
+	to->sb_rootino = be64_to_cpu(from->sb_rootino);
+	to->sb_rbmino = be64_to_cpu(from->sb_rbmino);
+	to->sb_rsumino = be64_to_cpu(from->sb_rsumino);
+	to->sb_rextsize = be32_to_cpu(from->sb_rextsize);
+	to->sb_agblocks = be32_to_cpu(from->sb_agblocks);
+	to->sb_agcount = be32_to_cpu(from->sb_agcount);
+	to->sb_rbmblocks = be32_to_cpu(from->sb_rbmblocks);
+	to->sb_logblocks = be32_to_cpu(from->sb_logblocks);
+	to->sb_versionnum = be16_to_cpu(from->sb_versionnum);
+	to->sb_sectsize = be16_to_cpu(from->sb_sectsize);
+	to->sb_inodesize = be16_to_cpu(from->sb_inodesize);
+	to->sb_inopblock = be16_to_cpu(from->sb_inopblock);
+	memcpy(&to->sb_fname, &from->sb_fname, sizeof(to->sb_fname));
+	to->sb_blocklog = from->sb_blocklog;
+	to->sb_sectlog = from->sb_sectlog;
+	to->sb_inodelog = from->sb_inodelog;
+	to->sb_inopblog = from->sb_inopblog;
+	to->sb_agblklog = from->sb_agblklog;
+	to->sb_rextslog = from->sb_rextslog;
+	to->sb_inprogress = from->sb_inprogress;
+	to->sb_imax_pct = from->sb_imax_pct;
+	to->sb_icount = be64_to_cpu(from->sb_icount);
+	to->sb_ifree = be64_to_cpu(from->sb_ifree);
+	to->sb_fdblocks = be64_to_cpu(from->sb_fdblocks);
+	to->sb_frextents = be64_to_cpu(from->sb_frextents);
+	to->sb_uquotino = be64_to_cpu(from->sb_uquotino);
+	to->sb_gquotino = be64_to_cpu(from->sb_gquotino);
+	to->sb_qflags = be16_to_cpu(from->sb_qflags);
+	to->sb_flags = from->sb_flags;
+	to->sb_shared_vn = from->sb_shared_vn;
+	to->sb_inoalignmt = be32_to_cpu(from->sb_inoalignmt);
+	to->sb_unit = be32_to_cpu(from->sb_unit);
+	to->sb_width = be32_to_cpu(from->sb_width);
+	to->sb_dirblklog = from->sb_dirblklog;
+	to->sb_logsectlog = from->sb_logsectlog;
+	to->sb_logsectsize = be16_to_cpu(from->sb_logsectsize);
+	to->sb_logsunit = be32_to_cpu(from->sb_logsunit);
+	to->sb_features2 = be32_to_cpu(from->sb_features2);
+	to->sb_bad_features2 = be32_to_cpu(from->sb_bad_features2);
+	to->sb_features_compat = be32_to_cpu(from->sb_features_compat);
+	to->sb_features_ro_compat = be32_to_cpu(from->sb_features_ro_compat);
+	to->sb_features_incompat = be32_to_cpu(from->sb_features_incompat);
+	to->sb_features_log_incompat =
+				be32_to_cpu(from->sb_features_log_incompat);
+	to->sb_pad = 0;
+	to->sb_pquotino = be64_to_cpu(from->sb_pquotino);
+	to->sb_lsn = be64_to_cpu(from->sb_lsn);
+}
+
+static inline void
+xfs_sb_quota_to_disk(
+	xfs_dsb_t	*to,
+	xfs_sb_t	*from,
+	__int64_t	*fields)
+{
+	__uint16_t	qflags = from->sb_qflags;
+
+	/*
+	 * We need to do these manipilations only if we are working
+	 * with an older version of on-disk superblock.
+	 */
+	if (xfs_sb_version_has_pquotino(from))
+		return;
+
+	if (*fields & XFS_SB_QFLAGS) {
+		/*
+		 * The in-core version of sb_qflags do not have
+		 * XFS_OQUOTA_* flags, whereas the on-disk version
+		 * does.  So, convert incore XFS_{PG}QUOTA_* flags
+		 * to on-disk XFS_OQUOTA_* flags.
+		 */
+		qflags &= ~(XFS_PQUOTA_ENFD | XFS_PQUOTA_CHKD |
+				XFS_GQUOTA_ENFD | XFS_GQUOTA_CHKD);
+
+		if (from->sb_qflags &
+				(XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD))
+			qflags |= XFS_OQUOTA_ENFD;
+		if (from->sb_qflags &
+				(XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD))
+			qflags |= XFS_OQUOTA_CHKD;
+		to->sb_qflags = cpu_to_be16(qflags);
+		*fields &= ~XFS_SB_QFLAGS;
+	}
+
+	/*
+	 * GQUOTINO and PQUOTINO cannot be used together in versions
+	 * of superblock that do not have pquotino. from->sb_flags
+	 * tells us which quota is active and should be copied to
+	 * disk.
+	 */
+	if ((*fields & XFS_SB_GQUOTINO) &&
+				(from->sb_qflags & XFS_GQUOTA_ACCT))
+		to->sb_gquotino = cpu_to_be64(from->sb_gquotino);
+	else if ((*fields & XFS_SB_PQUOTINO) &&
+				(from->sb_qflags & XFS_PQUOTA_ACCT))
+		to->sb_gquotino = cpu_to_be64(from->sb_pquotino);
+
+	*fields &= ~(XFS_SB_PQUOTINO | XFS_SB_GQUOTINO);
+}
+
+/*
+ * Copy in core superblock to ondisk one.
+ *
+ * The fields argument is mask of superblock fields to copy.
+ */
+void
+xfs_sb_to_disk(
+	xfs_dsb_t	*to,
+	xfs_sb_t	*from,
+	__int64_t	fields)
+{
+	xfs_caddr_t	to_ptr = (xfs_caddr_t)to;
+	xfs_caddr_t	from_ptr = (xfs_caddr_t)from;
+	xfs_sb_field_t	f;
+	int		first;
+	int		size;
+
+	ASSERT(fields);
+	if (!fields)
+		return;
+
+	xfs_sb_quota_to_disk(to, from, &fields);
+	while (fields) {
+		f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields);
+		first = xfs_sb_info[f].offset;
+		size = xfs_sb_info[f + 1].offset - first;
+
+		ASSERT(xfs_sb_info[f].type == 0 || xfs_sb_info[f].type == 1);
+
+		if (size == 1 || xfs_sb_info[f].type == 1) {
+			memcpy(to_ptr + first, from_ptr + first, size);
+		} else {
+			switch (size) {
+			case 2:
+				*(__be16 *)(to_ptr + first) =
+				      cpu_to_be16(*(__u16 *)(from_ptr + first));
+				break;
+			case 4:
+				*(__be32 *)(to_ptr + first) =
+				      cpu_to_be32(*(__u32 *)(from_ptr + first));
+				break;
+			case 8:
+				*(__be64 *)(to_ptr + first) =
+				      cpu_to_be64(*(__u64 *)(from_ptr + first));
+				break;
+			default:
+				ASSERT(0);
+			}
+		}
+
+		fields &= ~(1LL << f);
+	}
+}
+
+static int
+xfs_sb_verify(
+	struct xfs_buf	*bp,
+	bool		check_version)
+{
+	struct xfs_mount *mp = bp->b_target->bt_mount;
+	struct xfs_sb	sb;
+
+	xfs_sb_from_disk(&sb, XFS_BUF_TO_SBP(bp));
+
+	/*
+	 * Only check the in progress field for the primary superblock as
+	 * mkfs.xfs doesn't clear it from secondary superblocks.
+	 */
+	return xfs_mount_validate_sb(mp, &sb, bp->b_bn == XFS_SB_DADDR,
+				     check_version);
+}
+
+/*
+ * If the superblock has the CRC feature bit set or the CRC field is non-null,
+ * check that the CRC is valid.  We check the CRC field is non-null because a
+ * single bit error could clear the feature bit and unused parts of the
+ * superblock are supposed to be zero. Hence a non-null crc field indicates that
+ * we've potentially lost a feature bit and we should check it anyway.
+ */
+static void
+xfs_sb_read_verify(
+	struct xfs_buf	*bp)
+{
+	struct xfs_mount *mp = bp->b_target->bt_mount;
+	struct xfs_dsb	*dsb = XFS_BUF_TO_SBP(bp);
+	int		error;
+
+	/*
+	 * open code the version check to avoid needing to convert the entire
+	 * superblock from disk order just to check the version number
+	 */
+	if (dsb->sb_magicnum == cpu_to_be32(XFS_SB_MAGIC) &&
+	    (((be16_to_cpu(dsb->sb_versionnum) & XFS_SB_VERSION_NUMBITS) ==
+						XFS_SB_VERSION_5) ||
+	     dsb->sb_crc != 0)) {
+
+		if (!xfs_verify_cksum(bp->b_addr, be16_to_cpu(dsb->sb_sectsize),
+				      offsetof(struct xfs_sb, sb_crc))) {
+			error = EFSCORRUPTED;
+			goto out_error;
+		}
+	}
+	error = xfs_sb_verify(bp, true);
+
+out_error:
+	if (error) {
+		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
+				     mp, bp->b_addr);
+		xfs_buf_ioerror(bp, error);
+	}
+}
+
+/*
+ * We may be probed for a filesystem match, so we may not want to emit
+ * messages when the superblock buffer is not actually an XFS superblock.
+ * If we find an XFS superblock, then run a normal, noisy mount because we are
+ * really going to mount it and want to know about errors.
+ */
+static void
+xfs_sb_quiet_read_verify(
+	struct xfs_buf	*bp)
+{
+	struct xfs_dsb	*dsb = XFS_BUF_TO_SBP(bp);
+
+
+	if (dsb->sb_magicnum == cpu_to_be32(XFS_SB_MAGIC)) {
+		/* XFS filesystem, verify noisily! */
+		xfs_sb_read_verify(bp);
+		return;
+	}
+	/* quietly fail */
+	xfs_buf_ioerror(bp, EWRONGFS);
+}
+
+static void
+xfs_sb_write_verify(
+	struct xfs_buf		*bp)
+{
+	struct xfs_mount	*mp = bp->b_target->bt_mount;
+	struct xfs_buf_log_item	*bip = bp->b_fspriv;
+	int			error;
+
+	error = xfs_sb_verify(bp, false);
+	if (error) {
+		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
+				     mp, bp->b_addr);
+		xfs_buf_ioerror(bp, error);
+		return;
+	}
+
+	if (!xfs_sb_version_hascrc(&mp->m_sb))
+		return;
+
+	if (bip)
+		XFS_BUF_TO_SBP(bp)->sb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
+
+	xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
+			 offsetof(struct xfs_sb, sb_crc));
+}
+
+const struct xfs_buf_ops xfs_sb_buf_ops = {
+	.verify_read = xfs_sb_read_verify,
+	.verify_write = xfs_sb_write_verify,
+};
+
+const struct xfs_buf_ops xfs_sb_quiet_buf_ops = {
+	.verify_read = xfs_sb_quiet_read_verify,
+	.verify_write = xfs_sb_write_verify,
+};
+
+/*
+ * xfs_mount_common
+ *
+ * Mount initialization code establishing various mount
+ * fields from the superblock associated with the given
+ * mount structure
+ */
+void
+xfs_sb_mount_common(
+	struct xfs_mount *mp,
+	struct xfs_sb	*sbp)
+{
+	mp->m_agfrotor = mp->m_agirotor = 0;
+	spin_lock_init(&mp->m_agirotor_lock);
+	mp->m_maxagi = mp->m_sb.sb_agcount;
+	mp->m_blkbit_log = sbp->sb_blocklog + XFS_NBBYLOG;
+	mp->m_blkbb_log = sbp->sb_blocklog - BBSHIFT;
+	mp->m_sectbb_log = sbp->sb_sectlog - BBSHIFT;
+	mp->m_agno_log = xfs_highbit32(sbp->sb_agcount - 1) + 1;
+	mp->m_agino_log = sbp->sb_inopblog + sbp->sb_agblklog;
+	mp->m_blockmask = sbp->sb_blocksize - 1;
+	mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG;
+	mp->m_blockwmask = mp->m_blockwsize - 1;
+
+	mp->m_alloc_mxr[0] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, 1);
+	mp->m_alloc_mxr[1] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, 0);
+	mp->m_alloc_mnr[0] = mp->m_alloc_mxr[0] / 2;
+	mp->m_alloc_mnr[1] = mp->m_alloc_mxr[1] / 2;
+
+	mp->m_inobt_mxr[0] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 1);
+	mp->m_inobt_mxr[1] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 0);
+	mp->m_inobt_mnr[0] = mp->m_inobt_mxr[0] / 2;
+	mp->m_inobt_mnr[1] = mp->m_inobt_mxr[1] / 2;
+
+	mp->m_bmap_dmxr[0] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, 1);
+	mp->m_bmap_dmxr[1] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, 0);
+	mp->m_bmap_dmnr[0] = mp->m_bmap_dmxr[0] / 2;
+	mp->m_bmap_dmnr[1] = mp->m_bmap_dmxr[1] / 2;
+
+	mp->m_bsize = XFS_FSB_TO_BB(mp, 1);
+	mp->m_ialloc_inos = (int)MAX((__uint16_t)XFS_INODES_PER_CHUNK,
+					sbp->sb_inopblock);
+	mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog;
+}
+
+/*
+ * xfs_initialize_perag_data
+ *
+ * Read in each per-ag structure so we can count up the number of
+ * allocated inodes, free inodes and used filesystem blocks as this
+ * information is no longer persistent in the superblock. Once we have
+ * this information, write it into the in-core superblock structure.
+ */
+int
+xfs_initialize_perag_data(
+	struct xfs_mount *mp,
+	xfs_agnumber_t	agcount)
+{
+	xfs_agnumber_t	index;
+	xfs_perag_t	*pag;
+	xfs_sb_t	*sbp = &mp->m_sb;
+	uint64_t	ifree = 0;
+	uint64_t	ialloc = 0;
+	uint64_t	bfree = 0;
+	uint64_t	bfreelst = 0;
+	uint64_t	btree = 0;
+	int		error;
+
+	for (index = 0; index < agcount; index++) {
+		/*
+		 * read the agf, then the agi. This gets us
+		 * all the information we need and populates the
+		 * per-ag structures for us.
+		 */
+		error = xfs_alloc_pagf_init(mp, NULL, index, 0);
+		if (error)
+			return error;
+
+		error = xfs_ialloc_pagi_init(mp, NULL, index);
+		if (error)
+			return error;
+		pag = xfs_perag_get(mp, index);
+		ifree += pag->pagi_freecount;
+		ialloc += pag->pagi_count;
+		bfree += pag->pagf_freeblks;
+		bfreelst += pag->pagf_flcount;
+		btree += pag->pagf_btreeblks;
+		xfs_perag_put(pag);
+	}
+	/*
+	 * Overwrite incore superblock counters with just-read data
+	 */
+	spin_lock(&mp->m_sb_lock);
+	sbp->sb_ifree = ifree;
+	sbp->sb_icount = ialloc;
+	sbp->sb_fdblocks = bfree + bfreelst + btree;
+	spin_unlock(&mp->m_sb_lock);
+
+	/* Fixup the per-cpu counters as well. */
+	xfs_icsb_reinit_counters(mp);
+
+	return 0;
+}
+
+/*
+ * xfs_mod_sb() can be used to copy arbitrary changes to the
+ * in-core superblock into the superblock buffer to be logged.
+ * It does not provide the higher level of locking that is
+ * needed to protect the in-core superblock from concurrent
+ * access.
+ */
+void
+xfs_mod_sb(xfs_trans_t *tp, __int64_t fields)
+{
+	xfs_buf_t	*bp;
+	int		first;
+	int		last;
+	xfs_mount_t	*mp;
+	xfs_sb_field_t	f;
+
+	ASSERT(fields);
+	if (!fields)
+		return;
+	mp = tp->t_mountp;
+	bp = xfs_trans_getsb(tp, mp, 0);
+	first = sizeof(xfs_sb_t);
+	last = 0;
+
+	/* translate/copy */
+
+	xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb, fields);
+
+	/* find modified range */
+	f = (xfs_sb_field_t)xfs_highbit64((__uint64_t)fields);
+	ASSERT((1LL << f) & XFS_SB_MOD_BITS);
+	last = xfs_sb_info[f + 1].offset - 1;
+
+	f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields);
+	ASSERT((1LL << f) & XFS_SB_MOD_BITS);
+	first = xfs_sb_info[f].offset;
+
+	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF);
+	xfs_trans_log_buf(tp, bp, first, last);
+}
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h
index 78f9e70..6835b44 100644
--- a/fs/xfs/xfs_sb.h
+++ b/fs/xfs/xfs_sb.h
@@ -26,6 +26,7 @@
 
 struct xfs_buf;
 struct xfs_mount;
+struct xfs_trans;
 
 #define	XFS_SB_MAGIC		0x58465342	/* 'XFSB' */
 #define	XFS_SB_VERSION_1	1		/* 5.3, 6.0.1, 6.1 */
@@ -83,11 +84,13 @@
 #define XFS_SB_VERSION2_PARENTBIT	0x00000010	/* parent pointers */
 #define XFS_SB_VERSION2_PROJID32BIT	0x00000080	/* 32 bit project id */
 #define XFS_SB_VERSION2_CRCBIT		0x00000100	/* metadata CRCs */
+#define XFS_SB_VERSION2_FTYPE		0x00000200	/* inode type in dir */
 
 #define	XFS_SB_VERSION2_OKREALFBITS	\
 	(XFS_SB_VERSION2_LAZYSBCOUNTBIT	| \
 	 XFS_SB_VERSION2_ATTR2BIT	| \
-	 XFS_SB_VERSION2_PROJID32BIT)
+	 XFS_SB_VERSION2_PROJID32BIT	| \
+	 XFS_SB_VERSION2_FTYPE)
 #define	XFS_SB_VERSION2_OKSASHFBITS	\
 	(0)
 #define XFS_SB_VERSION2_OKREALBITS	\
@@ -354,15 +357,8 @@
 		     (sbp->sb_features2 & ~XFS_SB_VERSION2_OKREALBITS)))
 			return 0;
 
-#ifdef __KERNEL__
 		if (sbp->sb_shared_vn > XFS_SB_MAX_SHARED_VN)
 			return 0;
-#else
-		if ((sbp->sb_versionnum & XFS_SB_VERSION_SHAREDBIT) &&
-		    sbp->sb_shared_vn > XFS_SB_MAX_SHARED_VN)
-			return 0;
-#endif
-
 		return 1;
 	}
 	if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5)
@@ -554,12 +550,13 @@
 		(sbp->sb_features2 & XFS_SB_VERSION2_PROJID32BIT));
 }
 
-static inline int xfs_sb_version_hascrc(xfs_sb_t *sbp)
+static inline void xfs_sb_version_addprojid32bit(xfs_sb_t *sbp)
 {
-	return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5;
+	sbp->sb_versionnum |= XFS_SB_VERSION_MOREBITSBIT;
+	sbp->sb_features2 |= XFS_SB_VERSION2_PROJID32BIT;
+	sbp->sb_bad_features2 |= XFS_SB_VERSION2_PROJID32BIT;
 }
 
-
 /*
  * Extended v5 superblock feature masks. These are to be used for new v5
  * superblock features only.
@@ -598,7 +595,10 @@
 	return (sbp->sb_features_ro_compat & feature) != 0;
 }
 
-#define XFS_SB_FEAT_INCOMPAT_ALL 0
+#define XFS_SB_FEAT_INCOMPAT_FTYPE	(1 << 0)	/* filetype in dirent */
+#define XFS_SB_FEAT_INCOMPAT_ALL \
+		(XFS_SB_FEAT_INCOMPAT_FTYPE)
+
 #define XFS_SB_FEAT_INCOMPAT_UNKNOWN	~XFS_SB_FEAT_INCOMPAT_ALL
 static inline bool
 xfs_sb_has_incompat_feature(
@@ -618,16 +618,39 @@
 	return (sbp->sb_features_log_incompat & feature) != 0;
 }
 
-static inline bool
-xfs_is_quota_inode(struct xfs_sb *sbp, xfs_ino_t ino)
+/*
+ * V5 superblock specific feature checks
+ */
+static inline int xfs_sb_version_hascrc(xfs_sb_t *sbp)
 {
-	return (ino == sbp->sb_uquotino || ino == sbp->sb_gquotino);
+	return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5;
+}
+
+static inline int xfs_sb_version_has_pquotino(xfs_sb_t *sbp)
+{
+	return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5;
+}
+
+static inline int xfs_sb_version_hasftype(struct xfs_sb *sbp)
+{
+	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 &&
+		xfs_sb_has_incompat_feature(sbp, XFS_SB_FEAT_INCOMPAT_FTYPE)) ||
+	       (xfs_sb_version_hasmorebits(sbp) &&
+		 (sbp->sb_features2 & XFS_SB_VERSION2_FTYPE));
 }
 
 /*
  * end of superblock version macros
  */
 
+static inline bool
+xfs_is_quota_inode(struct xfs_sb *sbp, xfs_ino_t ino)
+{
+	return (ino == sbp->sb_uquotino ||
+		ino == sbp->sb_gquotino ||
+		ino == sbp->sb_pquotino);
+}
+
 #define XFS_SB_DADDR		((xfs_daddr_t)0) /* daddr in filesystem/ag */
 #define	XFS_SB_BLOCK(mp)	XFS_HDR_BLOCK(mp, XFS_SB_DADDR)
 #define XFS_BUF_TO_SBP(bp)	((xfs_dsb_t *)((bp)->b_addr))
@@ -660,4 +683,23 @@
 #define XFS_B_TO_FSBT(mp,b)	(((__uint64_t)(b)) >> (mp)->m_sb.sb_blocklog)
 #define XFS_B_FSB_OFFSET(mp,b)	((b) & (mp)->m_blockmask)
 
+/*
+ * perag get/put wrappers for ref counting
+ */
+extern struct xfs_perag *xfs_perag_get(struct xfs_mount *, xfs_agnumber_t);
+extern struct xfs_perag *xfs_perag_get_tag(struct xfs_mount *, xfs_agnumber_t,
+					   int tag);
+extern void	xfs_perag_put(struct xfs_perag *pag);
+extern int	xfs_initialize_perag_data(struct xfs_mount *, xfs_agnumber_t);
+
+extern void	xfs_sb_calc_crc(struct xfs_buf	*);
+extern void	xfs_mod_sb(struct xfs_trans *, __int64_t);
+extern void	xfs_sb_mount_common(struct xfs_mount *, struct xfs_sb *);
+extern void	xfs_sb_from_disk(struct xfs_sb *, struct xfs_dsb *);
+extern void	xfs_sb_to_disk(struct xfs_dsb *, struct xfs_sb *, __int64_t);
+extern void	xfs_sb_quota_from_disk(struct xfs_sb *sbp);
+
+extern const struct xfs_buf_ops xfs_sb_buf_ops;
+extern const struct xfs_buf_ops xfs_sb_quiet_buf_ops;
+
 #endif	/* __XFS_SB_H__ */
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 1d68ffc..979a77d 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -17,12 +17,12 @@
  */
 
 #include "xfs.h"
+#include "xfs_format.h"
 #include "xfs_log.h"
 #include "xfs_inum.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
-#include "xfs_dir2.h"
 #include "xfs_alloc.h"
 #include "xfs_quota.h"
 #include "xfs_mount.h"
@@ -40,12 +40,12 @@
 #include "xfs_fsops.h"
 #include "xfs_attr.h"
 #include "xfs_buf_item.h"
-#include "xfs_utils.h"
-#include "xfs_vnodeops.h"
 #include "xfs_log_priv.h"
 #include "xfs_trans_priv.h"
 #include "xfs_filestream.h"
 #include "xfs_da_btree.h"
+#include "xfs_dir2_format.h"
+#include "xfs_dir2.h"
 #include "xfs_extfree_item.h"
 #include "xfs_mru_cache.h"
 #include "xfs_inode_item.h"
@@ -421,12 +421,6 @@
 	}
 #endif
 
-	if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) &&
-	    (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE))) {
-		xfs_warn(mp, "cannot mount with both project and group quota");
-		return EINVAL;
-	}
-
 	if ((dsunit && !dswidth) || (!dsunit && dswidth)) {
 		xfs_warn(mp, "sunit and swidth must be specified together");
 		return EINVAL;
@@ -556,14 +550,13 @@
 	else if (mp->m_qflags & XFS_UQUOTA_ACCT)
 		seq_puts(m, "," MNTOPT_UQUOTANOENF);
 
-	/* Either project or group quotas can be active, not both */
-
 	if (mp->m_qflags & XFS_PQUOTA_ACCT) {
 		if (mp->m_qflags & XFS_PQUOTA_ENFD)
 			seq_puts(m, "," MNTOPT_PRJQUOTA);
 		else
 			seq_puts(m, "," MNTOPT_PQUOTANOENF);
-	} else if (mp->m_qflags & XFS_GQUOTA_ACCT) {
+	}
+	if (mp->m_qflags & XFS_GQUOTA_ACCT) {
 		if (mp->m_qflags & XFS_GQUOTA_ENFD)
 			seq_puts(m, "," MNTOPT_GRPQUOTA);
 		else
@@ -870,17 +863,17 @@
 		goto out_destroy_unwritten;
 
 	mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s",
-			WQ_NON_REENTRANT, 0, mp->m_fsname);
+			0, 0, mp->m_fsname);
 	if (!mp->m_reclaim_workqueue)
 		goto out_destroy_cil;
 
 	mp->m_log_workqueue = alloc_workqueue("xfs-log/%s",
-			WQ_NON_REENTRANT, 0, mp->m_fsname);
+			0, 0, mp->m_fsname);
 	if (!mp->m_log_workqueue)
 		goto out_destroy_reclaim;
 
 	mp->m_eofblocks_workqueue = alloc_workqueue("xfs-eofblocks/%s",
-			WQ_NON_REENTRANT, 0, mp->m_fsname);
+			0, 0, mp->m_fsname);
 	if (!mp->m_eofblocks_workqueue)
 		goto out_destroy_log;
 
@@ -1396,6 +1389,14 @@
 		return XFS_ERROR(EROFS);
 	}
 
+	if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) &&
+	    (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE)) &&
+	    !xfs_sb_version_has_pquotino(&mp->m_sb)) {
+		xfs_warn(mp,
+		  "Super block does not support project and group quota together");
+		return XFS_ERROR(EINVAL);
+	}
+
 	return 0;
 }
 
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index f4895b6..2f2a7c0 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -18,200 +18,29 @@
  */
 #include "xfs.h"
 #include "xfs_fs.h"
-#include "xfs_types.h"
+#include "xfs_format.h"
 #include "xfs_bit.h"
 #include "xfs_log.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
-#include "xfs_dir2.h"
 #include "xfs_mount.h"
 #include "xfs_da_btree.h"
+#include "xfs_dir2_format.h"
+#include "xfs_dir2.h"
 #include "xfs_bmap_btree.h"
 #include "xfs_ialloc_btree.h"
 #include "xfs_dinode.h"
 #include "xfs_inode.h"
-#include "xfs_inode_item.h"
-#include "xfs_itable.h"
 #include "xfs_ialloc.h"
 #include "xfs_alloc.h"
 #include "xfs_bmap.h"
+#include "xfs_bmap_util.h"
 #include "xfs_error.h"
 #include "xfs_quota.h"
-#include "xfs_utils.h"
 #include "xfs_trans_space.h"
-#include "xfs_log_priv.h"
 #include "xfs_trace.h"
 #include "xfs_symlink.h"
-#include "xfs_cksum.h"
-#include "xfs_buf_item.h"
-
-
-/*
- * Each contiguous block has a header, so it is not just a simple pathlen
- * to FSB conversion.
- */
-int
-xfs_symlink_blocks(
-	struct xfs_mount *mp,
-	int		pathlen)
-{
-	int buflen = XFS_SYMLINK_BUF_SPACE(mp, mp->m_sb.sb_blocksize);
-
-	return (pathlen + buflen - 1) / buflen;
-}
-
-static int
-xfs_symlink_hdr_set(
-	struct xfs_mount	*mp,
-	xfs_ino_t		ino,
-	uint32_t		offset,
-	uint32_t		size,
-	struct xfs_buf		*bp)
-{
-	struct xfs_dsymlink_hdr	*dsl = bp->b_addr;
-
-	if (!xfs_sb_version_hascrc(&mp->m_sb))
-		return 0;
-
-	dsl->sl_magic = cpu_to_be32(XFS_SYMLINK_MAGIC);
-	dsl->sl_offset = cpu_to_be32(offset);
-	dsl->sl_bytes = cpu_to_be32(size);
-	uuid_copy(&dsl->sl_uuid, &mp->m_sb.sb_uuid);
-	dsl->sl_owner = cpu_to_be64(ino);
-	dsl->sl_blkno = cpu_to_be64(bp->b_bn);
-	bp->b_ops = &xfs_symlink_buf_ops;
-
-	return sizeof(struct xfs_dsymlink_hdr);
-}
-
-/*
- * Checking of the symlink header is split into two parts. the verifier does
- * CRC, location and bounds checking, the unpacking function checks the path
- * parameters and owner.
- */
-bool
-xfs_symlink_hdr_ok(
-	struct xfs_mount	*mp,
-	xfs_ino_t		ino,
-	uint32_t		offset,
-	uint32_t		size,
-	struct xfs_buf		*bp)
-{
-	struct xfs_dsymlink_hdr *dsl = bp->b_addr;
-
-	if (offset != be32_to_cpu(dsl->sl_offset))
-		return false;
-	if (size != be32_to_cpu(dsl->sl_bytes))
-		return false;
-	if (ino != be64_to_cpu(dsl->sl_owner))
-		return false;
-
-	/* ok */
-	return true;
-}
-
-static bool
-xfs_symlink_verify(
-	struct xfs_buf		*bp)
-{
-	struct xfs_mount	*mp = bp->b_target->bt_mount;
-	struct xfs_dsymlink_hdr	*dsl = bp->b_addr;
-
-	if (!xfs_sb_version_hascrc(&mp->m_sb))
-		return false;
-	if (dsl->sl_magic != cpu_to_be32(XFS_SYMLINK_MAGIC))
-		return false;
-	if (!uuid_equal(&dsl->sl_uuid, &mp->m_sb.sb_uuid))
-		return false;
-	if (bp->b_bn != be64_to_cpu(dsl->sl_blkno))
-		return false;
-	if (be32_to_cpu(dsl->sl_offset) +
-				be32_to_cpu(dsl->sl_bytes) >= MAXPATHLEN)
-		return false;
-	if (dsl->sl_owner == 0)
-		return false;
-
-	return true;
-}
-
-static void
-xfs_symlink_read_verify(
-	struct xfs_buf	*bp)
-{
-	struct xfs_mount *mp = bp->b_target->bt_mount;
-
-	/* no verification of non-crc buffers */
-	if (!xfs_sb_version_hascrc(&mp->m_sb))
-		return;
-
-	if (!xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
-				  offsetof(struct xfs_dsymlink_hdr, sl_crc)) ||
-	    !xfs_symlink_verify(bp)) {
-		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
-		xfs_buf_ioerror(bp, EFSCORRUPTED);
-	}
-}
-
-static void
-xfs_symlink_write_verify(
-	struct xfs_buf	*bp)
-{
-	struct xfs_mount *mp = bp->b_target->bt_mount;
-	struct xfs_buf_log_item	*bip = bp->b_fspriv;
-
-	/* no verification of non-crc buffers */
-	if (!xfs_sb_version_hascrc(&mp->m_sb))
-		return;
-
-	if (!xfs_symlink_verify(bp)) {
-		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
-		xfs_buf_ioerror(bp, EFSCORRUPTED);
-		return;
-	}
-
-	if (bip) {
-		struct xfs_dsymlink_hdr *dsl = bp->b_addr;
-		dsl->sl_lsn = cpu_to_be64(bip->bli_item.li_lsn);
-	}
-	xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
-			 offsetof(struct xfs_dsymlink_hdr, sl_crc));
-}
-
-const struct xfs_buf_ops xfs_symlink_buf_ops = {
-	.verify_read = xfs_symlink_read_verify,
-	.verify_write = xfs_symlink_write_verify,
-};
-
-void
-xfs_symlink_local_to_remote(
-	struct xfs_trans	*tp,
-	struct xfs_buf		*bp,
-	struct xfs_inode	*ip,
-	struct xfs_ifork	*ifp)
-{
-	struct xfs_mount	*mp = ip->i_mount;
-	char			*buf;
-
-	if (!xfs_sb_version_hascrc(&mp->m_sb)) {
-		bp->b_ops = NULL;
-		memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes);
-		return;
-	}
-
-	/*
-	 * As this symlink fits in an inode literal area, it must also fit in
-	 * the smallest buffer the filesystem supports.
-	 */
-	ASSERT(BBTOB(bp->b_length) >=
-			ifp->if_bytes + sizeof(struct xfs_dsymlink_hdr));
-
-	bp->b_ops = &xfs_symlink_buf_ops;
-
-	buf = bp->b_addr;
-	buf += xfs_symlink_hdr_set(mp, ip->i_ino, 0, ifp->if_bytes, bp);
-	memcpy(buf, ifp->if_u1.if_data, ifp->if_bytes);
-}
 
 /* ----- Kernel only functions below ----- */
 STATIC int
@@ -386,8 +215,11 @@
 	/*
 	 * Make sure that we have allocated dquot(s) on disk.
 	 */
-	error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid,
-		XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp, &pdqp);
+	error = xfs_qm_vop_dqalloc(dp,
+			xfs_kuid_to_uid(current_fsuid()),
+			xfs_kgid_to_gid(current_fsgid()), prid,
+			XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
+			&udqp, &gdqp, &pdqp);
 	if (error)
 		goto std_return;
 
@@ -402,12 +234,10 @@
 	else
 		fs_blocks = xfs_symlink_blocks(mp, pathlen);
 	resblks = XFS_SYMLINK_SPACE_RES(mp, link_name->len, fs_blocks);
-	error = xfs_trans_reserve(tp, resblks, XFS_SYMLINK_LOG_RES(mp), 0,
-			XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT);
+	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_symlink, resblks, 0);
 	if (error == ENOSPC && fs_blocks == 0) {
 		resblks = 0;
-		error = xfs_trans_reserve(tp, 0, XFS_SYMLINK_LOG_RES(mp), 0,
-				XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT);
+		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_symlink, 0, 0);
 	}
 	if (error) {
 		cancel_flags = 0;
@@ -710,8 +540,8 @@
 	 * Put an itruncate log reservation in the new transaction
 	 * for our caller.
 	 */
-	if ((error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
-			XFS_TRANS_PERM_LOG_RES, XFS_ITRUNCATE_LOG_COUNT))) {
+	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
+	if (error) {
 		ASSERT(XFS_FORCED_SHUTDOWN(mp));
 		goto error0;
 	}
diff --git a/fs/xfs/xfs_symlink.h b/fs/xfs/xfs_symlink.h
index 3743948..99338ba 100644
--- a/fs/xfs/xfs_symlink.h
+++ b/fs/xfs/xfs_symlink.h
@@ -17,50 +17,11 @@
 #ifndef __XFS_SYMLINK_H
 #define __XFS_SYMLINK_H 1
 
-struct xfs_mount;
-struct xfs_trans;
-struct xfs_inode;
-struct xfs_buf;
-struct xfs_ifork;
-struct xfs_name;
-
-#define XFS_SYMLINK_MAGIC	0x58534c4d	/* XSLM */
-
-struct xfs_dsymlink_hdr {
-	__be32	sl_magic;
-	__be32	sl_offset;
-	__be32	sl_bytes;
-	__be32	sl_crc;
-	uuid_t	sl_uuid;
-	__be64	sl_owner;
-	__be64	sl_blkno;
-	__be64	sl_lsn;
-};
-
-/*
- * The maximum pathlen is 1024 bytes. Since the minimum file system
- * blocksize is 512 bytes, we can get a max of 3 extents back from
- * bmapi when crc headers are taken into account.
- */
-#define XFS_SYMLINK_MAPS 3
-
-#define XFS_SYMLINK_BUF_SPACE(mp, bufsize)	\
-	((bufsize) - (xfs_sb_version_hascrc(&(mp)->m_sb) ? \
-			sizeof(struct xfs_dsymlink_hdr) : 0))
-
-int xfs_symlink_blocks(struct xfs_mount *mp, int pathlen);
-
-void xfs_symlink_local_to_remote(struct xfs_trans *tp, struct xfs_buf *bp,
-				 struct xfs_inode *ip, struct xfs_ifork *ifp);
-
-extern const struct xfs_buf_ops xfs_symlink_buf_ops;
-
-#ifdef __KERNEL__
+/* Kernel only symlink defintions */
 
 int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name,
 		const char *target_path, umode_t mode, struct xfs_inode **ipp);
 int xfs_readlink(struct xfs_inode *ip, char *link);
 int xfs_inactive_symlink(struct xfs_inode *ip, struct xfs_trans **tpp);
 
-#endif /* __KERNEL__ */
 #endif /* __XFS_SYMLINK_H */
diff --git a/fs/xfs/xfs_symlink_remote.c b/fs/xfs/xfs_symlink_remote.c
new file mode 100644
index 0000000..01c85e3
--- /dev/null
+++ b/fs/xfs/xfs_symlink_remote.c
@@ -0,0 +1,200 @@
+/*
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
+ * Copyright (c) 2012-2013 Red Hat, Inc.
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_format.h"
+#include "xfs_log.h"
+#include "xfs_trans.h"
+#include "xfs_ag.h"
+#include "xfs_sb.h"
+#include "xfs_mount.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_error.h"
+#include "xfs_trace.h"
+#include "xfs_symlink.h"
+#include "xfs_cksum.h"
+#include "xfs_buf_item.h"
+
+
+/*
+ * Each contiguous block has a header, so it is not just a simple pathlen
+ * to FSB conversion.
+ */
+int
+xfs_symlink_blocks(
+	struct xfs_mount *mp,
+	int		pathlen)
+{
+	int buflen = XFS_SYMLINK_BUF_SPACE(mp, mp->m_sb.sb_blocksize);
+
+	return (pathlen + buflen - 1) / buflen;
+}
+
+int
+xfs_symlink_hdr_set(
+	struct xfs_mount	*mp,
+	xfs_ino_t		ino,
+	uint32_t		offset,
+	uint32_t		size,
+	struct xfs_buf		*bp)
+{
+	struct xfs_dsymlink_hdr	*dsl = bp->b_addr;
+
+	if (!xfs_sb_version_hascrc(&mp->m_sb))
+		return 0;
+
+	dsl->sl_magic = cpu_to_be32(XFS_SYMLINK_MAGIC);
+	dsl->sl_offset = cpu_to_be32(offset);
+	dsl->sl_bytes = cpu_to_be32(size);
+	uuid_copy(&dsl->sl_uuid, &mp->m_sb.sb_uuid);
+	dsl->sl_owner = cpu_to_be64(ino);
+	dsl->sl_blkno = cpu_to_be64(bp->b_bn);
+	bp->b_ops = &xfs_symlink_buf_ops;
+
+	return sizeof(struct xfs_dsymlink_hdr);
+}
+
+/*
+ * Checking of the symlink header is split into two parts. the verifier does
+ * CRC, location and bounds checking, the unpacking function checks the path
+ * parameters and owner.
+ */
+bool
+xfs_symlink_hdr_ok(
+	struct xfs_mount	*mp,
+	xfs_ino_t		ino,
+	uint32_t		offset,
+	uint32_t		size,
+	struct xfs_buf		*bp)
+{
+	struct xfs_dsymlink_hdr *dsl = bp->b_addr;
+
+	if (offset != be32_to_cpu(dsl->sl_offset))
+		return false;
+	if (size != be32_to_cpu(dsl->sl_bytes))
+		return false;
+	if (ino != be64_to_cpu(dsl->sl_owner))
+		return false;
+
+	/* ok */
+	return true;
+}
+
+static bool
+xfs_symlink_verify(
+	struct xfs_buf		*bp)
+{
+	struct xfs_mount	*mp = bp->b_target->bt_mount;
+	struct xfs_dsymlink_hdr	*dsl = bp->b_addr;
+
+	if (!xfs_sb_version_hascrc(&mp->m_sb))
+		return false;
+	if (dsl->sl_magic != cpu_to_be32(XFS_SYMLINK_MAGIC))
+		return false;
+	if (!uuid_equal(&dsl->sl_uuid, &mp->m_sb.sb_uuid))
+		return false;
+	if (bp->b_bn != be64_to_cpu(dsl->sl_blkno))
+		return false;
+	if (be32_to_cpu(dsl->sl_offset) +
+				be32_to_cpu(dsl->sl_bytes) >= MAXPATHLEN)
+		return false;
+	if (dsl->sl_owner == 0)
+		return false;
+
+	return true;
+}
+
+static void
+xfs_symlink_read_verify(
+	struct xfs_buf	*bp)
+{
+	struct xfs_mount *mp = bp->b_target->bt_mount;
+
+	/* no verification of non-crc buffers */
+	if (!xfs_sb_version_hascrc(&mp->m_sb))
+		return;
+
+	if (!xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
+				  offsetof(struct xfs_dsymlink_hdr, sl_crc)) ||
+	    !xfs_symlink_verify(bp)) {
+		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
+		xfs_buf_ioerror(bp, EFSCORRUPTED);
+	}
+}
+
+static void
+xfs_symlink_write_verify(
+	struct xfs_buf	*bp)
+{
+	struct xfs_mount *mp = bp->b_target->bt_mount;
+	struct xfs_buf_log_item	*bip = bp->b_fspriv;
+
+	/* no verification of non-crc buffers */
+	if (!xfs_sb_version_hascrc(&mp->m_sb))
+		return;
+
+	if (!xfs_symlink_verify(bp)) {
+		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
+		xfs_buf_ioerror(bp, EFSCORRUPTED);
+		return;
+	}
+
+	if (bip) {
+		struct xfs_dsymlink_hdr *dsl = bp->b_addr;
+		dsl->sl_lsn = cpu_to_be64(bip->bli_item.li_lsn);
+	}
+	xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
+			 offsetof(struct xfs_dsymlink_hdr, sl_crc));
+}
+
+const struct xfs_buf_ops xfs_symlink_buf_ops = {
+	.verify_read = xfs_symlink_read_verify,
+	.verify_write = xfs_symlink_write_verify,
+};
+
+void
+xfs_symlink_local_to_remote(
+	struct xfs_trans	*tp,
+	struct xfs_buf		*bp,
+	struct xfs_inode	*ip,
+	struct xfs_ifork	*ifp)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	char			*buf;
+
+	if (!xfs_sb_version_hascrc(&mp->m_sb)) {
+		bp->b_ops = NULL;
+		memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes);
+		return;
+	}
+
+	/*
+	 * As this symlink fits in an inode literal area, it must also fit in
+	 * the smallest buffer the filesystem supports.
+	 */
+	ASSERT(BBTOB(bp->b_length) >=
+			ifp->if_bytes + sizeof(struct xfs_dsymlink_hdr));
+
+	bp->b_ops = &xfs_symlink_buf_ops;
+
+	buf = bp->b_addr;
+	buf += xfs_symlink_hdr_set(mp, ip->i_ino, 0, ifp->if_bytes, bp);
+	memcpy(buf, ifp->if_u1.if_data, ifp->if_bytes);
+}
diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c
index b6e3897..5d7b3e4 100644
--- a/fs/xfs/xfs_trace.c
+++ b/fs/xfs/xfs_trace.c
@@ -18,6 +18,7 @@
 #include "xfs.h"
 #include "xfs_fs.h"
 #include "xfs_types.h"
+#include "xfs_format.h"
 #include "xfs_log.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 35a2299..5411e01 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -18,7 +18,7 @@
  */
 #include "xfs.h"
 #include "xfs_fs.h"
-#include "xfs_types.h"
+#include "xfs_format.h"
 #include "xfs_log.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
@@ -49,629 +49,6 @@
 kmem_zone_t	*xfs_log_item_desc_zone;
 
 /*
- * A buffer has a format structure overhead in the log in addition
- * to the data, so we need to take this into account when reserving
- * space in a transaction for a buffer.  Round the space required up
- * to a multiple of 128 bytes so that we don't change the historical
- * reservation that has been used for this overhead.
- */
-STATIC uint
-xfs_buf_log_overhead(void)
-{
-	return round_up(sizeof(struct xlog_op_header) +
-			sizeof(struct xfs_buf_log_format), 128);
-}
-
-/*
- * Calculate out transaction log reservation per item in bytes.
- *
- * The nbufs argument is used to indicate the number of items that
- * will be changed in a transaction.  size is used to tell how many
- * bytes should be reserved per item.
- */
-STATIC uint
-xfs_calc_buf_res(
-	uint		nbufs,
-	uint		size)
-{
-	return nbufs * (size + xfs_buf_log_overhead());
-}
-
-/*
- * Various log reservation values.
- *
- * These are based on the size of the file system block because that is what
- * most transactions manipulate.  Each adds in an additional 128 bytes per
- * item logged to try to account for the overhead of the transaction mechanism.
- *
- * Note:  Most of the reservations underestimate the number of allocation
- * groups into which they could free extents in the xfs_bmap_finish() call.
- * This is because the number in the worst case is quite high and quite
- * unusual.  In order to fix this we need to change xfs_bmap_finish() to free
- * extents in only a single AG at a time.  This will require changes to the
- * EFI code as well, however, so that the EFI for the extents not freed is
- * logged again in each transaction.  See SGI PV #261917.
- *
- * Reservation functions here avoid a huge stack in xfs_trans_init due to
- * register overflow from temporaries in the calculations.
- */
-
-
-/*
- * In a write transaction we can allocate a maximum of 2
- * extents.  This gives:
- *    the inode getting the new extents: inode size
- *    the inode's bmap btree: max depth * block size
- *    the agfs of the ags from which the extents are allocated: 2 * sector
- *    the superblock free block counter: sector size
- *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
- * And the bmap_finish transaction can free bmap blocks in a join:
- *    the agfs of the ags containing the blocks: 2 * sector size
- *    the agfls of the ags containing the blocks: 2 * sector size
- *    the super block free block counter: sector size
- *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
- */
-STATIC uint
-xfs_calc_write_reservation(
-	struct xfs_mount	*mp)
-{
-	return XFS_DQUOT_LOGRES(mp) +
-		MAX((xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
-		     xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
-				      XFS_FSB_TO_B(mp, 1)) +
-		     xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
-		     xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
-				      XFS_FSB_TO_B(mp, 1))),
-		    (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
-		     xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
-				      XFS_FSB_TO_B(mp, 1))));
-}
-
-/*
- * In truncating a file we free up to two extents at once.  We can modify:
- *    the inode being truncated: inode size
- *    the inode's bmap btree: (max depth + 1) * block size
- * And the bmap_finish transaction can free the blocks and bmap blocks:
- *    the agf for each of the ags: 4 * sector size
- *    the agfl for each of the ags: 4 * sector size
- *    the super block to reflect the freed blocks: sector size
- *    worst case split in allocation btrees per extent assuming 4 extents:
- *		4 exts * 2 trees * (2 * max depth - 1) * block size
- *    the inode btree: max depth * blocksize
- *    the allocation btrees: 2 trees * (max depth - 1) * block size
- */
-STATIC uint
-xfs_calc_itruncate_reservation(
-	struct xfs_mount	*mp)
-{
-	return XFS_DQUOT_LOGRES(mp) +
-		MAX((xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
-		     xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1,
-				      XFS_FSB_TO_B(mp, 1))),
-		    (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
-		     xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 4),
-				      XFS_FSB_TO_B(mp, 1)) +
-		    xfs_calc_buf_res(5, 0) +
-		    xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
-				     XFS_FSB_TO_B(mp, 1)) +
-		    xfs_calc_buf_res(2 + XFS_IALLOC_BLOCKS(mp) +
-				     mp->m_in_maxlevels, 0)));
-}
-
-/*
- * In renaming a files we can modify:
- *    the four inodes involved: 4 * inode size
- *    the two directory btrees: 2 * (max depth + v2) * dir block size
- *    the two directory bmap btrees: 2 * max depth * block size
- * And the bmap_finish transaction can free dir and bmap blocks (two sets
- *	of bmap blocks) giving:
- *    the agf for the ags in which the blocks live: 3 * sector size
- *    the agfl for the ags in which the blocks live: 3 * sector size
- *    the superblock for the free block count: sector size
- *    the allocation btrees: 3 exts * 2 trees * (2 * max depth - 1) * block size
- */
-STATIC uint
-xfs_calc_rename_reservation(
-	struct xfs_mount	*mp)
-{
-	return XFS_DQUOT_LOGRES(mp) +
-		MAX((xfs_calc_buf_res(4, mp->m_sb.sb_inodesize) +
-		     xfs_calc_buf_res(2 * XFS_DIROP_LOG_COUNT(mp),
-				      XFS_FSB_TO_B(mp, 1))),
-		    (xfs_calc_buf_res(7, mp->m_sb.sb_sectsize) +
-		     xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 3),
-				      XFS_FSB_TO_B(mp, 1))));
-}
-
-/*
- * For creating a link to an inode:
- *    the parent directory inode: inode size
- *    the linked inode: inode size
- *    the directory btree could split: (max depth + v2) * dir block size
- *    the directory bmap btree could join or split: (max depth + v2) * blocksize
- * And the bmap_finish transaction can free some bmap blocks giving:
- *    the agf for the ag in which the blocks live: sector size
- *    the agfl for the ag in which the blocks live: sector size
- *    the superblock for the free block count: sector size
- *    the allocation btrees: 2 trees * (2 * max depth - 1) * block size
- */
-STATIC uint
-xfs_calc_link_reservation(
-	struct xfs_mount	*mp)
-{
-	return XFS_DQUOT_LOGRES(mp) +
-		MAX((xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) +
-		     xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
-				      XFS_FSB_TO_B(mp, 1))),
-		    (xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
-		     xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
-				      XFS_FSB_TO_B(mp, 1))));
-}
-
-/*
- * For removing a directory entry we can modify:
- *    the parent directory inode: inode size
- *    the removed inode: inode size
- *    the directory btree could join: (max depth + v2) * dir block size
- *    the directory bmap btree could join or split: (max depth + v2) * blocksize
- * And the bmap_finish transaction can free the dir and bmap blocks giving:
- *    the agf for the ag in which the blocks live: 2 * sector size
- *    the agfl for the ag in which the blocks live: 2 * sector size
- *    the superblock for the free block count: sector size
- *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
- */
-STATIC uint
-xfs_calc_remove_reservation(
-	struct xfs_mount	*mp)
-{
-	return XFS_DQUOT_LOGRES(mp) +
-		MAX((xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) +
-		     xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
-				      XFS_FSB_TO_B(mp, 1))),
-		    (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
-		     xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
-				      XFS_FSB_TO_B(mp, 1))));
-}
-
-/*
- * For create, break it in to the two cases that the transaction
- * covers. We start with the modify case - allocation done by modification
- * of the state of existing inodes - and the allocation case.
- */
-
-/*
- * For create we can modify:
- *    the parent directory inode: inode size
- *    the new inode: inode size
- *    the inode btree entry: block size
- *    the superblock for the nlink flag: sector size
- *    the directory btree: (max depth + v2) * dir block size
- *    the directory inode's bmap btree: (max depth + v2) * block size
- */
-STATIC uint
-xfs_calc_create_resv_modify(
-	struct xfs_mount	*mp)
-{
-	return xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) +
-		xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
-		(uint)XFS_FSB_TO_B(mp, 1) +
-		xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1));
-}
-
-/*
- * For create we can allocate some inodes giving:
- *    the agi and agf of the ag getting the new inodes: 2 * sectorsize
- *    the superblock for the nlink flag: sector size
- *    the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize
- *    the inode btree: max depth * blocksize
- *    the allocation btrees: 2 trees * (max depth - 1) * block size
- */
-STATIC uint
-xfs_calc_create_resv_alloc(
-	struct xfs_mount	*mp)
-{
-	return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
-		mp->m_sb.sb_sectsize +
-		xfs_calc_buf_res(XFS_IALLOC_BLOCKS(mp), XFS_FSB_TO_B(mp, 1)) +
-		xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) +
-		xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
-				 XFS_FSB_TO_B(mp, 1));
-}
-
-STATIC uint
-__xfs_calc_create_reservation(
-	struct xfs_mount	*mp)
-{
-	return XFS_DQUOT_LOGRES(mp) +
-		MAX(xfs_calc_create_resv_alloc(mp),
-		    xfs_calc_create_resv_modify(mp));
-}
-
-/*
- * For icreate we can allocate some inodes giving:
- *    the agi and agf of the ag getting the new inodes: 2 * sectorsize
- *    the superblock for the nlink flag: sector size
- *    the inode btree: max depth * blocksize
- *    the allocation btrees: 2 trees * (max depth - 1) * block size
- */
-STATIC uint
-xfs_calc_icreate_resv_alloc(
-	struct xfs_mount	*mp)
-{
-	return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
-		mp->m_sb.sb_sectsize +
-		xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) +
-		xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
-				 XFS_FSB_TO_B(mp, 1));
-}
-
-STATIC uint
-xfs_calc_icreate_reservation(xfs_mount_t *mp)
-{
-	return XFS_DQUOT_LOGRES(mp) +
-		MAX(xfs_calc_icreate_resv_alloc(mp),
-		    xfs_calc_create_resv_modify(mp));
-}
-
-STATIC uint
-xfs_calc_create_reservation(
-	struct xfs_mount	*mp)
-{
-	if (xfs_sb_version_hascrc(&mp->m_sb))
-		return xfs_calc_icreate_reservation(mp);
-	return __xfs_calc_create_reservation(mp);
-
-}
-
-/*
- * Making a new directory is the same as creating a new file.
- */
-STATIC uint
-xfs_calc_mkdir_reservation(
-	struct xfs_mount	*mp)
-{
-	return xfs_calc_create_reservation(mp);
-}
-
-
-/*
- * Making a new symplink is the same as creating a new file, but
- * with the added blocks for remote symlink data which can be up to 1kB in
- * length (MAXPATHLEN).
- */
-STATIC uint
-xfs_calc_symlink_reservation(
-	struct xfs_mount	*mp)
-{
-	return xfs_calc_create_reservation(mp) +
-	       xfs_calc_buf_res(1, MAXPATHLEN);
-}
-
-/*
- * In freeing an inode we can modify:
- *    the inode being freed: inode size
- *    the super block free inode counter: sector size
- *    the agi hash list and counters: sector size
- *    the inode btree entry: block size
- *    the on disk inode before ours in the agi hash list: inode cluster size
- *    the inode btree: max depth * blocksize
- *    the allocation btrees: 2 trees * (max depth - 1) * block size
- */
-STATIC uint
-xfs_calc_ifree_reservation(
-	struct xfs_mount	*mp)
-{
-	return XFS_DQUOT_LOGRES(mp) +
-		xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
-		xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
-		xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) +
-		MAX((__uint16_t)XFS_FSB_TO_B(mp, 1),
-		    XFS_INODE_CLUSTER_SIZE(mp)) +
-		xfs_calc_buf_res(1, 0) +
-		xfs_calc_buf_res(2 + XFS_IALLOC_BLOCKS(mp) +
-				 mp->m_in_maxlevels, 0) +
-		xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
-				 XFS_FSB_TO_B(mp, 1));
-}
-
-/*
- * When only changing the inode we log the inode and possibly the superblock
- * We also add a bit of slop for the transaction stuff.
- */
-STATIC uint
-xfs_calc_ichange_reservation(
-	struct xfs_mount	*mp)
-{
-	return XFS_DQUOT_LOGRES(mp) +
-		mp->m_sb.sb_inodesize +
-		mp->m_sb.sb_sectsize +
-		512;
-
-}
-
-/*
- * Growing the data section of the filesystem.
- *	superblock
- *	agi and agf
- *	allocation btrees
- */
-STATIC uint
-xfs_calc_growdata_reservation(
-	struct xfs_mount	*mp)
-{
-	return xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
-		xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
-				 XFS_FSB_TO_B(mp, 1));
-}
-
-/*
- * Growing the rt section of the filesystem.
- * In the first set of transactions (ALLOC) we allocate space to the
- * bitmap or summary files.
- *	superblock: sector size
- *	agf of the ag from which the extent is allocated: sector size
- *	bmap btree for bitmap/summary inode: max depth * blocksize
- *	bitmap/summary inode: inode size
- *	allocation btrees for 1 block alloc: 2 * (2 * maxdepth - 1) * blocksize
- */
-STATIC uint
-xfs_calc_growrtalloc_reservation(
-	struct xfs_mount	*mp)
-{
-	return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
-		xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
-				 XFS_FSB_TO_B(mp, 1)) +
-		xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
-		xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
-				 XFS_FSB_TO_B(mp, 1));
-}
-
-/*
- * Growing the rt section of the filesystem.
- * In the second set of transactions (ZERO) we zero the new metadata blocks.
- *	one bitmap/summary block: blocksize
- */
-STATIC uint
-xfs_calc_growrtzero_reservation(
-	struct xfs_mount	*mp)
-{
-	return xfs_calc_buf_res(1, mp->m_sb.sb_blocksize);
-}
-
-/*
- * Growing the rt section of the filesystem.
- * In the third set of transactions (FREE) we update metadata without
- * allocating any new blocks.
- *	superblock: sector size
- *	bitmap inode: inode size
- *	summary inode: inode size
- *	one bitmap block: blocksize
- *	summary blocks: new summary size
- */
-STATIC uint
-xfs_calc_growrtfree_reservation(
-	struct xfs_mount	*mp)
-{
-	return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
-		xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) +
-		xfs_calc_buf_res(1, mp->m_sb.sb_blocksize) +
-		xfs_calc_buf_res(1, mp->m_rsumsize);
-}
-
-/*
- * Logging the inode modification timestamp on a synchronous write.
- *	inode
- */
-STATIC uint
-xfs_calc_swrite_reservation(
-	struct xfs_mount	*mp)
-{
-	return xfs_calc_buf_res(1, mp->m_sb.sb_inodesize);
-}
-
-/*
- * Logging the inode mode bits when writing a setuid/setgid file
- *	inode
- */
-STATIC uint
-xfs_calc_writeid_reservation(xfs_mount_t *mp)
-{
-	return xfs_calc_buf_res(1, mp->m_sb.sb_inodesize);
-}
-
-/*
- * Converting the inode from non-attributed to attributed.
- *	the inode being converted: inode size
- *	agf block and superblock (for block allocation)
- *	the new block (directory sized)
- *	bmap blocks for the new directory block
- *	allocation btrees
- */
-STATIC uint
-xfs_calc_addafork_reservation(
-	struct xfs_mount	*mp)
-{
-	return XFS_DQUOT_LOGRES(mp) +
-		xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
-		xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
-		xfs_calc_buf_res(1, mp->m_dirblksize) +
-		xfs_calc_buf_res(XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1,
-				 XFS_FSB_TO_B(mp, 1)) +
-		xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
-				 XFS_FSB_TO_B(mp, 1));
-}
-
-/*
- * Removing the attribute fork of a file
- *    the inode being truncated: inode size
- *    the inode's bmap btree: max depth * block size
- * And the bmap_finish transaction can free the blocks and bmap blocks:
- *    the agf for each of the ags: 4 * sector size
- *    the agfl for each of the ags: 4 * sector size
- *    the super block to reflect the freed blocks: sector size
- *    worst case split in allocation btrees per extent assuming 4 extents:
- *		4 exts * 2 trees * (2 * max depth - 1) * block size
- */
-STATIC uint
-xfs_calc_attrinval_reservation(
-	struct xfs_mount	*mp)
-{
-	return MAX((xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
-		    xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK),
-				     XFS_FSB_TO_B(mp, 1))),
-		   (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
-		    xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 4),
-				     XFS_FSB_TO_B(mp, 1))));
-}
-
-/*
- * Setting an attribute at mount time.
- *	the inode getting the attribute
- *	the superblock for allocations
- *	the agfs extents are allocated from
- *	the attribute btree * max depth
- *	the inode allocation btree
- * Since attribute transaction space is dependent on the size of the attribute,
- * the calculation is done partially at mount time and partially at runtime(see
- * below).
- */
-STATIC uint
-xfs_calc_attrsetm_reservation(
-	struct xfs_mount	*mp)
-{
-	return XFS_DQUOT_LOGRES(mp) +
-		xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
-		xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
-		xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH, XFS_FSB_TO_B(mp, 1));
-}
-
-/*
- * Setting an attribute at runtime, transaction space unit per block.
- * 	the superblock for allocations: sector size
- *	the inode bmap btree could join or split: max depth * block size
- * Since the runtime attribute transaction space is dependent on the total
- * blocks needed for the 1st bmap, here we calculate out the space unit for
- * one block so that the caller could figure out the total space according
- * to the attibute extent length in blocks by: ext * XFS_ATTRSETRT_LOG_RES(mp).
- */
-STATIC uint
-xfs_calc_attrsetrt_reservation(
-	struct xfs_mount	*mp)
-{
-	return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
-		xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK),
-				 XFS_FSB_TO_B(mp, 1));
-}
-
-/*
- * Removing an attribute.
- *    the inode: inode size
- *    the attribute btree could join: max depth * block size
- *    the inode bmap btree could join or split: max depth * block size
- * And the bmap_finish transaction can free the attr blocks freed giving:
- *    the agf for the ag in which the blocks live: 2 * sector size
- *    the agfl for the ag in which the blocks live: 2 * sector size
- *    the superblock for the free block count: sector size
- *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
- */
-STATIC uint
-xfs_calc_attrrm_reservation(
-	struct xfs_mount	*mp)
-{
-	return XFS_DQUOT_LOGRES(mp) +
-		MAX((xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) +
-		     xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH,
-				      XFS_FSB_TO_B(mp, 1)) +
-		     (uint)XFS_FSB_TO_B(mp,
-					XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) +
-		     xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), 0)),
-		    (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
-		     xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
-				      XFS_FSB_TO_B(mp, 1))));
-}
-
-/*
- * Clearing a bad agino number in an agi hash bucket.
- */
-STATIC uint
-xfs_calc_clear_agi_bucket_reservation(
-	struct xfs_mount	*mp)
-{
-	return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
-}
-
-/*
- * Clearing the quotaflags in the superblock.
- *	the super block for changing quota flags: sector size
- */
-STATIC uint
-xfs_calc_qm_sbchange_reservation(
-	struct xfs_mount	*mp)
-{
-	return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
-}
-
-/*
- * Adjusting quota limits.
- *    the xfs_disk_dquot_t: sizeof(struct xfs_disk_dquot)
- */
-STATIC uint
-xfs_calc_qm_setqlim_reservation(
-	struct xfs_mount	*mp)
-{
-	return xfs_calc_buf_res(1, sizeof(struct xfs_disk_dquot));
-}
-
-/*
- * Allocating quota on disk if needed.
- *	the write transaction log space: XFS_WRITE_LOG_RES(mp)
- *	the unit of quota allocation: one system block size
- */
-STATIC uint
-xfs_calc_qm_dqalloc_reservation(
-	struct xfs_mount	*mp)
-{
-	return XFS_WRITE_LOG_RES(mp) +
-		xfs_calc_buf_res(1,
-			XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB) - 1);
-}
-
-/*
- * Turning off quotas.
- *    the xfs_qoff_logitem_t: sizeof(struct xfs_qoff_logitem) * 2
- *    the superblock for the quota flags: sector size
- */
-STATIC uint
-xfs_calc_qm_quotaoff_reservation(
-	struct xfs_mount	*mp)
-{
-	return sizeof(struct xfs_qoff_logitem) * 2 +
-		xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
-}
-
-/*
- * End of turning off quotas.
- *    the xfs_qoff_logitem_t: sizeof(struct xfs_qoff_logitem) * 2
- */
-STATIC uint
-xfs_calc_qm_quotaoff_end_reservation(
-	struct xfs_mount	*mp)
-{
-	return sizeof(struct xfs_qoff_logitem) * 2;
-}
-
-/*
- * Syncing the incore super block changes to disk.
- *     the super block to reflect the changes: sector size
- */
-STATIC uint
-xfs_calc_sb_reservation(
-	struct xfs_mount	*mp)
-{
-	return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
-}
-
-/*
  * Initialize the precomputed transaction reservation values
  * in the mount structure.
  */
@@ -679,36 +56,7 @@
 xfs_trans_init(
 	struct xfs_mount	*mp)
 {
-	struct xfs_trans_reservations *resp = &mp->m_reservations;
-
-	resp->tr_write = xfs_calc_write_reservation(mp);
-	resp->tr_itruncate = xfs_calc_itruncate_reservation(mp);
-	resp->tr_rename = xfs_calc_rename_reservation(mp);
-	resp->tr_link = xfs_calc_link_reservation(mp);
-	resp->tr_remove = xfs_calc_remove_reservation(mp);
-	resp->tr_symlink = xfs_calc_symlink_reservation(mp);
-	resp->tr_create = xfs_calc_create_reservation(mp);
-	resp->tr_mkdir = xfs_calc_mkdir_reservation(mp);
-	resp->tr_ifree = xfs_calc_ifree_reservation(mp);
-	resp->tr_ichange = xfs_calc_ichange_reservation(mp);
-	resp->tr_growdata = xfs_calc_growdata_reservation(mp);
-	resp->tr_swrite = xfs_calc_swrite_reservation(mp);
-	resp->tr_writeid = xfs_calc_writeid_reservation(mp);
-	resp->tr_addafork = xfs_calc_addafork_reservation(mp);
-	resp->tr_attrinval = xfs_calc_attrinval_reservation(mp);
-	resp->tr_attrsetm = xfs_calc_attrsetm_reservation(mp);
-	resp->tr_attrsetrt = xfs_calc_attrsetrt_reservation(mp);
-	resp->tr_attrrm = xfs_calc_attrrm_reservation(mp);
-	resp->tr_clearagi = xfs_calc_clear_agi_bucket_reservation(mp);
-	resp->tr_growrtalloc = xfs_calc_growrtalloc_reservation(mp);
-	resp->tr_growrtzero = xfs_calc_growrtzero_reservation(mp);
-	resp->tr_growrtfree = xfs_calc_growrtfree_reservation(mp);
-	resp->tr_qm_sbchange = xfs_calc_qm_sbchange_reservation(mp);
-	resp->tr_qm_setqlim = xfs_calc_qm_setqlim_reservation(mp);
-	resp->tr_qm_dqalloc = xfs_calc_qm_dqalloc_reservation(mp);
-	resp->tr_qm_quotaoff = xfs_calc_qm_quotaoff_reservation(mp);
-	resp->tr_qm_equotaoff = xfs_calc_qm_quotaoff_end_reservation(mp);
-	resp->tr_sb = xfs_calc_sb_reservation(mp);
+	xfs_trans_resv_calc(mp, M_RES(mp));
 }
 
 /*
@@ -744,7 +92,7 @@
 	atomic_inc(&mp->m_active_trans);
 
 	tp = kmem_zone_zalloc(xfs_trans_zone, memflags);
-	tp->t_magic = XFS_TRANS_MAGIC;
+	tp->t_magic = XFS_TRANS_HEADER_MAGIC;
 	tp->t_type = type;
 	tp->t_mountp = mp;
 	INIT_LIST_HEAD(&tp->t_items);
@@ -789,7 +137,7 @@
 	/*
 	 * Initialize the new transaction structure.
 	 */
-	ntp->t_magic = XFS_TRANS_MAGIC;
+	ntp->t_magic = XFS_TRANS_HEADER_MAGIC;
 	ntp->t_type = tp->t_type;
 	ntp->t_mountp = tp->t_mountp;
 	INIT_LIST_HEAD(&ntp->t_items);
@@ -832,12 +180,10 @@
  */
 int
 xfs_trans_reserve(
-	xfs_trans_t	*tp,
-	uint		blocks,
-	uint		logspace,
-	uint		rtextents,
-	uint		flags,
-	uint		logcount)
+	struct xfs_trans	*tp,
+	struct xfs_trans_res	*resp,
+	uint			blocks,
+	uint			rtextents)
 {
 	int		error = 0;
 	int		rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
@@ -863,13 +209,15 @@
 	/*
 	 * Reserve the log space needed for this transaction.
 	 */
-	if (logspace > 0) {
+	if (resp->tr_logres > 0) {
 		bool	permanent = false;
 
-		ASSERT(tp->t_log_res == 0 || tp->t_log_res == logspace);
-		ASSERT(tp->t_log_count == 0 || tp->t_log_count == logcount);
+		ASSERT(tp->t_log_res == 0 ||
+		       tp->t_log_res == resp->tr_logres);
+		ASSERT(tp->t_log_count == 0 ||
+		       tp->t_log_count == resp->tr_logcount);
 
-		if (flags & XFS_TRANS_PERM_LOG_RES) {
+		if (resp->tr_logflags & XFS_TRANS_PERM_LOG_RES) {
 			tp->t_flags |= XFS_TRANS_PERM_LOG_RES;
 			permanent = true;
 		} else {
@@ -878,20 +226,21 @@
 		}
 
 		if (tp->t_ticket != NULL) {
-			ASSERT(flags & XFS_TRANS_PERM_LOG_RES);
+			ASSERT(resp->tr_logflags & XFS_TRANS_PERM_LOG_RES);
 			error = xfs_log_regrant(tp->t_mountp, tp->t_ticket);
 		} else {
-			error = xfs_log_reserve(tp->t_mountp, logspace,
-						logcount, &tp->t_ticket,
-						XFS_TRANSACTION, permanent,
-						tp->t_type);
+			error = xfs_log_reserve(tp->t_mountp,
+						resp->tr_logres,
+						resp->tr_logcount,
+						&tp->t_ticket, XFS_TRANSACTION,
+						permanent, tp->t_type);
 		}
 
 		if (error)
 			goto undo_blocks;
 
-		tp->t_log_res = logspace;
-		tp->t_log_count = logcount;
+		tp->t_log_res = resp->tr_logres;
+		tp->t_log_count = resp->tr_logcount;
 	}
 
 	/*
@@ -916,10 +265,10 @@
 	 * reservations which have already been performed.
 	 */
 undo_log:
-	if (logspace > 0) {
+	if (resp->tr_logres > 0) {
 		int		log_flags;
 
-		if (flags & XFS_TRANS_PERM_LOG_RES) {
+		if (resp->tr_logflags & XFS_TRANS_PERM_LOG_RES) {
 			log_flags = XFS_LOG_REL_PERM_RESERV;
 		} else {
 			log_flags = 0;
@@ -1367,10 +716,10 @@
 		lip->li_desc = NULL;
 
 		if (commit_lsn != NULLCOMMITLSN)
-			IOP_COMMITTING(lip, commit_lsn);
+			lip->li_ops->iop_committing(lip, commit_lsn);
 		if (flags & XFS_TRANS_ABORT)
 			lip->li_flags |= XFS_LI_ABORTED;
-		IOP_UNLOCK(lip);
+		lip->li_ops->iop_unlock(lip);
 
 		xfs_trans_free_item_desc(lidp);
 	}
@@ -1390,8 +739,11 @@
 	/* xfs_trans_ail_update_bulk drops ailp->xa_lock */
 	xfs_trans_ail_update_bulk(ailp, cur, log_items, nr_items, commit_lsn);
 
-	for (i = 0; i < nr_items; i++)
-		IOP_UNPIN(log_items[i], 0);
+	for (i = 0; i < nr_items; i++) {
+		struct xfs_log_item *lip = log_items[i];
+
+		lip->li_ops->iop_unpin(lip, 0);
+	}
 }
 
 /*
@@ -1401,11 +753,11 @@
  *
  * If we are called with the aborted flag set, it is because a log write during
  * a CIL checkpoint commit has failed. In this case, all the items in the
- * checkpoint have already gone through IOP_COMMITED and IOP_UNLOCK, which
+ * checkpoint have already gone through iop_commited and iop_unlock, which
  * means that checkpoint commit abort handling is treated exactly the same
  * as an iclog write error even though we haven't started any IO yet. Hence in
- * this case all we need to do is IOP_COMMITTED processing, followed by an
- * IOP_UNPIN(aborted) call.
+ * this case all we need to do is iop_committed processing, followed by an
+ * iop_unpin(aborted) call.
  *
  * The AIL cursor is used to optimise the insert process. If commit_lsn is not
  * at the end of the AIL, the insert cursor avoids the need to walk
@@ -1438,7 +790,7 @@
 
 		if (aborted)
 			lip->li_flags |= XFS_LI_ABORTED;
-		item_lsn = IOP_COMMITTED(lip, commit_lsn);
+		item_lsn = lip->li_ops->iop_committed(lip, commit_lsn);
 
 		/* item_lsn of -1 means the item needs no further processing */
 		if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0)
@@ -1450,7 +802,7 @@
 		 */
 		if (aborted) {
 			ASSERT(XFS_FORCED_SHUTDOWN(ailp->xa_mount));
-			IOP_UNPIN(lip, 1);
+			lip->li_ops->iop_unpin(lip, 1);
 			continue;
 		}
 
@@ -1468,7 +820,7 @@
 				xfs_trans_ail_update(ailp, lip, item_lsn);
 			else
 				spin_unlock(&ailp->xa_lock);
-			IOP_UNPIN(lip, 0);
+			lip->li_ops->iop_unpin(lip, 0);
 			continue;
 		}
 
@@ -1666,7 +1018,7 @@
 	struct xfs_inode	*dp)
 {
 	struct xfs_trans	*trans;
-	unsigned int		logres, count;
+	struct xfs_trans_res	tres;
 	int			error;
 
 	/*
@@ -1678,8 +1030,8 @@
 	/*
 	 * Copy the critical parameters from one trans to the next.
 	 */
-	logres = trans->t_log_res;
-	count = trans->t_log_count;
+	tres.tr_logres = trans->t_log_res;
+	tres.tr_logcount = trans->t_log_count;
 	*tpp = xfs_trans_dup(trans);
 
 	/*
@@ -1710,8 +1062,8 @@
 	 * across this call, or that anything that is locked be logged in
 	 * the prior and the next transactions.
 	 */
-	error = xfs_trans_reserve(trans, 0, logres, 0,
-				  XFS_TRANS_PERM_LOG_RES, count);
+	tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
+	error = xfs_trans_reserve(trans, &tres, 0, 0);
 	/*
 	 *  Ensure that the inode is in the new transaction and locked.
 	 */
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 2b49463..09cf40b 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -20,285 +20,9 @@
 
 struct xfs_log_item;
 
-/*
- * This is the structure written in the log at the head of
- * every transaction. It identifies the type and id of the
- * transaction, and contains the number of items logged by
- * the transaction so we know how many to expect during recovery.
- *
- * Do not change the below structure without redoing the code in
- * xlog_recover_add_to_trans() and xlog_recover_add_to_cont_trans().
- */
-typedef struct xfs_trans_header {
-	uint		th_magic;		/* magic number */
-	uint		th_type;		/* transaction type */
-	__int32_t	th_tid;			/* transaction id (unused) */
-	uint		th_num_items;		/* num items logged by trans */
-} xfs_trans_header_t;
+#include "xfs_trans_resv.h"
 
-#define	XFS_TRANS_HEADER_MAGIC	0x5452414e	/* TRAN */
-
-/*
- * Log item types.
- */
-#define	XFS_LI_EFI		0x1236
-#define	XFS_LI_EFD		0x1237
-#define	XFS_LI_IUNLINK		0x1238
-#define	XFS_LI_INODE		0x123b	/* aligned ino chunks, var-size ibufs */
-#define	XFS_LI_BUF		0x123c	/* v2 bufs, variable sized inode bufs */
-#define	XFS_LI_DQUOT		0x123d
-#define	XFS_LI_QUOTAOFF		0x123e
-#define	XFS_LI_ICREATE		0x123f
-
-#define XFS_LI_TYPE_DESC \
-	{ XFS_LI_EFI,		"XFS_LI_EFI" }, \
-	{ XFS_LI_EFD,		"XFS_LI_EFD" }, \
-	{ XFS_LI_IUNLINK,	"XFS_LI_IUNLINK" }, \
-	{ XFS_LI_INODE,		"XFS_LI_INODE" }, \
-	{ XFS_LI_BUF,		"XFS_LI_BUF" }, \
-	{ XFS_LI_DQUOT,		"XFS_LI_DQUOT" }, \
-	{ XFS_LI_QUOTAOFF,	"XFS_LI_QUOTAOFF" }
-
-/*
- * Transaction types.  Used to distinguish types of buffers.
- */
-#define XFS_TRANS_SETATTR_NOT_SIZE	1
-#define XFS_TRANS_SETATTR_SIZE		2
-#define XFS_TRANS_INACTIVE		3
-#define XFS_TRANS_CREATE		4
-#define XFS_TRANS_CREATE_TRUNC		5
-#define XFS_TRANS_TRUNCATE_FILE		6
-#define XFS_TRANS_REMOVE		7
-#define XFS_TRANS_LINK			8
-#define XFS_TRANS_RENAME		9
-#define XFS_TRANS_MKDIR			10
-#define XFS_TRANS_RMDIR			11
-#define XFS_TRANS_SYMLINK		12
-#define XFS_TRANS_SET_DMATTRS		13
-#define XFS_TRANS_GROWFS		14
-#define XFS_TRANS_STRAT_WRITE		15
-#define XFS_TRANS_DIOSTRAT		16
-/* 17 was XFS_TRANS_WRITE_SYNC */
-#define	XFS_TRANS_WRITEID		18
-#define	XFS_TRANS_ADDAFORK		19
-#define	XFS_TRANS_ATTRINVAL		20
-#define	XFS_TRANS_ATRUNCATE		21
-#define	XFS_TRANS_ATTR_SET		22
-#define	XFS_TRANS_ATTR_RM		23
-#define	XFS_TRANS_ATTR_FLAG		24
-#define	XFS_TRANS_CLEAR_AGI_BUCKET	25
-#define XFS_TRANS_QM_SBCHANGE		26
-/*
- * Dummy entries since we use the transaction type to index into the
- * trans_type[] in xlog_recover_print_trans_head()
- */
-#define XFS_TRANS_DUMMY1		27
-#define XFS_TRANS_DUMMY2		28
-#define XFS_TRANS_QM_QUOTAOFF		29
-#define XFS_TRANS_QM_DQALLOC		30
-#define XFS_TRANS_QM_SETQLIM		31
-#define XFS_TRANS_QM_DQCLUSTER		32
-#define XFS_TRANS_QM_QINOCREATE		33
-#define XFS_TRANS_QM_QUOTAOFF_END	34
-#define XFS_TRANS_SB_UNIT		35
-#define XFS_TRANS_FSYNC_TS		36
-#define	XFS_TRANS_GROWFSRT_ALLOC	37
-#define	XFS_TRANS_GROWFSRT_ZERO		38
-#define	XFS_TRANS_GROWFSRT_FREE		39
-#define	XFS_TRANS_SWAPEXT		40
-#define	XFS_TRANS_SB_COUNT		41
-#define	XFS_TRANS_CHECKPOINT		42
-#define	XFS_TRANS_ICREATE		43
-#define	XFS_TRANS_TYPE_MAX		43
-/* new transaction types need to be reflected in xfs_logprint(8) */
-
-#define XFS_TRANS_TYPES \
-	{ XFS_TRANS_SETATTR_NOT_SIZE,	"SETATTR_NOT_SIZE" }, \
-	{ XFS_TRANS_SETATTR_SIZE,	"SETATTR_SIZE" }, \
-	{ XFS_TRANS_INACTIVE,		"INACTIVE" }, \
-	{ XFS_TRANS_CREATE,		"CREATE" }, \
-	{ XFS_TRANS_CREATE_TRUNC,	"CREATE_TRUNC" }, \
-	{ XFS_TRANS_TRUNCATE_FILE,	"TRUNCATE_FILE" }, \
-	{ XFS_TRANS_REMOVE,		"REMOVE" }, \
-	{ XFS_TRANS_LINK,		"LINK" }, \
-	{ XFS_TRANS_RENAME,		"RENAME" }, \
-	{ XFS_TRANS_MKDIR,		"MKDIR" }, \
-	{ XFS_TRANS_RMDIR,		"RMDIR" }, \
-	{ XFS_TRANS_SYMLINK,		"SYMLINK" }, \
-	{ XFS_TRANS_SET_DMATTRS,	"SET_DMATTRS" }, \
-	{ XFS_TRANS_GROWFS,		"GROWFS" }, \
-	{ XFS_TRANS_STRAT_WRITE,	"STRAT_WRITE" }, \
-	{ XFS_TRANS_DIOSTRAT,		"DIOSTRAT" }, \
-	{ XFS_TRANS_WRITEID,		"WRITEID" }, \
-	{ XFS_TRANS_ADDAFORK,		"ADDAFORK" }, \
-	{ XFS_TRANS_ATTRINVAL,		"ATTRINVAL" }, \
-	{ XFS_TRANS_ATRUNCATE,		"ATRUNCATE" }, \
-	{ XFS_TRANS_ATTR_SET,		"ATTR_SET" }, \
-	{ XFS_TRANS_ATTR_RM,		"ATTR_RM" }, \
-	{ XFS_TRANS_ATTR_FLAG,		"ATTR_FLAG" }, \
-	{ XFS_TRANS_CLEAR_AGI_BUCKET,	"CLEAR_AGI_BUCKET" }, \
-	{ XFS_TRANS_QM_SBCHANGE,	"QM_SBCHANGE" }, \
-	{ XFS_TRANS_QM_QUOTAOFF,	"QM_QUOTAOFF" }, \
-	{ XFS_TRANS_QM_DQALLOC,		"QM_DQALLOC" }, \
-	{ XFS_TRANS_QM_SETQLIM,		"QM_SETQLIM" }, \
-	{ XFS_TRANS_QM_DQCLUSTER,	"QM_DQCLUSTER" }, \
-	{ XFS_TRANS_QM_QINOCREATE,	"QM_QINOCREATE" }, \
-	{ XFS_TRANS_QM_QUOTAOFF_END,	"QM_QOFF_END" }, \
-	{ XFS_TRANS_SB_UNIT,		"SB_UNIT" }, \
-	{ XFS_TRANS_FSYNC_TS,		"FSYNC_TS" }, \
-	{ XFS_TRANS_GROWFSRT_ALLOC,	"GROWFSRT_ALLOC" }, \
-	{ XFS_TRANS_GROWFSRT_ZERO,	"GROWFSRT_ZERO" }, \
-	{ XFS_TRANS_GROWFSRT_FREE,	"GROWFSRT_FREE" }, \
-	{ XFS_TRANS_SWAPEXT,		"SWAPEXT" }, \
-	{ XFS_TRANS_SB_COUNT,		"SB_COUNT" }, \
-	{ XFS_TRANS_CHECKPOINT,		"CHECKPOINT" }, \
-	{ XFS_TRANS_DUMMY1,		"DUMMY1" }, \
-	{ XFS_TRANS_DUMMY2,		"DUMMY2" }, \
-	{ XLOG_UNMOUNT_REC_TYPE,	"UNMOUNT" }
-
-/*
- * This structure is used to track log items associated with
- * a transaction.  It points to the log item and keeps some
- * flags to track the state of the log item.  It also tracks
- * the amount of space needed to log the item it describes
- * once we get to commit processing (see xfs_trans_commit()).
- */
-struct xfs_log_item_desc {
-	struct xfs_log_item	*lid_item;
-	struct list_head	lid_trans;
-	unsigned char		lid_flags;
-};
-
-#define XFS_LID_DIRTY		0x1
-
-#define	XFS_TRANS_MAGIC		0x5452414E	/* 'TRAN' */
-/*
- * Values for t_flags.
- */
-#define	XFS_TRANS_DIRTY		0x01	/* something needs to be logged */
-#define	XFS_TRANS_SB_DIRTY	0x02	/* superblock is modified */
-#define	XFS_TRANS_PERM_LOG_RES	0x04	/* xact took a permanent log res */
-#define	XFS_TRANS_SYNC		0x08	/* make commit synchronous */
-#define XFS_TRANS_DQ_DIRTY	0x10	/* at least one dquot in trx dirty */
-#define XFS_TRANS_RESERVE	0x20    /* OK to use reserved data blocks */
-#define XFS_TRANS_FREEZE_PROT	0x40	/* Transaction has elevated writer
-					   count in superblock */
-
-/*
- * Values for call flags parameter.
- */
-#define	XFS_TRANS_RELEASE_LOG_RES	0x4
-#define	XFS_TRANS_ABORT			0x8
-
-/*
- * Field values for xfs_trans_mod_sb.
- */
-#define	XFS_TRANS_SB_ICOUNT		0x00000001
-#define	XFS_TRANS_SB_IFREE		0x00000002
-#define	XFS_TRANS_SB_FDBLOCKS		0x00000004
-#define	XFS_TRANS_SB_RES_FDBLOCKS	0x00000008
-#define	XFS_TRANS_SB_FREXTENTS		0x00000010
-#define	XFS_TRANS_SB_RES_FREXTENTS	0x00000020
-#define	XFS_TRANS_SB_DBLOCKS		0x00000040
-#define	XFS_TRANS_SB_AGCOUNT		0x00000080
-#define	XFS_TRANS_SB_IMAXPCT		0x00000100
-#define	XFS_TRANS_SB_REXTSIZE		0x00000200
-#define	XFS_TRANS_SB_RBMBLOCKS		0x00000400
-#define	XFS_TRANS_SB_RBLOCKS		0x00000800
-#define	XFS_TRANS_SB_REXTENTS		0x00001000
-#define	XFS_TRANS_SB_REXTSLOG		0x00002000
-
-
-/*
- * Per-extent log reservation for the allocation btree changes
- * involved in freeing or allocating an extent.
- * 2 trees * (2 blocks/level * max depth - 1)
- */
-#define	XFS_ALLOCFREE_LOG_COUNT(mp,nx) \
-	((nx) * (2 * (2 * XFS_AG_MAXLEVELS(mp) - 1)))
-
-/*
- * Per-directory log reservation for any directory change.
- * dir blocks: (1 btree block per level + data block + free block)
- * bmap btree: (levels + 2) * max depth
- * v2 directory blocks can be fragmented below the dirblksize down to the fsb
- * size, so account for that in the DAENTER macros.
- */
-#define	XFS_DIROP_LOG_COUNT(mp)	\
-	(XFS_DAENTER_BLOCKS(mp, XFS_DATA_FORK) + \
-	 XFS_DAENTER_BMAPS(mp, XFS_DATA_FORK) + 1)
-
-
-#define	XFS_WRITE_LOG_RES(mp)	((mp)->m_reservations.tr_write)
-#define	XFS_ITRUNCATE_LOG_RES(mp)   ((mp)->m_reservations.tr_itruncate)
-#define	XFS_RENAME_LOG_RES(mp)	((mp)->m_reservations.tr_rename)
-#define	XFS_LINK_LOG_RES(mp)	((mp)->m_reservations.tr_link)
-#define	XFS_REMOVE_LOG_RES(mp)	((mp)->m_reservations.tr_remove)
-#define	XFS_SYMLINK_LOG_RES(mp)	((mp)->m_reservations.tr_symlink)
-#define	XFS_CREATE_LOG_RES(mp)	((mp)->m_reservations.tr_create)
-#define	XFS_MKDIR_LOG_RES(mp)	((mp)->m_reservations.tr_mkdir)
-#define	XFS_IFREE_LOG_RES(mp)	((mp)->m_reservations.tr_ifree)
-#define	XFS_ICHANGE_LOG_RES(mp)	((mp)->m_reservations.tr_ichange)
-#define	XFS_GROWDATA_LOG_RES(mp)    ((mp)->m_reservations.tr_growdata)
-#define	XFS_GROWRTALLOC_LOG_RES(mp)	((mp)->m_reservations.tr_growrtalloc)
-#define	XFS_GROWRTZERO_LOG_RES(mp)	((mp)->m_reservations.tr_growrtzero)
-#define	XFS_GROWRTFREE_LOG_RES(mp)	((mp)->m_reservations.tr_growrtfree)
-#define	XFS_SWRITE_LOG_RES(mp)	((mp)->m_reservations.tr_swrite)
-/*
- * Logging the inode timestamps on an fsync -- same as SWRITE
- * as long as SWRITE logs the entire inode core
- */
-#define XFS_FSYNC_TS_LOG_RES(mp)        ((mp)->m_reservations.tr_swrite)
-#define	XFS_WRITEID_LOG_RES(mp)		((mp)->m_reservations.tr_swrite)
-#define	XFS_ADDAFORK_LOG_RES(mp)	((mp)->m_reservations.tr_addafork)
-#define	XFS_ATTRINVAL_LOG_RES(mp)	((mp)->m_reservations.tr_attrinval)
-#define	XFS_ATTRSETM_LOG_RES(mp)	((mp)->m_reservations.tr_attrsetm)
-#define XFS_ATTRSETRT_LOG_RES(mp)	((mp)->m_reservations.tr_attrsetrt)
-#define	XFS_ATTRRM_LOG_RES(mp)		((mp)->m_reservations.tr_attrrm)
-#define	XFS_CLEAR_AGI_BUCKET_LOG_RES(mp)  ((mp)->m_reservations.tr_clearagi)
-#define XFS_QM_SBCHANGE_LOG_RES(mp)	((mp)->m_reservations.tr_qm_sbchange)
-#define XFS_QM_SETQLIM_LOG_RES(mp)	((mp)->m_reservations.tr_qm_setqlim)
-#define XFS_QM_DQALLOC_LOG_RES(mp)	((mp)->m_reservations.tr_qm_dqalloc)
-#define XFS_QM_QUOTAOFF_LOG_RES(mp)	((mp)->m_reservations.tr_qm_quotaoff)
-#define XFS_QM_QUOTAOFF_END_LOG_RES(mp)	((mp)->m_reservations.tr_qm_equotaoff)
-#define XFS_SB_LOG_RES(mp)		((mp)->m_reservations.tr_sb)
-
-/*
- * Various log count values.
- */
-#define	XFS_DEFAULT_LOG_COUNT		1
-#define	XFS_DEFAULT_PERM_LOG_COUNT	2
-#define	XFS_ITRUNCATE_LOG_COUNT		2
-#define XFS_INACTIVE_LOG_COUNT		2
-#define	XFS_CREATE_LOG_COUNT		2
-#define	XFS_MKDIR_LOG_COUNT		3
-#define	XFS_SYMLINK_LOG_COUNT		3
-#define	XFS_REMOVE_LOG_COUNT		2
-#define	XFS_LINK_LOG_COUNT		2
-#define	XFS_RENAME_LOG_COUNT		2
-#define	XFS_WRITE_LOG_COUNT		2
-#define	XFS_ADDAFORK_LOG_COUNT		2
-#define	XFS_ATTRINVAL_LOG_COUNT		1
-#define	XFS_ATTRSET_LOG_COUNT		3
-#define	XFS_ATTRRM_LOG_COUNT		3
-
-/*
- * Here we centralize the specification of XFS meta-data buffer
- * reference count values.  This determine how hard the buffer
- * cache tries to hold onto the buffer.
- */
-#define	XFS_AGF_REF		4
-#define	XFS_AGI_REF		4
-#define	XFS_AGFL_REF		3
-#define	XFS_INO_BTREE_REF	3
-#define	XFS_ALLOC_BTREE_REF	2
-#define	XFS_BMAP_BTREE_REF	2
-#define	XFS_DIR_BTREE_REF	2
-#define	XFS_INO_REF		2
-#define	XFS_ATTR_BTREE_REF	1
-#define	XFS_DQUOT_REF		1
-
-#ifdef __KERNEL__
+/* kernel only transaction subsystem defines */
 
 struct xfs_buf;
 struct xfs_buftarg;
@@ -310,6 +34,7 @@
 struct xfs_log_item_desc;
 struct xfs_mount;
 struct xfs_trans;
+struct xfs_trans_res;
 struct xfs_dquot_acct;
 struct xfs_busy_extent;
 
@@ -342,7 +67,7 @@
 	{ XFS_LI_ABORTED,	"ABORTED" }
 
 struct xfs_item_ops {
-	uint (*iop_size)(xfs_log_item_t *);
+	void (*iop_size)(xfs_log_item_t *, int *, int *);
 	void (*iop_format)(xfs_log_item_t *, struct xfs_log_iovec *);
 	void (*iop_pin)(xfs_log_item_t *);
 	void (*iop_unpin)(xfs_log_item_t *, int remove);
@@ -352,17 +77,8 @@
 	void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t);
 };
 
-#define IOP_SIZE(ip)		(*(ip)->li_ops->iop_size)(ip)
-#define IOP_FORMAT(ip,vp)	(*(ip)->li_ops->iop_format)(ip, vp)
-#define IOP_PIN(ip)		(*(ip)->li_ops->iop_pin)(ip)
-#define IOP_UNPIN(ip, remove)	(*(ip)->li_ops->iop_unpin)(ip, remove)
-#define IOP_PUSH(ip, list)	(*(ip)->li_ops->iop_push)(ip, list)
-#define IOP_UNLOCK(ip)		(*(ip)->li_ops->iop_unlock)(ip)
-#define IOP_COMMITTED(ip, lsn)	(*(ip)->li_ops->iop_committed)(ip, lsn)
-#define IOP_COMMITTING(ip, lsn) (*(ip)->li_ops->iop_committing)(ip, lsn)
-
 /*
- * Return values for the IOP_PUSH() routines.
+ * Return values for the iop_push() routines.
  */
 #define XFS_ITEM_SUCCESS	0
 #define XFS_ITEM_PINNED		1
@@ -446,7 +162,7 @@
 xfs_trans_t	*xfs_trans_alloc(struct xfs_mount *, uint);
 xfs_trans_t	*_xfs_trans_alloc(struct xfs_mount *, uint, xfs_km_flags_t);
 xfs_trans_t	*xfs_trans_dup(xfs_trans_t *);
-int		xfs_trans_reserve(xfs_trans_t *, uint, uint, uint,
+int		xfs_trans_reserve(struct xfs_trans *, struct xfs_trans_res *,
 				  uint, uint);
 void		xfs_trans_mod_sb(xfs_trans_t *, uint, int64_t);
 
@@ -528,9 +244,4 @@
 extern kmem_zone_t	*xfs_trans_zone;
 extern kmem_zone_t	*xfs_log_item_desc_zone;
 
-#endif	/* __KERNEL__ */
-
-void		xfs_trans_init(struct xfs_mount *);
-int		xfs_trans_roll(struct xfs_trans **, struct xfs_inode *);
-
 #endif	/* __XFS_TRANS_H__ */
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 0eda725..21c6d7d 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -61,20 +61,6 @@
 #endif /* DEBUG */
 
 /*
- * Return a pointer to the first item in the AIL.  If the AIL is empty, then
- * return NULL.
- */
-xfs_log_item_t *
-xfs_ail_min(
-	struct xfs_ail  *ailp)
-{
-	if (list_empty(&ailp->xa_ail))
-		return NULL;
-
-	return list_first_entry(&ailp->xa_ail, xfs_log_item_t, li_ail);
-}
-
- /*
  * Return a pointer to the last item in the AIL.  If the AIL is empty, then
  * return NULL.
  */
@@ -393,11 +379,11 @@
 		int	lock_result;
 
 		/*
-		 * Note that IOP_PUSH may unlock and reacquire the AIL lock.  We
+		 * Note that iop_push may unlock and reacquire the AIL lock.  We
 		 * rely on the AIL cursor implementation to be able to deal with
 		 * the dropped lock.
 		 */
-		lock_result = IOP_PUSH(lip, &ailp->xa_buf_list);
+		lock_result = lip->li_ops->iop_push(lip, &ailp->xa_buf_list);
 		switch (lock_result) {
 		case XFS_ITEM_SUCCESS:
 			XFS_STATS_INC(xs_push_ail_success);
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index aa5a04b..8c75b8f 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -505,7 +505,7 @@
 
 /*
  * Mark the buffer as not needing to be unlocked when the buf item's
- * IOP_UNLOCK() routine is called.  The buffer must already be locked
+ * iop_unlock() routine is called.  The buffer must already be locked
  * and associated with the given transaction.
  */
 /* ARGSUSED */
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
index 61407a8..54ee3c5 100644
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -17,6 +17,7 @@
  */
 #include "xfs.h"
 #include "xfs_fs.h"
+#include "xfs_format.h"
 #include "xfs_log.h"
 #include "xfs_trans.h"
 #include "xfs_sb.h"
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index 53b7c9b..c52def0 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -25,6 +25,9 @@
 struct xfs_ail;
 struct xfs_log_vec;
 
+
+void	xfs_trans_init(struct xfs_mount *);
+int	xfs_trans_roll(struct xfs_trans **, struct xfs_inode *);
 void	xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *);
 void	xfs_trans_del_item(struct xfs_log_item *);
 void	xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn,
@@ -83,6 +86,18 @@
 				struct xfs_ail_cursor *cur,
 				struct xfs_log_item **log_items, int nr_items,
 				xfs_lsn_t lsn) __releases(ailp->xa_lock);
+/*
+ * Return a pointer to the first item in the AIL.  If the AIL is empty, then
+ * return NULL.
+ */
+static inline struct xfs_log_item *
+xfs_ail_min(
+	struct xfs_ail  *ailp)
+{
+	return list_first_entry_or_null(&ailp->xa_ail, struct xfs_log_item,
+					li_ail);
+}
+
 static inline void
 xfs_trans_ail_update(
 	struct xfs_ail		*ailp,
diff --git a/fs/xfs/xfs_trans_resv.c b/fs/xfs/xfs_trans_resv.c
new file mode 100644
index 0000000..a65a3cc4
--- /dev/null
+++ b/fs/xfs/xfs_trans_resv.c
@@ -0,0 +1,803 @@
+/*
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * Copyright (C) 2010 Red Hat, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_format.h"
+#include "xfs_log.h"
+#include "xfs_trans_resv.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+#include "xfs_error.h"
+#include "xfs_da_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_btree.h"
+#include "xfs_ialloc.h"
+#include "xfs_alloc.h"
+#include "xfs_extent_busy.h"
+#include "xfs_bmap.h"
+#include "xfs_bmap_util.h"
+#include "xfs_quota.h"
+#include "xfs_qm.h"
+#include "xfs_trans_space.h"
+#include "xfs_trace.h"
+
+/*
+ * A buffer has a format structure overhead in the log in addition
+ * to the data, so we need to take this into account when reserving
+ * space in a transaction for a buffer.  Round the space required up
+ * to a multiple of 128 bytes so that we don't change the historical
+ * reservation that has been used for this overhead.
+ */
+STATIC uint
+xfs_buf_log_overhead(void)
+{
+	return round_up(sizeof(struct xlog_op_header) +
+			sizeof(struct xfs_buf_log_format), 128);
+}
+
+/*
+ * Calculate out transaction log reservation per item in bytes.
+ *
+ * The nbufs argument is used to indicate the number of items that
+ * will be changed in a transaction.  size is used to tell how many
+ * bytes should be reserved per item.
+ */
+STATIC uint
+xfs_calc_buf_res(
+	uint		nbufs,
+	uint		size)
+{
+	return nbufs * (size + xfs_buf_log_overhead());
+}
+
+/*
+ * Logging inodes is really tricksy. They are logged in memory format,
+ * which means that what we write into the log doesn't directly translate into
+ * the amount of space they use on disk.
+ *
+ * Case in point - btree format forks in memory format use more space than the
+ * on-disk format. In memory, the buffer contains a normal btree block header so
+ * the btree code can treat it as though it is just another generic buffer.
+ * However, when we write it to the inode fork, we don't write all of this
+ * header as it isn't needed. e.g. the root is only ever in the inode, so
+ * there's no need for sibling pointers which would waste 16 bytes of space.
+ *
+ * Hence when we have an inode with a maximally sized btree format fork, then
+ * amount of information we actually log is greater than the size of the inode
+ * on disk. Hence we need an inode reservation function that calculates all this
+ * correctly. So, we log:
+ *
+ * - log op headers for object
+ * - inode log format object
+ * - the entire inode contents (core + 2 forks)
+ * - two bmap btree block headers
+ */
+STATIC uint
+xfs_calc_inode_res(
+	struct xfs_mount	*mp,
+	uint			ninodes)
+{
+	return ninodes * (sizeof(struct xlog_op_header) +
+			  sizeof(struct xfs_inode_log_format) +
+			  mp->m_sb.sb_inodesize +
+			  2 * XFS_BMBT_BLOCK_LEN(mp));
+}
+
+/*
+ * Various log reservation values.
+ *
+ * These are based on the size of the file system block because that is what
+ * most transactions manipulate.  Each adds in an additional 128 bytes per
+ * item logged to try to account for the overhead of the transaction mechanism.
+ *
+ * Note:  Most of the reservations underestimate the number of allocation
+ * groups into which they could free extents in the xfs_bmap_finish() call.
+ * This is because the number in the worst case is quite high and quite
+ * unusual.  In order to fix this we need to change xfs_bmap_finish() to free
+ * extents in only a single AG at a time.  This will require changes to the
+ * EFI code as well, however, so that the EFI for the extents not freed is
+ * logged again in each transaction.  See SGI PV #261917.
+ *
+ * Reservation functions here avoid a huge stack in xfs_trans_init due to
+ * register overflow from temporaries in the calculations.
+ */
+
+
+/*
+ * In a write transaction we can allocate a maximum of 2
+ * extents.  This gives:
+ *    the inode getting the new extents: inode size
+ *    the inode's bmap btree: max depth * block size
+ *    the agfs of the ags from which the extents are allocated: 2 * sector
+ *    the superblock free block counter: sector size
+ *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
+ * And the bmap_finish transaction can free bmap blocks in a join:
+ *    the agfs of the ags containing the blocks: 2 * sector size
+ *    the agfls of the ags containing the blocks: 2 * sector size
+ *    the super block free block counter: sector size
+ *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
+ */
+STATIC uint
+xfs_calc_write_reservation(
+	struct xfs_mount	*mp)
+{
+	return XFS_DQUOT_LOGRES(mp) +
+		MAX((xfs_calc_inode_res(mp, 1) +
+		     xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
+				      XFS_FSB_TO_B(mp, 1)) +
+		     xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
+		     xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
+				      XFS_FSB_TO_B(mp, 1))),
+		    (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
+		     xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
+				      XFS_FSB_TO_B(mp, 1))));
+}
+
+/*
+ * In truncating a file we free up to two extents at once.  We can modify:
+ *    the inode being truncated: inode size
+ *    the inode's bmap btree: (max depth + 1) * block size
+ * And the bmap_finish transaction can free the blocks and bmap blocks:
+ *    the agf for each of the ags: 4 * sector size
+ *    the agfl for each of the ags: 4 * sector size
+ *    the super block to reflect the freed blocks: sector size
+ *    worst case split in allocation btrees per extent assuming 4 extents:
+ *		4 exts * 2 trees * (2 * max depth - 1) * block size
+ *    the inode btree: max depth * blocksize
+ *    the allocation btrees: 2 trees * (max depth - 1) * block size
+ */
+STATIC uint
+xfs_calc_itruncate_reservation(
+	struct xfs_mount	*mp)
+{
+	return XFS_DQUOT_LOGRES(mp) +
+		MAX((xfs_calc_inode_res(mp, 1) +
+		     xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1,
+				      XFS_FSB_TO_B(mp, 1))),
+		    (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
+		     xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 4),
+				      XFS_FSB_TO_B(mp, 1)) +
+		    xfs_calc_buf_res(5, 0) +
+		    xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
+				     XFS_FSB_TO_B(mp, 1)) +
+		    xfs_calc_buf_res(2 + XFS_IALLOC_BLOCKS(mp) +
+				     mp->m_in_maxlevels, 0)));
+}
+
+/*
+ * In renaming a files we can modify:
+ *    the four inodes involved: 4 * inode size
+ *    the two directory btrees: 2 * (max depth + v2) * dir block size
+ *    the two directory bmap btrees: 2 * max depth * block size
+ * And the bmap_finish transaction can free dir and bmap blocks (two sets
+ *	of bmap blocks) giving:
+ *    the agf for the ags in which the blocks live: 3 * sector size
+ *    the agfl for the ags in which the blocks live: 3 * sector size
+ *    the superblock for the free block count: sector size
+ *    the allocation btrees: 3 exts * 2 trees * (2 * max depth - 1) * block size
+ */
+STATIC uint
+xfs_calc_rename_reservation(
+	struct xfs_mount	*mp)
+{
+	return XFS_DQUOT_LOGRES(mp) +
+		MAX((xfs_calc_inode_res(mp, 4) +
+		     xfs_calc_buf_res(2 * XFS_DIROP_LOG_COUNT(mp),
+				      XFS_FSB_TO_B(mp, 1))),
+		    (xfs_calc_buf_res(7, mp->m_sb.sb_sectsize) +
+		     xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 3),
+				      XFS_FSB_TO_B(mp, 1))));
+}
+
+/*
+ * For creating a link to an inode:
+ *    the parent directory inode: inode size
+ *    the linked inode: inode size
+ *    the directory btree could split: (max depth + v2) * dir block size
+ *    the directory bmap btree could join or split: (max depth + v2) * blocksize
+ * And the bmap_finish transaction can free some bmap blocks giving:
+ *    the agf for the ag in which the blocks live: sector size
+ *    the agfl for the ag in which the blocks live: sector size
+ *    the superblock for the free block count: sector size
+ *    the allocation btrees: 2 trees * (2 * max depth - 1) * block size
+ */
+STATIC uint
+xfs_calc_link_reservation(
+	struct xfs_mount	*mp)
+{
+	return XFS_DQUOT_LOGRES(mp) +
+		MAX((xfs_calc_inode_res(mp, 2) +
+		     xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
+				      XFS_FSB_TO_B(mp, 1))),
+		    (xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
+		     xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
+				      XFS_FSB_TO_B(mp, 1))));
+}
+
+/*
+ * For removing a directory entry we can modify:
+ *    the parent directory inode: inode size
+ *    the removed inode: inode size
+ *    the directory btree could join: (max depth + v2) * dir block size
+ *    the directory bmap btree could join or split: (max depth + v2) * blocksize
+ * And the bmap_finish transaction can free the dir and bmap blocks giving:
+ *    the agf for the ag in which the blocks live: 2 * sector size
+ *    the agfl for the ag in which the blocks live: 2 * sector size
+ *    the superblock for the free block count: sector size
+ *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
+ */
+STATIC uint
+xfs_calc_remove_reservation(
+	struct xfs_mount	*mp)
+{
+	return XFS_DQUOT_LOGRES(mp) +
+		MAX((xfs_calc_inode_res(mp, 2) +
+		     xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
+				      XFS_FSB_TO_B(mp, 1))),
+		    (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
+		     xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
+				      XFS_FSB_TO_B(mp, 1))));
+}
+
+/*
+ * For create, break it in to the two cases that the transaction
+ * covers. We start with the modify case - allocation done by modification
+ * of the state of existing inodes - and the allocation case.
+ */
+
+/*
+ * For create we can modify:
+ *    the parent directory inode: inode size
+ *    the new inode: inode size
+ *    the inode btree entry: block size
+ *    the superblock for the nlink flag: sector size
+ *    the directory btree: (max depth + v2) * dir block size
+ *    the directory inode's bmap btree: (max depth + v2) * block size
+ */
+STATIC uint
+xfs_calc_create_resv_modify(
+	struct xfs_mount	*mp)
+{
+	return xfs_calc_inode_res(mp, 2) +
+		xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
+		(uint)XFS_FSB_TO_B(mp, 1) +
+		xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1));
+}
+
+/*
+ * For create we can allocate some inodes giving:
+ *    the agi and agf of the ag getting the new inodes: 2 * sectorsize
+ *    the superblock for the nlink flag: sector size
+ *    the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize
+ *    the inode btree: max depth * blocksize
+ *    the allocation btrees: 2 trees * (max depth - 1) * block size
+ */
+STATIC uint
+xfs_calc_create_resv_alloc(
+	struct xfs_mount	*mp)
+{
+	return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
+		mp->m_sb.sb_sectsize +
+		xfs_calc_buf_res(XFS_IALLOC_BLOCKS(mp), XFS_FSB_TO_B(mp, 1)) +
+		xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) +
+		xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
+				 XFS_FSB_TO_B(mp, 1));
+}
+
+STATIC uint
+__xfs_calc_create_reservation(
+	struct xfs_mount	*mp)
+{
+	return XFS_DQUOT_LOGRES(mp) +
+		MAX(xfs_calc_create_resv_alloc(mp),
+		    xfs_calc_create_resv_modify(mp));
+}
+
+/*
+ * For icreate we can allocate some inodes giving:
+ *    the agi and agf of the ag getting the new inodes: 2 * sectorsize
+ *    the superblock for the nlink flag: sector size
+ *    the inode btree: max depth * blocksize
+ *    the allocation btrees: 2 trees * (max depth - 1) * block size
+ */
+STATIC uint
+xfs_calc_icreate_resv_alloc(
+	struct xfs_mount	*mp)
+{
+	return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
+		mp->m_sb.sb_sectsize +
+		xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) +
+		xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
+				 XFS_FSB_TO_B(mp, 1));
+}
+
+STATIC uint
+xfs_calc_icreate_reservation(xfs_mount_t *mp)
+{
+	return XFS_DQUOT_LOGRES(mp) +
+		MAX(xfs_calc_icreate_resv_alloc(mp),
+		    xfs_calc_create_resv_modify(mp));
+}
+
+STATIC uint
+xfs_calc_create_reservation(
+	struct xfs_mount	*mp)
+{
+	if (xfs_sb_version_hascrc(&mp->m_sb))
+		return xfs_calc_icreate_reservation(mp);
+	return __xfs_calc_create_reservation(mp);
+
+}
+
+/*
+ * Making a new directory is the same as creating a new file.
+ */
+STATIC uint
+xfs_calc_mkdir_reservation(
+	struct xfs_mount	*mp)
+{
+	return xfs_calc_create_reservation(mp);
+}
+
+
+/*
+ * Making a new symplink is the same as creating a new file, but
+ * with the added blocks for remote symlink data which can be up to 1kB in
+ * length (MAXPATHLEN).
+ */
+STATIC uint
+xfs_calc_symlink_reservation(
+	struct xfs_mount	*mp)
+{
+	return xfs_calc_create_reservation(mp) +
+	       xfs_calc_buf_res(1, MAXPATHLEN);
+}
+
+/*
+ * In freeing an inode we can modify:
+ *    the inode being freed: inode size
+ *    the super block free inode counter: sector size
+ *    the agi hash list and counters: sector size
+ *    the inode btree entry: block size
+ *    the on disk inode before ours in the agi hash list: inode cluster size
+ *    the inode btree: max depth * blocksize
+ *    the allocation btrees: 2 trees * (max depth - 1) * block size
+ */
+STATIC uint
+xfs_calc_ifree_reservation(
+	struct xfs_mount	*mp)
+{
+	return XFS_DQUOT_LOGRES(mp) +
+		xfs_calc_inode_res(mp, 1) +
+		xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
+		xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) +
+		MAX((__uint16_t)XFS_FSB_TO_B(mp, 1),
+		    XFS_INODE_CLUSTER_SIZE(mp)) +
+		xfs_calc_buf_res(1, 0) +
+		xfs_calc_buf_res(2 + XFS_IALLOC_BLOCKS(mp) +
+				 mp->m_in_maxlevels, 0) +
+		xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
+				 XFS_FSB_TO_B(mp, 1));
+}
+
+/*
+ * When only changing the inode we log the inode and possibly the superblock
+ * We also add a bit of slop for the transaction stuff.
+ */
+STATIC uint
+xfs_calc_ichange_reservation(
+	struct xfs_mount	*mp)
+{
+	return XFS_DQUOT_LOGRES(mp) +
+		xfs_calc_inode_res(mp, 1) +
+		xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
+
+}
+
+/*
+ * Growing the data section of the filesystem.
+ *	superblock
+ *	agi and agf
+ *	allocation btrees
+ */
+STATIC uint
+xfs_calc_growdata_reservation(
+	struct xfs_mount	*mp)
+{
+	return xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
+		xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
+				 XFS_FSB_TO_B(mp, 1));
+}
+
+/*
+ * Growing the rt section of the filesystem.
+ * In the first set of transactions (ALLOC) we allocate space to the
+ * bitmap or summary files.
+ *	superblock: sector size
+ *	agf of the ag from which the extent is allocated: sector size
+ *	bmap btree for bitmap/summary inode: max depth * blocksize
+ *	bitmap/summary inode: inode size
+ *	allocation btrees for 1 block alloc: 2 * (2 * maxdepth - 1) * blocksize
+ */
+STATIC uint
+xfs_calc_growrtalloc_reservation(
+	struct xfs_mount	*mp)
+{
+	return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
+		xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
+				 XFS_FSB_TO_B(mp, 1)) +
+		xfs_calc_inode_res(mp, 1) +
+		xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
+				 XFS_FSB_TO_B(mp, 1));
+}
+
+/*
+ * Growing the rt section of the filesystem.
+ * In the second set of transactions (ZERO) we zero the new metadata blocks.
+ *	one bitmap/summary block: blocksize
+ */
+STATIC uint
+xfs_calc_growrtzero_reservation(
+	struct xfs_mount	*mp)
+{
+	return xfs_calc_buf_res(1, mp->m_sb.sb_blocksize);
+}
+
+/*
+ * Growing the rt section of the filesystem.
+ * In the third set of transactions (FREE) we update metadata without
+ * allocating any new blocks.
+ *	superblock: sector size
+ *	bitmap inode: inode size
+ *	summary inode: inode size
+ *	one bitmap block: blocksize
+ *	summary blocks: new summary size
+ */
+STATIC uint
+xfs_calc_growrtfree_reservation(
+	struct xfs_mount	*mp)
+{
+	return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
+		xfs_calc_inode_res(mp, 2) +
+		xfs_calc_buf_res(1, mp->m_sb.sb_blocksize) +
+		xfs_calc_buf_res(1, mp->m_rsumsize);
+}
+
+/*
+ * Logging the inode modification timestamp on a synchronous write.
+ *	inode
+ */
+STATIC uint
+xfs_calc_swrite_reservation(
+	struct xfs_mount	*mp)
+{
+	return xfs_calc_inode_res(mp, 1);
+}
+
+/*
+ * Logging the inode mode bits when writing a setuid/setgid file
+ *	inode
+ */
+STATIC uint
+xfs_calc_writeid_reservation(
+	struct xfs_mount	*mp)
+{
+	return xfs_calc_inode_res(mp, 1);
+}
+
+/*
+ * Converting the inode from non-attributed to attributed.
+ *	the inode being converted: inode size
+ *	agf block and superblock (for block allocation)
+ *	the new block (directory sized)
+ *	bmap blocks for the new directory block
+ *	allocation btrees
+ */
+STATIC uint
+xfs_calc_addafork_reservation(
+	struct xfs_mount	*mp)
+{
+	return XFS_DQUOT_LOGRES(mp) +
+		xfs_calc_inode_res(mp, 1) +
+		xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
+		xfs_calc_buf_res(1, mp->m_dirblksize) +
+		xfs_calc_buf_res(XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1,
+				 XFS_FSB_TO_B(mp, 1)) +
+		xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
+				 XFS_FSB_TO_B(mp, 1));
+}
+
+/*
+ * Removing the attribute fork of a file
+ *    the inode being truncated: inode size
+ *    the inode's bmap btree: max depth * block size
+ * And the bmap_finish transaction can free the blocks and bmap blocks:
+ *    the agf for each of the ags: 4 * sector size
+ *    the agfl for each of the ags: 4 * sector size
+ *    the super block to reflect the freed blocks: sector size
+ *    worst case split in allocation btrees per extent assuming 4 extents:
+ *		4 exts * 2 trees * (2 * max depth - 1) * block size
+ */
+STATIC uint
+xfs_calc_attrinval_reservation(
+	struct xfs_mount	*mp)
+{
+	return MAX((xfs_calc_inode_res(mp, 1) +
+		    xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK),
+				     XFS_FSB_TO_B(mp, 1))),
+		   (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
+		    xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 4),
+				     XFS_FSB_TO_B(mp, 1))));
+}
+
+/*
+ * Setting an attribute at mount time.
+ *	the inode getting the attribute
+ *	the superblock for allocations
+ *	the agfs extents are allocated from
+ *	the attribute btree * max depth
+ *	the inode allocation btree
+ * Since attribute transaction space is dependent on the size of the attribute,
+ * the calculation is done partially at mount time and partially at runtime(see
+ * below).
+ */
+STATIC uint
+xfs_calc_attrsetm_reservation(
+	struct xfs_mount	*mp)
+{
+	return XFS_DQUOT_LOGRES(mp) +
+		xfs_calc_inode_res(mp, 1) +
+		xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
+		xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH, XFS_FSB_TO_B(mp, 1));
+}
+
+/*
+ * Setting an attribute at runtime, transaction space unit per block.
+ * 	the superblock for allocations: sector size
+ *	the inode bmap btree could join or split: max depth * block size
+ * Since the runtime attribute transaction space is dependent on the total
+ * blocks needed for the 1st bmap, here we calculate out the space unit for
+ * one block so that the caller could figure out the total space according
+ * to the attibute extent length in blocks by:
+ *	ext * M_RES(mp)->tr_attrsetrt.tr_logres
+ */
+STATIC uint
+xfs_calc_attrsetrt_reservation(
+	struct xfs_mount	*mp)
+{
+	return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
+		xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK),
+				 XFS_FSB_TO_B(mp, 1));
+}
+
+/*
+ * Removing an attribute.
+ *    the inode: inode size
+ *    the attribute btree could join: max depth * block size
+ *    the inode bmap btree could join or split: max depth * block size
+ * And the bmap_finish transaction can free the attr blocks freed giving:
+ *    the agf for the ag in which the blocks live: 2 * sector size
+ *    the agfl for the ag in which the blocks live: 2 * sector size
+ *    the superblock for the free block count: sector size
+ *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
+ */
+STATIC uint
+xfs_calc_attrrm_reservation(
+	struct xfs_mount	*mp)
+{
+	return XFS_DQUOT_LOGRES(mp) +
+		MAX((xfs_calc_inode_res(mp, 1) +
+		     xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH,
+				      XFS_FSB_TO_B(mp, 1)) +
+		     (uint)XFS_FSB_TO_B(mp,
+					XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) +
+		     xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), 0)),
+		    (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
+		     xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
+				      XFS_FSB_TO_B(mp, 1))));
+}
+
+/*
+ * Clearing a bad agino number in an agi hash bucket.
+ */
+STATIC uint
+xfs_calc_clear_agi_bucket_reservation(
+	struct xfs_mount	*mp)
+{
+	return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
+}
+
+/*
+ * Clearing the quotaflags in the superblock.
+ *	the super block for changing quota flags: sector size
+ */
+STATIC uint
+xfs_calc_qm_sbchange_reservation(
+	struct xfs_mount	*mp)
+{
+	return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
+}
+
+/*
+ * Adjusting quota limits.
+ *    the xfs_disk_dquot_t: sizeof(struct xfs_disk_dquot)
+ */
+STATIC uint
+xfs_calc_qm_setqlim_reservation(
+	struct xfs_mount	*mp)
+{
+	return xfs_calc_buf_res(1, sizeof(struct xfs_disk_dquot));
+}
+
+/*
+ * Allocating quota on disk if needed.
+ *	the write transaction log space: M_RES(mp)->tr_write.tr_logres
+ *	the unit of quota allocation: one system block size
+ */
+STATIC uint
+xfs_calc_qm_dqalloc_reservation(
+	struct xfs_mount	*mp)
+{
+	ASSERT(M_RES(mp)->tr_write.tr_logres);
+	return M_RES(mp)->tr_write.tr_logres +
+		xfs_calc_buf_res(1,
+			XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB) - 1);
+}
+
+/*
+ * Turning off quotas.
+ *    the xfs_qoff_logitem_t: sizeof(struct xfs_qoff_logitem) * 2
+ *    the superblock for the quota flags: sector size
+ */
+STATIC uint
+xfs_calc_qm_quotaoff_reservation(
+	struct xfs_mount	*mp)
+{
+	return sizeof(struct xfs_qoff_logitem) * 2 +
+		xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
+}
+
+/*
+ * End of turning off quotas.
+ *    the xfs_qoff_logitem_t: sizeof(struct xfs_qoff_logitem) * 2
+ */
+STATIC uint
+xfs_calc_qm_quotaoff_end_reservation(
+	struct xfs_mount	*mp)
+{
+	return sizeof(struct xfs_qoff_logitem) * 2;
+}
+
+/*
+ * Syncing the incore super block changes to disk.
+ *     the super block to reflect the changes: sector size
+ */
+STATIC uint
+xfs_calc_sb_reservation(
+	struct xfs_mount	*mp)
+{
+	return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
+}
+
+void
+xfs_trans_resv_calc(
+	struct xfs_mount	*mp,
+	struct xfs_trans_resv	*resp)
+{
+	/*
+	 * The following transactions are logged in physical format and
+	 * require a permanent reservation on space.
+	 */
+	resp->tr_write.tr_logres = xfs_calc_write_reservation(mp);
+	resp->tr_write.tr_logcount = XFS_WRITE_LOG_COUNT;
+	resp->tr_write.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
+
+	resp->tr_itruncate.tr_logres = xfs_calc_itruncate_reservation(mp);
+	resp->tr_itruncate.tr_logcount = XFS_ITRUNCATE_LOG_COUNT;
+	resp->tr_itruncate.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
+
+	resp->tr_rename.tr_logres = xfs_calc_rename_reservation(mp);
+	resp->tr_rename.tr_logcount = XFS_RENAME_LOG_COUNT;
+	resp->tr_rename.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
+
+	resp->tr_link.tr_logres = xfs_calc_link_reservation(mp);
+	resp->tr_link.tr_logcount = XFS_LINK_LOG_COUNT;
+	resp->tr_link.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
+
+	resp->tr_remove.tr_logres = xfs_calc_remove_reservation(mp);
+	resp->tr_remove.tr_logcount = XFS_REMOVE_LOG_COUNT;
+	resp->tr_remove.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
+
+	resp->tr_symlink.tr_logres = xfs_calc_symlink_reservation(mp);
+	resp->tr_symlink.tr_logcount = XFS_SYMLINK_LOG_COUNT;
+	resp->tr_symlink.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
+
+	resp->tr_create.tr_logres = xfs_calc_create_reservation(mp);
+	resp->tr_create.tr_logcount = XFS_CREATE_LOG_COUNT;
+	resp->tr_create.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
+
+	resp->tr_mkdir.tr_logres = xfs_calc_mkdir_reservation(mp);
+	resp->tr_mkdir.tr_logcount = XFS_MKDIR_LOG_COUNT;
+	resp->tr_mkdir.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
+
+	resp->tr_ifree.tr_logres = xfs_calc_ifree_reservation(mp);
+	resp->tr_ifree.tr_logcount = XFS_INACTIVE_LOG_COUNT;
+	resp->tr_ifree.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
+
+	resp->tr_addafork.tr_logres = xfs_calc_addafork_reservation(mp);
+	resp->tr_addafork.tr_logcount = XFS_ADDAFORK_LOG_COUNT;
+	resp->tr_addafork.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
+
+	resp->tr_attrinval.tr_logres = xfs_calc_attrinval_reservation(mp);
+	resp->tr_attrinval.tr_logcount = XFS_ATTRINVAL_LOG_COUNT;
+	resp->tr_attrinval.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
+
+	resp->tr_attrsetm.tr_logres = xfs_calc_attrsetm_reservation(mp);
+	resp->tr_attrsetm.tr_logcount = XFS_ATTRSET_LOG_COUNT;
+	resp->tr_attrsetm.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
+
+	resp->tr_attrrm.tr_logres = xfs_calc_attrrm_reservation(mp);
+	resp->tr_attrrm.tr_logcount = XFS_ATTRRM_LOG_COUNT;
+	resp->tr_attrrm.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
+
+	resp->tr_growrtalloc.tr_logres = xfs_calc_growrtalloc_reservation(mp);
+	resp->tr_growrtalloc.tr_logcount = XFS_DEFAULT_PERM_LOG_COUNT;
+	resp->tr_growrtalloc.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
+
+	resp->tr_qm_dqalloc.tr_logres = xfs_calc_qm_dqalloc_reservation(mp);
+	resp->tr_qm_dqalloc.tr_logcount = XFS_WRITE_LOG_COUNT;
+	resp->tr_qm_dqalloc.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
+
+	/*
+	 * The following transactions are logged in logical format with
+	 * a default log count.
+	 */
+	resp->tr_qm_sbchange.tr_logres = xfs_calc_qm_sbchange_reservation(mp);
+	resp->tr_qm_sbchange.tr_logcount = XFS_DEFAULT_LOG_COUNT;
+
+	resp->tr_qm_setqlim.tr_logres = xfs_calc_qm_setqlim_reservation(mp);
+	resp->tr_qm_setqlim.tr_logcount = XFS_DEFAULT_LOG_COUNT;
+
+	resp->tr_qm_quotaoff.tr_logres = xfs_calc_qm_quotaoff_reservation(mp);
+	resp->tr_qm_quotaoff.tr_logcount = XFS_DEFAULT_LOG_COUNT;
+
+	resp->tr_qm_equotaoff.tr_logres =
+		xfs_calc_qm_quotaoff_end_reservation(mp);
+	resp->tr_qm_equotaoff.tr_logcount = XFS_DEFAULT_LOG_COUNT;
+
+	resp->tr_sb.tr_logres = xfs_calc_sb_reservation(mp);
+	resp->tr_sb.tr_logcount = XFS_DEFAULT_LOG_COUNT;
+
+	/* The following transaction are logged in logical format */
+	resp->tr_ichange.tr_logres = xfs_calc_ichange_reservation(mp);
+	resp->tr_growdata.tr_logres = xfs_calc_growdata_reservation(mp);
+	resp->tr_swrite.tr_logres = xfs_calc_swrite_reservation(mp);
+	resp->tr_fsyncts.tr_logres = xfs_calc_swrite_reservation(mp);
+	resp->tr_writeid.tr_logres = xfs_calc_writeid_reservation(mp);
+	resp->tr_attrsetrt.tr_logres = xfs_calc_attrsetrt_reservation(mp);
+	resp->tr_clearagi.tr_logres = xfs_calc_clear_agi_bucket_reservation(mp);
+	resp->tr_growrtzero.tr_logres = xfs_calc_growrtzero_reservation(mp);
+	resp->tr_growrtfree.tr_logres = xfs_calc_growrtfree_reservation(mp);
+}
diff --git a/fs/xfs/xfs_trans_resv.h b/fs/xfs/xfs_trans_resv.h
new file mode 100644
index 0000000..de7de9a
--- /dev/null
+++ b/fs/xfs/xfs_trans_resv.h
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef	__XFS_TRANS_RESV_H__
+#define	__XFS_TRANS_RESV_H__
+
+struct xfs_mount;
+
+/*
+ * structure for maintaining pre-calculated transaction reservations.
+ */
+struct xfs_trans_res {
+	uint	tr_logres;	/* log space unit in bytes per log ticket */
+	int	tr_logcount;	/* number of log operations per log ticket */
+	int	tr_logflags;	/* log flags, currently only used for indicating
+				 * a reservation request is permanent or not */
+};
+
+struct xfs_trans_resv {
+	struct xfs_trans_res	tr_write;	/* extent alloc trans */
+	struct xfs_trans_res	tr_itruncate;	/* truncate trans */
+	struct xfs_trans_res	tr_rename;	/* rename trans */
+	struct xfs_trans_res	tr_link;	/* link trans */
+	struct xfs_trans_res	tr_remove;	/* unlink trans */
+	struct xfs_trans_res	tr_symlink;	/* symlink trans */
+	struct xfs_trans_res	tr_create;	/* create trans */
+	struct xfs_trans_res	tr_mkdir;	/* mkdir trans */
+	struct xfs_trans_res	tr_ifree;	/* inode free trans */
+	struct xfs_trans_res	tr_ichange;	/* inode update trans */
+	struct xfs_trans_res	tr_growdata;	/* fs data section grow trans */
+	struct xfs_trans_res	tr_swrite;	/* sync write inode trans */
+	struct xfs_trans_res	tr_addafork;	/* add inode attr fork trans */
+	struct xfs_trans_res	tr_writeid;	/* write setuid/setgid file */
+	struct xfs_trans_res	tr_attrinval;	/* attr fork buffer
+						 * invalidation */
+	struct xfs_trans_res	tr_attrsetm;	/* set/create an attribute at
+						 * mount time */
+	struct xfs_trans_res	tr_attrsetrt;	/* set/create an attribute at
+						 * runtime */
+	struct xfs_trans_res	tr_attrrm;	/* remove an attribute */
+	struct xfs_trans_res	tr_clearagi;	/* clear agi unlinked bucket */
+	struct xfs_trans_res	tr_growrtalloc;	/* grow realtime allocations */
+	struct xfs_trans_res	tr_growrtzero;	/* grow realtime zeroing */
+	struct xfs_trans_res	tr_growrtfree;	/* grow realtime freeing */
+	struct xfs_trans_res	tr_qm_sbchange;	/* change quota flags */
+	struct xfs_trans_res	tr_qm_setqlim;	/* adjust quota limits */
+	struct xfs_trans_res	tr_qm_dqalloc;	/* allocate quota on disk */
+	struct xfs_trans_res	tr_qm_quotaoff;	/* turn quota off */
+	struct xfs_trans_res	tr_qm_equotaoff;/* end of turn quota off */
+	struct xfs_trans_res	tr_sb;		/* modify superblock */
+	struct xfs_trans_res	tr_fsyncts;	/* update timestamps on fsync */
+};
+
+/* shorthand way of accessing reservation structure */
+#define M_RES(mp)	(&(mp)->m_resv)
+
+/*
+ * Per-extent log reservation for the allocation btree changes
+ * involved in freeing or allocating an extent.
+ * 2 trees * (2 blocks/level * max depth - 1) * block size
+ */
+#define	XFS_ALLOCFREE_LOG_RES(mp,nx) \
+	((nx) * (2 * XFS_FSB_TO_B((mp), 2 * XFS_AG_MAXLEVELS(mp) - 1)))
+#define	XFS_ALLOCFREE_LOG_COUNT(mp,nx) \
+	((nx) * (2 * (2 * XFS_AG_MAXLEVELS(mp) - 1)))
+
+/*
+ * Per-directory log reservation for any directory change.
+ * dir blocks: (1 btree block per level + data block + free block) * dblock size
+ * bmap btree: (levels + 2) * max depth * block size
+ * v2 directory blocks can be fragmented below the dirblksize down to the fsb
+ * size, so account for that in the DAENTER macros.
+ */
+#define	XFS_DIROP_LOG_RES(mp)	\
+	(XFS_FSB_TO_B(mp, XFS_DAENTER_BLOCKS(mp, XFS_DATA_FORK)) + \
+	 (XFS_FSB_TO_B(mp, XFS_DAENTER_BMAPS(mp, XFS_DATA_FORK) + 1)))
+#define	XFS_DIROP_LOG_COUNT(mp)	\
+	(XFS_DAENTER_BLOCKS(mp, XFS_DATA_FORK) + \
+	 XFS_DAENTER_BMAPS(mp, XFS_DATA_FORK) + 1)
+
+/*
+ * Various log count values.
+ */
+#define	XFS_DEFAULT_LOG_COUNT		1
+#define	XFS_DEFAULT_PERM_LOG_COUNT	2
+#define	XFS_ITRUNCATE_LOG_COUNT		2
+#define XFS_INACTIVE_LOG_COUNT		2
+#define	XFS_CREATE_LOG_COUNT		2
+#define	XFS_MKDIR_LOG_COUNT		3
+#define	XFS_SYMLINK_LOG_COUNT		3
+#define	XFS_REMOVE_LOG_COUNT		2
+#define	XFS_LINK_LOG_COUNT		2
+#define	XFS_RENAME_LOG_COUNT		2
+#define	XFS_WRITE_LOG_COUNT		2
+#define	XFS_ADDAFORK_LOG_COUNT		2
+#define	XFS_ATTRINVAL_LOG_COUNT		1
+#define	XFS_ATTRSET_LOG_COUNT		3
+#define	XFS_ATTRRM_LOG_COUNT		3
+
+void xfs_trans_resv_calc(struct xfs_mount *mp, struct xfs_trans_resv *resp);
+
+#endif	/* __XFS_TRANS_RESV_H__ */
diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h
index 61ba1cf..82bbc34 100644
--- a/fs/xfs/xfs_types.h
+++ b/fs/xfs/xfs_types.h
@@ -18,42 +18,7 @@
 #ifndef __XFS_TYPES_H__
 #define	__XFS_TYPES_H__
 
-#ifdef __KERNEL__
-
-/*
- * Additional type declarations for XFS
- */
-typedef signed char		__int8_t;
-typedef unsigned char		__uint8_t;
-typedef signed short int	__int16_t;
-typedef unsigned short int	__uint16_t;
-typedef signed int		__int32_t;
-typedef unsigned int		__uint32_t;
-typedef signed long long int	__int64_t;
-typedef unsigned long long int	__uint64_t;
-
-typedef __uint32_t		prid_t;		/* project ID */
-typedef __uint32_t		inst_t;		/* an instruction */
-
-typedef __s64			xfs_off_t;	/* <file offset> type */
-typedef unsigned long long	xfs_ino_t;	/* <inode> type */
-typedef __s64			xfs_daddr_t;	/* <disk address> type */
-typedef char *			xfs_caddr_t;	/* <core address> type */
-typedef __u32			xfs_dev_t;
-typedef __u32			xfs_nlink_t;
-
-/* __psint_t is the same size as a pointer */
-#if (BITS_PER_LONG == 32)
-typedef __int32_t __psint_t;
-typedef __uint32_t __psunsigned_t;
-#elif (BITS_PER_LONG == 64)
-typedef __int64_t __psint_t;
-typedef __uint64_t __psunsigned_t;
-#else
-#error BITS_PER_LONG must be 32 or 64
-#endif
-
-#endif	/* __KERNEL__ */
+typedef __uint32_t	prid_t;		/* project ID */
 
 typedef __uint32_t	xfs_agblock_t;	/* blockno in alloc. group */
 typedef	__uint32_t	xfs_agino_t;	/* inode # within allocation grp */
@@ -146,6 +111,12 @@
 #define XFS_MAX_SECTORSIZE	(1 << XFS_MAX_SECTORSIZE_LOG)
 
 /*
+ * Inode fork identifiers.
+ */
+#define	XFS_DATA_FORK	0
+#define	XFS_ATTR_FORK	1
+
+/*
  * Min numbers of data/attr fork btree root pointers.
  */
 #define MINDBTPTRS	3
@@ -169,6 +140,23 @@
 struct xfs_name {
 	const unsigned char	*name;
 	int			len;
+	int			type;
 };
 
+/*
+ * uid_t and gid_t are hard-coded to 32 bits in the inode.
+ * Hence, an 'id' in a dquot is 32 bits..
+ */
+typedef __uint32_t	xfs_dqid_t;
+
+/*
+ * Constants for bit manipulations.
+ */
+#define	XFS_NBBYLOG	3		/* log2(NBBY) */
+#define	XFS_WORDLOG	2		/* log2(sizeof(xfs_rtword_t)) */
+#define	XFS_NBWORDLOG	(XFS_NBBYLOG + XFS_WORDLOG)
+#define	XFS_NBWORD	(1 << XFS_NBWORDLOG)
+#define	XFS_WORDMASK	((1 << XFS_WORDLOG) - 1)
+
+
 #endif	/* __XFS_TYPES_H__ */
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c
deleted file mode 100644
index 0025c78..0000000
--- a/fs/xfs/xfs_utils.c
+++ /dev/null
@@ -1,314 +0,0 @@
-/*
- * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_types.h"
-#include "xfs_log.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_dir2.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_inode_item.h"
-#include "xfs_bmap.h"
-#include "xfs_error.h"
-#include "xfs_quota.h"
-#include "xfs_itable.h"
-#include "xfs_utils.h"
-
-
-/*
- * Allocates a new inode from disk and return a pointer to the
- * incore copy. This routine will internally commit the current
- * transaction and allocate a new one if the Space Manager needed
- * to do an allocation to replenish the inode free-list.
- *
- * This routine is designed to be called from xfs_create and
- * xfs_create_dir.
- *
- */
-int
-xfs_dir_ialloc(
-	xfs_trans_t	**tpp,		/* input: current transaction;
-					   output: may be a new transaction. */
-	xfs_inode_t	*dp,		/* directory within whose allocate
-					   the inode. */
-	umode_t		mode,
-	xfs_nlink_t	nlink,
-	xfs_dev_t	rdev,
-	prid_t		prid,		/* project id */
-	int		okalloc,	/* ok to allocate new space */
-	xfs_inode_t	**ipp,		/* pointer to inode; it will be
-					   locked. */
-	int		*committed)
-
-{
-	xfs_trans_t	*tp;
-	xfs_trans_t	*ntp;
-	xfs_inode_t	*ip;
-	xfs_buf_t	*ialloc_context = NULL;
-	int		code;
-	uint		log_res;
-	uint		log_count;
-	void		*dqinfo;
-	uint		tflags;
-
-	tp = *tpp;
-	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
-
-	/*
-	 * xfs_ialloc will return a pointer to an incore inode if
-	 * the Space Manager has an available inode on the free
-	 * list. Otherwise, it will do an allocation and replenish
-	 * the freelist.  Since we can only do one allocation per
-	 * transaction without deadlocks, we will need to commit the
-	 * current transaction and start a new one.  We will then
-	 * need to call xfs_ialloc again to get the inode.
-	 *
-	 * If xfs_ialloc did an allocation to replenish the freelist,
-	 * it returns the bp containing the head of the freelist as
-	 * ialloc_context. We will hold a lock on it across the
-	 * transaction commit so that no other process can steal
-	 * the inode(s) that we've just allocated.
-	 */
-	code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, okalloc,
-			  &ialloc_context, &ip);
-
-	/*
-	 * Return an error if we were unable to allocate a new inode.
-	 * This should only happen if we run out of space on disk or
-	 * encounter a disk error.
-	 */
-	if (code) {
-		*ipp = NULL;
-		return code;
-	}
-	if (!ialloc_context && !ip) {
-		*ipp = NULL;
-		return XFS_ERROR(ENOSPC);
-	}
-
-	/*
-	 * If the AGI buffer is non-NULL, then we were unable to get an
-	 * inode in one operation.  We need to commit the current
-	 * transaction and call xfs_ialloc() again.  It is guaranteed
-	 * to succeed the second time.
-	 */
-	if (ialloc_context) {
-		/*
-		 * Normally, xfs_trans_commit releases all the locks.
-		 * We call bhold to hang on to the ialloc_context across
-		 * the commit.  Holding this buffer prevents any other
-		 * processes from doing any allocations in this
-		 * allocation group.
-		 */
-		xfs_trans_bhold(tp, ialloc_context);
-		/*
-		 * Save the log reservation so we can use
-		 * them in the next transaction.
-		 */
-		log_res = xfs_trans_get_log_res(tp);
-		log_count = xfs_trans_get_log_count(tp);
-
-		/*
-		 * We want the quota changes to be associated with the next
-		 * transaction, NOT this one. So, detach the dqinfo from this
-		 * and attach it to the next transaction.
-		 */
-		dqinfo = NULL;
-		tflags = 0;
-		if (tp->t_dqinfo) {
-			dqinfo = (void *)tp->t_dqinfo;
-			tp->t_dqinfo = NULL;
-			tflags = tp->t_flags & XFS_TRANS_DQ_DIRTY;
-			tp->t_flags &= ~(XFS_TRANS_DQ_DIRTY);
-		}
-
-		ntp = xfs_trans_dup(tp);
-		code = xfs_trans_commit(tp, 0);
-		tp = ntp;
-		if (committed != NULL) {
-			*committed = 1;
-		}
-		/*
-		 * If we get an error during the commit processing,
-		 * release the buffer that is still held and return
-		 * to the caller.
-		 */
-		if (code) {
-			xfs_buf_relse(ialloc_context);
-			if (dqinfo) {
-				tp->t_dqinfo = dqinfo;
-				xfs_trans_free_dqinfo(tp);
-			}
-			*tpp = ntp;
-			*ipp = NULL;
-			return code;
-		}
-
-		/*
-		 * transaction commit worked ok so we can drop the extra ticket
-		 * reference that we gained in xfs_trans_dup()
-		 */
-		xfs_log_ticket_put(tp->t_ticket);
-		code = xfs_trans_reserve(tp, 0, log_res, 0,
-					 XFS_TRANS_PERM_LOG_RES, log_count);
-		/*
-		 * Re-attach the quota info that we detached from prev trx.
-		 */
-		if (dqinfo) {
-			tp->t_dqinfo = dqinfo;
-			tp->t_flags |= tflags;
-		}
-
-		if (code) {
-			xfs_buf_relse(ialloc_context);
-			*tpp = ntp;
-			*ipp = NULL;
-			return code;
-		}
-		xfs_trans_bjoin(tp, ialloc_context);
-
-		/*
-		 * Call ialloc again. Since we've locked out all
-		 * other allocations in this allocation group,
-		 * this call should always succeed.
-		 */
-		code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid,
-				  okalloc, &ialloc_context, &ip);
-
-		/*
-		 * If we get an error at this point, return to the caller
-		 * so that the current transaction can be aborted.
-		 */
-		if (code) {
-			*tpp = tp;
-			*ipp = NULL;
-			return code;
-		}
-		ASSERT(!ialloc_context && ip);
-
-	} else {
-		if (committed != NULL)
-			*committed = 0;
-	}
-
-	*ipp = ip;
-	*tpp = tp;
-
-	return 0;
-}
-
-/*
- * Decrement the link count on an inode & log the change.
- * If this causes the link count to go to zero, initiate the
- * logging activity required to truncate a file.
- */
-int				/* error */
-xfs_droplink(
-	xfs_trans_t *tp,
-	xfs_inode_t *ip)
-{
-	int	error;
-
-	xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
-
-	ASSERT (ip->i_d.di_nlink > 0);
-	ip->i_d.di_nlink--;
-	drop_nlink(VFS_I(ip));
-	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-
-	error = 0;
-	if (ip->i_d.di_nlink == 0) {
-		/*
-		 * We're dropping the last link to this file.
-		 * Move the on-disk inode to the AGI unlinked list.
-		 * From xfs_inactive() we will pull the inode from
-		 * the list and free it.
-		 */
-		error = xfs_iunlink(tp, ip);
-	}
-	return error;
-}
-
-/*
- * This gets called when the inode's version needs to be changed from 1 to 2.
- * Currently this happens when the nlink field overflows the old 16-bit value
- * or when chproj is called to change the project for the first time.
- * As a side effect the superblock version will also get rev'd
- * to contain the NLINK bit.
- */
-void
-xfs_bump_ino_vers2(
-	xfs_trans_t	*tp,
-	xfs_inode_t	*ip)
-{
-	xfs_mount_t	*mp;
-
-	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-	ASSERT(ip->i_d.di_version == 1);
-
-	ip->i_d.di_version = 2;
-	ip->i_d.di_onlink = 0;
-	memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
-	mp = tp->t_mountp;
-	if (!xfs_sb_version_hasnlink(&mp->m_sb)) {
-		spin_lock(&mp->m_sb_lock);
-		if (!xfs_sb_version_hasnlink(&mp->m_sb)) {
-			xfs_sb_version_addnlink(&mp->m_sb);
-			spin_unlock(&mp->m_sb_lock);
-			xfs_mod_sb(tp, XFS_SB_VERSIONNUM);
-		} else {
-			spin_unlock(&mp->m_sb_lock);
-		}
-	}
-	/* Caller must log the inode */
-}
-
-/*
- * Increment the link count on an inode & log the change.
- */
-int
-xfs_bumplink(
-	xfs_trans_t *tp,
-	xfs_inode_t *ip)
-{
-	xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
-
-	ASSERT(ip->i_d.di_nlink > 0);
-	ip->i_d.di_nlink++;
-	inc_nlink(VFS_I(ip));
-	if ((ip->i_d.di_version == 1) &&
-	    (ip->i_d.di_nlink > XFS_MAXLINK_1)) {
-		/*
-		 * The inode has increased its number of links beyond
-		 * what can fit in an old format inode.  It now needs
-		 * to be converted to a version 2 inode with a 32 bit
-		 * link count.  If this is the first inode in the file
-		 * system to do this, then we need to bump the superblock
-		 * version number as well.
-		 */
-		xfs_bump_ino_vers2(tp, ip);
-	}
-
-	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-	return 0;
-}
diff --git a/fs/xfs/xfs_utils.h b/fs/xfs/xfs_utils.h
deleted file mode 100644
index 5eeab46..0000000
--- a/fs/xfs/xfs_utils.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_UTILS_H__
-#define __XFS_UTILS_H__
-
-extern int xfs_dir_ialloc(xfs_trans_t **, xfs_inode_t *, umode_t, xfs_nlink_t,
-				xfs_dev_t, prid_t, int, xfs_inode_t **, int *);
-extern int xfs_droplink(xfs_trans_t *, xfs_inode_t *);
-extern int xfs_bumplink(xfs_trans_t *, xfs_inode_t *);
-extern void xfs_bump_ino_vers2(xfs_trans_t *, xfs_inode_t *);
-
-#endif	/* __XFS_UTILS_H__ */
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
deleted file mode 100644
index dc730ac..0000000
--- a/fs/xfs/xfs_vnodeops.c
+++ /dev/null
@@ -1,1870 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Silicon Graphics, Inc.
- * Copyright (c) 2012 Red Hat, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_types.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_dir2.h"
-#include "xfs_mount.h"
-#include "xfs_da_btree.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_inode_item.h"
-#include "xfs_itable.h"
-#include "xfs_ialloc.h"
-#include "xfs_alloc.h"
-#include "xfs_bmap.h"
-#include "xfs_acl.h"
-#include "xfs_attr.h"
-#include "xfs_error.h"
-#include "xfs_quota.h"
-#include "xfs_utils.h"
-#include "xfs_rtalloc.h"
-#include "xfs_trans_space.h"
-#include "xfs_log_priv.h"
-#include "xfs_filestream.h"
-#include "xfs_vnodeops.h"
-#include "xfs_trace.h"
-#include "xfs_icache.h"
-#include "xfs_symlink.h"
-
-
-/*
- * This is called by xfs_inactive to free any blocks beyond eof
- * when the link count isn't zero and by xfs_dm_punch_hole() when
- * punching a hole to EOF.
- */
-int
-xfs_free_eofblocks(
-	xfs_mount_t	*mp,
-	xfs_inode_t	*ip,
-	bool		need_iolock)
-{
-	xfs_trans_t	*tp;
-	int		error;
-	xfs_fileoff_t	end_fsb;
-	xfs_fileoff_t	last_fsb;
-	xfs_filblks_t	map_len;
-	int		nimaps;
-	xfs_bmbt_irec_t	imap;
-
-	/*
-	 * Figure out if there are any blocks beyond the end
-	 * of the file.  If not, then there is nothing to do.
-	 */
-	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_ISIZE(ip));
-	last_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
-	if (last_fsb <= end_fsb)
-		return 0;
-	map_len = last_fsb - end_fsb;
-
-	nimaps = 1;
-	xfs_ilock(ip, XFS_ILOCK_SHARED);
-	error = xfs_bmapi_read(ip, end_fsb, map_len, &imap, &nimaps, 0);
-	xfs_iunlock(ip, XFS_ILOCK_SHARED);
-
-	if (!error && (nimaps != 0) &&
-	    (imap.br_startblock != HOLESTARTBLOCK ||
-	     ip->i_delayed_blks)) {
-		/*
-		 * Attach the dquots to the inode up front.
-		 */
-		error = xfs_qm_dqattach(ip, 0);
-		if (error)
-			return error;
-
-		/*
-		 * There are blocks after the end of file.
-		 * Free them up now by truncating the file to
-		 * its current size.
-		 */
-		tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
-
-		if (need_iolock) {
-			if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
-				xfs_trans_cancel(tp, 0);
-				return EAGAIN;
-			}
-		}
-
-		error = xfs_trans_reserve(tp, 0,
-					  XFS_ITRUNCATE_LOG_RES(mp),
-					  0, XFS_TRANS_PERM_LOG_RES,
-					  XFS_ITRUNCATE_LOG_COUNT);
-		if (error) {
-			ASSERT(XFS_FORCED_SHUTDOWN(mp));
-			xfs_trans_cancel(tp, 0);
-			if (need_iolock)
-				xfs_iunlock(ip, XFS_IOLOCK_EXCL);
-			return error;
-		}
-
-		xfs_ilock(ip, XFS_ILOCK_EXCL);
-		xfs_trans_ijoin(tp, ip, 0);
-
-		/*
-		 * Do not update the on-disk file size.  If we update the
-		 * on-disk file size and then the system crashes before the
-		 * contents of the file are flushed to disk then the files
-		 * may be full of holes (ie NULL files bug).
-		 */
-		error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK,
-					      XFS_ISIZE(ip));
-		if (error) {
-			/*
-			 * If we get an error at this point we simply don't
-			 * bother truncating the file.
-			 */
-			xfs_trans_cancel(tp,
-					 (XFS_TRANS_RELEASE_LOG_RES |
-					  XFS_TRANS_ABORT));
-		} else {
-			error = xfs_trans_commit(tp,
-						XFS_TRANS_RELEASE_LOG_RES);
-			if (!error)
-				xfs_inode_clear_eofblocks_tag(ip);
-		}
-
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
-		if (need_iolock)
-			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
-	}
-	return error;
-}
-
-int
-xfs_release(
-	xfs_inode_t	*ip)
-{
-	xfs_mount_t	*mp = ip->i_mount;
-	int		error;
-
-	if (!S_ISREG(ip->i_d.di_mode) || (ip->i_d.di_mode == 0))
-		return 0;
-
-	/* If this is a read-only mount, don't do this (would generate I/O) */
-	if (mp->m_flags & XFS_MOUNT_RDONLY)
-		return 0;
-
-	if (!XFS_FORCED_SHUTDOWN(mp)) {
-		int truncated;
-
-		/*
-		 * If we are using filestreams, and we have an unlinked
-		 * file that we are processing the last close on, then nothing
-		 * will be able to reopen and write to this file. Purge this
-		 * inode from the filestreams cache so that it doesn't delay
-		 * teardown of the inode.
-		 */
-		if ((ip->i_d.di_nlink == 0) && xfs_inode_is_filestream(ip))
-			xfs_filestream_deassociate(ip);
-
-		/*
-		 * If we previously truncated this file and removed old data
-		 * in the process, we want to initiate "early" writeout on
-		 * the last close.  This is an attempt to combat the notorious
-		 * NULL files problem which is particularly noticeable from a
-		 * truncate down, buffered (re-)write (delalloc), followed by
-		 * a crash.  What we are effectively doing here is
-		 * significantly reducing the time window where we'd otherwise
-		 * be exposed to that problem.
-		 */
-		truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED);
-		if (truncated) {
-			xfs_iflags_clear(ip, XFS_IDIRTY_RELEASE);
-			if (VN_DIRTY(VFS_I(ip)) && ip->i_delayed_blks > 0) {
-				error = -filemap_flush(VFS_I(ip)->i_mapping);
-				if (error)
-					return error;
-			}
-		}
-	}
-
-	if (ip->i_d.di_nlink == 0)
-		return 0;
-
-	if (xfs_can_free_eofblocks(ip, false)) {
-
-		/*
-		 * If we can't get the iolock just skip truncating the blocks
-		 * past EOF because we could deadlock with the mmap_sem
-		 * otherwise.  We'll get another chance to drop them once the
-		 * last reference to the inode is dropped, so we'll never leak
-		 * blocks permanently.
-		 *
-		 * Further, check if the inode is being opened, written and
-		 * closed frequently and we have delayed allocation blocks
-		 * outstanding (e.g. streaming writes from the NFS server),
-		 * truncating the blocks past EOF will cause fragmentation to
-		 * occur.
-		 *
-		 * In this case don't do the truncation, either, but we have to
-		 * be careful how we detect this case. Blocks beyond EOF show
-		 * up as i_delayed_blks even when the inode is clean, so we
-		 * need to truncate them away first before checking for a dirty
-		 * release. Hence on the first dirty close we will still remove
-		 * the speculative allocation, but after that we will leave it
-		 * in place.
-		 */
-		if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE))
-			return 0;
-
-		error = xfs_free_eofblocks(mp, ip, true);
-		if (error && error != EAGAIN)
-			return error;
-
-		/* delalloc blocks after truncation means it really is dirty */
-		if (ip->i_delayed_blks)
-			xfs_iflags_set(ip, XFS_IDIRTY_RELEASE);
-	}
-	return 0;
-}
-
-/*
- * xfs_inactive
- *
- * This is called when the vnode reference count for the vnode
- * goes to zero.  If the file has been unlinked, then it must
- * now be truncated.  Also, we clear all of the read-ahead state
- * kept for the inode here since the file is now closed.
- */
-int
-xfs_inactive(
-	xfs_inode_t	*ip)
-{
-	xfs_bmap_free_t	free_list;
-	xfs_fsblock_t	first_block;
-	int		committed;
-	xfs_trans_t	*tp;
-	xfs_mount_t	*mp;
-	int		error;
-	int		truncate = 0;
-
-	/*
-	 * If the inode is already free, then there can be nothing
-	 * to clean up here.
-	 */
-	if (ip->i_d.di_mode == 0 || is_bad_inode(VFS_I(ip))) {
-		ASSERT(ip->i_df.if_real_bytes == 0);
-		ASSERT(ip->i_df.if_broot_bytes == 0);
-		return VN_INACTIVE_CACHE;
-	}
-
-	mp = ip->i_mount;
-
-	error = 0;
-
-	/* If this is a read-only mount, don't do this (would generate I/O) */
-	if (mp->m_flags & XFS_MOUNT_RDONLY)
-		goto out;
-
-	if (ip->i_d.di_nlink != 0) {
-		/*
-		 * force is true because we are evicting an inode from the
-		 * cache. Post-eof blocks must be freed, lest we end up with
-		 * broken free space accounting.
-		 */
-		if (xfs_can_free_eofblocks(ip, true)) {
-			error = xfs_free_eofblocks(mp, ip, false);
-			if (error)
-				return VN_INACTIVE_CACHE;
-		}
-		goto out;
-	}
-
-	if (S_ISREG(ip->i_d.di_mode) &&
-	    (ip->i_d.di_size != 0 || XFS_ISIZE(ip) != 0 ||
-	     ip->i_d.di_nextents > 0 || ip->i_delayed_blks > 0))
-		truncate = 1;
-
-	error = xfs_qm_dqattach(ip, 0);
-	if (error)
-		return VN_INACTIVE_CACHE;
-
-	tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
-	error = xfs_trans_reserve(tp, 0,
-			(truncate || S_ISLNK(ip->i_d.di_mode)) ?
-				XFS_ITRUNCATE_LOG_RES(mp) :
-				XFS_IFREE_LOG_RES(mp),
-			0,
-			XFS_TRANS_PERM_LOG_RES,
-			XFS_ITRUNCATE_LOG_COUNT);
-	if (error) {
-		ASSERT(XFS_FORCED_SHUTDOWN(mp));
-		xfs_trans_cancel(tp, 0);
-		return VN_INACTIVE_CACHE;
-	}
-
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
-	xfs_trans_ijoin(tp, ip, 0);
-
-	if (S_ISLNK(ip->i_d.di_mode)) {
-		error = xfs_inactive_symlink(ip, &tp);
-		if (error)
-			goto out_cancel;
-	} else if (truncate) {
-		ip->i_d.di_size = 0;
-		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-
-		error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, 0);
-		if (error)
-			goto out_cancel;
-
-		ASSERT(ip->i_d.di_nextents == 0);
-	}
-
-	/*
-	 * If there are attributes associated with the file then blow them away
-	 * now.  The code calls a routine that recursively deconstructs the
-	 * attribute fork.  We need to just commit the current transaction
-	 * because we can't use it for xfs_attr_inactive().
-	 */
-	if (ip->i_d.di_anextents > 0) {
-		ASSERT(ip->i_d.di_forkoff != 0);
-
-		error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
-		if (error)
-			goto out_unlock;
-
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
-
-		error = xfs_attr_inactive(ip);
-		if (error)
-			goto out;
-
-		tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
-		error = xfs_trans_reserve(tp, 0,
-					  XFS_IFREE_LOG_RES(mp),
-					  0, XFS_TRANS_PERM_LOG_RES,
-					  XFS_INACTIVE_LOG_COUNT);
-		if (error) {
-			xfs_trans_cancel(tp, 0);
-			goto out;
-		}
-
-		xfs_ilock(ip, XFS_ILOCK_EXCL);
-		xfs_trans_ijoin(tp, ip, 0);
-	}
-
-	if (ip->i_afp)
-		xfs_idestroy_fork(ip, XFS_ATTR_FORK);
-
-	ASSERT(ip->i_d.di_anextents == 0);
-
-	/*
-	 * Free the inode.
-	 */
-	xfs_bmap_init(&free_list, &first_block);
-	error = xfs_ifree(tp, ip, &free_list);
-	if (error) {
-		/*
-		 * If we fail to free the inode, shut down.  The cancel
-		 * might do that, we need to make sure.  Otherwise the
-		 * inode might be lost for a long time or forever.
-		 */
-		if (!XFS_FORCED_SHUTDOWN(mp)) {
-			xfs_notice(mp, "%s: xfs_ifree returned error %d",
-				__func__, error);
-			xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
-		}
-		xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
-	} else {
-		/*
-		 * Credit the quota account(s). The inode is gone.
-		 */
-		xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_ICOUNT, -1);
-
-		/*
-		 * Just ignore errors at this point.  There is nothing we can
-		 * do except to try to keep going. Make sure it's not a silent
-		 * error.
-		 */
-		error = xfs_bmap_finish(&tp,  &free_list, &committed);
-		if (error)
-			xfs_notice(mp, "%s: xfs_bmap_finish returned error %d",
-				__func__, error);
-		error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
-		if (error)
-			xfs_notice(mp, "%s: xfs_trans_commit returned error %d",
-				__func__, error);
-	}
-
-	/*
-	 * Release the dquots held by inode, if any.
-	 */
-	xfs_qm_dqdetach(ip);
-out_unlock:
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-out:
-	return VN_INACTIVE_CACHE;
-out_cancel:
-	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
-	goto out_unlock;
-}
-
-/*
- * Lookups up an inode from "name". If ci_name is not NULL, then a CI match
- * is allowed, otherwise it has to be an exact match. If a CI match is found,
- * ci_name->name will point to a the actual name (caller must free) or
- * will be set to NULL if an exact match is found.
- */
-int
-xfs_lookup(
-	xfs_inode_t		*dp,
-	struct xfs_name		*name,
-	xfs_inode_t		**ipp,
-	struct xfs_name		*ci_name)
-{
-	xfs_ino_t		inum;
-	int			error;
-	uint			lock_mode;
-
-	trace_xfs_lookup(dp, name);
-
-	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
-		return XFS_ERROR(EIO);
-
-	lock_mode = xfs_ilock_map_shared(dp);
-	error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name);
-	xfs_iunlock_map_shared(dp, lock_mode);
-
-	if (error)
-		goto out;
-
-	error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp);
-	if (error)
-		goto out_free_name;
-
-	return 0;
-
-out_free_name:
-	if (ci_name)
-		kmem_free(ci_name->name);
-out:
-	*ipp = NULL;
-	return error;
-}
-
-int
-xfs_create(
-	xfs_inode_t		*dp,
-	struct xfs_name		*name,
-	umode_t			mode,
-	xfs_dev_t		rdev,
-	xfs_inode_t		**ipp)
-{
-	int			is_dir = S_ISDIR(mode);
-	struct xfs_mount	*mp = dp->i_mount;
-	struct xfs_inode	*ip = NULL;
-	struct xfs_trans	*tp = NULL;
-	int			error;
-	xfs_bmap_free_t		free_list;
-	xfs_fsblock_t		first_block;
-	bool                    unlock_dp_on_error = false;
-	uint			cancel_flags;
-	int			committed;
-	prid_t			prid;
-	struct xfs_dquot	*udqp = NULL;
-	struct xfs_dquot	*gdqp = NULL;
-	struct xfs_dquot	*pdqp = NULL;
-	uint			resblks;
-	uint			log_res;
-	uint			log_count;
-
-	trace_xfs_create(dp, name);
-
-	if (XFS_FORCED_SHUTDOWN(mp))
-		return XFS_ERROR(EIO);
-
-	if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
-		prid = xfs_get_projid(dp);
-	else
-		prid = XFS_PROJID_DEFAULT;
-
-	/*
-	 * Make sure that we have allocated dquot(s) on disk.
-	 */
-	error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid,
-					XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
-					&udqp, &gdqp, &pdqp);
-	if (error)
-		return error;
-
-	if (is_dir) {
-		rdev = 0;
-		resblks = XFS_MKDIR_SPACE_RES(mp, name->len);
-		log_res = XFS_MKDIR_LOG_RES(mp);
-		log_count = XFS_MKDIR_LOG_COUNT;
-		tp = xfs_trans_alloc(mp, XFS_TRANS_MKDIR);
-	} else {
-		resblks = XFS_CREATE_SPACE_RES(mp, name->len);
-		log_res = XFS_CREATE_LOG_RES(mp);
-		log_count = XFS_CREATE_LOG_COUNT;
-		tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE);
-	}
-
-	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
-
-	/*
-	 * Initially assume that the file does not exist and
-	 * reserve the resources for that case.  If that is not
-	 * the case we'll drop the one we have and get a more
-	 * appropriate transaction later.
-	 */
-	error = xfs_trans_reserve(tp, resblks, log_res, 0,
-			XFS_TRANS_PERM_LOG_RES, log_count);
-	if (error == ENOSPC) {
-		/* flush outstanding delalloc blocks and retry */
-		xfs_flush_inodes(mp);
-		error = xfs_trans_reserve(tp, resblks, log_res, 0,
-				XFS_TRANS_PERM_LOG_RES, log_count);
-	}
-	if (error == ENOSPC) {
-		/* No space at all so try a "no-allocation" reservation */
-		resblks = 0;
-		error = xfs_trans_reserve(tp, 0, log_res, 0,
-				XFS_TRANS_PERM_LOG_RES, log_count);
-	}
-	if (error) {
-		cancel_flags = 0;
-		goto out_trans_cancel;
-	}
-
-	xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
-	unlock_dp_on_error = true;
-
-	xfs_bmap_init(&free_list, &first_block);
-
-	/*
-	 * Reserve disk quota and the inode.
-	 */
-	error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp,
-						pdqp, resblks, 1, 0);
-	if (error)
-		goto out_trans_cancel;
-
-	error = xfs_dir_canenter(tp, dp, name, resblks);
-	if (error)
-		goto out_trans_cancel;
-
-	/*
-	 * A newly created regular or special file just has one directory
-	 * entry pointing to them, but a directory also the "." entry
-	 * pointing to itself.
-	 */
-	error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev,
-			       prid, resblks > 0, &ip, &committed);
-	if (error) {
-		if (error == ENOSPC)
-			goto out_trans_cancel;
-		goto out_trans_abort;
-	}
-
-	/*
-	 * Now we join the directory inode to the transaction.  We do not do it
-	 * earlier because xfs_dir_ialloc might commit the previous transaction
-	 * (and release all the locks).  An error from here on will result in
-	 * the transaction cancel unlocking dp so don't do it explicitly in the
-	 * error path.
-	 */
-	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
-	unlock_dp_on_error = false;
-
-	error = xfs_dir_createname(tp, dp, name, ip->i_ino,
-					&first_block, &free_list, resblks ?
-					resblks - XFS_IALLOC_SPACE_RES(mp) : 0);
-	if (error) {
-		ASSERT(error != ENOSPC);
-		goto out_trans_abort;
-	}
-	xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
-	xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
-
-	if (is_dir) {
-		error = xfs_dir_init(tp, ip, dp);
-		if (error)
-			goto out_bmap_cancel;
-
-		error = xfs_bumplink(tp, dp);
-		if (error)
-			goto out_bmap_cancel;
-	}
-
-	/*
-	 * If this is a synchronous mount, make sure that the
-	 * create transaction goes to disk before returning to
-	 * the user.
-	 */
-	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
-		xfs_trans_set_sync(tp);
-
-	/*
-	 * Attach the dquot(s) to the inodes and modify them incore.
-	 * These ids of the inode couldn't have changed since the new
-	 * inode has been locked ever since it was created.
-	 */
-	xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp);
-
-	error = xfs_bmap_finish(&tp, &free_list, &committed);
-	if (error)
-		goto out_bmap_cancel;
-
-	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
-	if (error)
-		goto out_release_inode;
-
-	xfs_qm_dqrele(udqp);
-	xfs_qm_dqrele(gdqp);
-	xfs_qm_dqrele(pdqp);
-
-	*ipp = ip;
-	return 0;
-
- out_bmap_cancel:
-	xfs_bmap_cancel(&free_list);
- out_trans_abort:
-	cancel_flags |= XFS_TRANS_ABORT;
- out_trans_cancel:
-	xfs_trans_cancel(tp, cancel_flags);
- out_release_inode:
-	/*
-	 * Wait until after the current transaction is aborted to
-	 * release the inode.  This prevents recursive transactions
-	 * and deadlocks from xfs_inactive.
-	 */
-	if (ip)
-		IRELE(ip);
-
-	xfs_qm_dqrele(udqp);
-	xfs_qm_dqrele(gdqp);
-	xfs_qm_dqrele(pdqp);
-
-	if (unlock_dp_on_error)
-		xfs_iunlock(dp, XFS_ILOCK_EXCL);
-	return error;
-}
-
-#ifdef DEBUG
-int xfs_locked_n;
-int xfs_small_retries;
-int xfs_middle_retries;
-int xfs_lots_retries;
-int xfs_lock_delays;
-#endif
-
-/*
- * Bump the subclass so xfs_lock_inodes() acquires each lock with
- * a different value
- */
-static inline int
-xfs_lock_inumorder(int lock_mode, int subclass)
-{
-	if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))
-		lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT;
-	if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))
-		lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT;
-
-	return lock_mode;
-}
-
-/*
- * The following routine will lock n inodes in exclusive mode.
- * We assume the caller calls us with the inodes in i_ino order.
- *
- * We need to detect deadlock where an inode that we lock
- * is in the AIL and we start waiting for another inode that is locked
- * by a thread in a long running transaction (such as truncate). This can
- * result in deadlock since the long running trans might need to wait
- * for the inode we just locked in order to push the tail and free space
- * in the log.
- */
-void
-xfs_lock_inodes(
-	xfs_inode_t	**ips,
-	int		inodes,
-	uint		lock_mode)
-{
-	int		attempts = 0, i, j, try_lock;
-	xfs_log_item_t	*lp;
-
-	ASSERT(ips && (inodes >= 2)); /* we need at least two */
-
-	try_lock = 0;
-	i = 0;
-
-again:
-	for (; i < inodes; i++) {
-		ASSERT(ips[i]);
-
-		if (i && (ips[i] == ips[i-1]))	/* Already locked */
-			continue;
-
-		/*
-		 * If try_lock is not set yet, make sure all locked inodes
-		 * are not in the AIL.
-		 * If any are, set try_lock to be used later.
-		 */
-
-		if (!try_lock) {
-			for (j = (i - 1); j >= 0 && !try_lock; j--) {
-				lp = (xfs_log_item_t *)ips[j]->i_itemp;
-				if (lp && (lp->li_flags & XFS_LI_IN_AIL)) {
-					try_lock++;
-				}
-			}
-		}
-
-		/*
-		 * If any of the previous locks we have locked is in the AIL,
-		 * we must TRY to get the second and subsequent locks. If
-		 * we can't get any, we must release all we have
-		 * and try again.
-		 */
-
-		if (try_lock) {
-			/* try_lock must be 0 if i is 0. */
-			/*
-			 * try_lock means we have an inode locked
-			 * that is in the AIL.
-			 */
-			ASSERT(i != 0);
-			if (!xfs_ilock_nowait(ips[i], xfs_lock_inumorder(lock_mode, i))) {
-				attempts++;
-
-				/*
-				 * Unlock all previous guys and try again.
-				 * xfs_iunlock will try to push the tail
-				 * if the inode is in the AIL.
-				 */
-
-				for(j = i - 1; j >= 0; j--) {
-
-					/*
-					 * Check to see if we've already
-					 * unlocked this one.
-					 * Not the first one going back,
-					 * and the inode ptr is the same.
-					 */
-					if ((j != (i - 1)) && ips[j] ==
-								ips[j+1])
-						continue;
-
-					xfs_iunlock(ips[j], lock_mode);
-				}
-
-				if ((attempts % 5) == 0) {
-					delay(1); /* Don't just spin the CPU */
-#ifdef DEBUG
-					xfs_lock_delays++;
-#endif
-				}
-				i = 0;
-				try_lock = 0;
-				goto again;
-			}
-		} else {
-			xfs_ilock(ips[i], xfs_lock_inumorder(lock_mode, i));
-		}
-	}
-
-#ifdef DEBUG
-	if (attempts) {
-		if (attempts < 5) xfs_small_retries++;
-		else if (attempts < 100) xfs_middle_retries++;
-		else xfs_lots_retries++;
-	} else {
-		xfs_locked_n++;
-	}
-#endif
-}
-
-/*
- * xfs_lock_two_inodes() can only be used to lock one type of lock
- * at a time - the iolock or the ilock, but not both at once. If
- * we lock both at once, lockdep will report false positives saying
- * we have violated locking orders.
- */
-void
-xfs_lock_two_inodes(
-	xfs_inode_t		*ip0,
-	xfs_inode_t		*ip1,
-	uint			lock_mode)
-{
-	xfs_inode_t		*temp;
-	int			attempts = 0;
-	xfs_log_item_t		*lp;
-
-	if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))
-		ASSERT((lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) == 0);
-	ASSERT(ip0->i_ino != ip1->i_ino);
-
-	if (ip0->i_ino > ip1->i_ino) {
-		temp = ip0;
-		ip0 = ip1;
-		ip1 = temp;
-	}
-
- again:
-	xfs_ilock(ip0, xfs_lock_inumorder(lock_mode, 0));
-
-	/*
-	 * If the first lock we have locked is in the AIL, we must TRY to get
-	 * the second lock. If we can't get it, we must release the first one
-	 * and try again.
-	 */
-	lp = (xfs_log_item_t *)ip0->i_itemp;
-	if (lp && (lp->li_flags & XFS_LI_IN_AIL)) {
-		if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(lock_mode, 1))) {
-			xfs_iunlock(ip0, lock_mode);
-			if ((++attempts % 5) == 0)
-				delay(1); /* Don't just spin the CPU */
-			goto again;
-		}
-	} else {
-		xfs_ilock(ip1, xfs_lock_inumorder(lock_mode, 1));
-	}
-}
-
-int
-xfs_remove(
-	xfs_inode_t             *dp,
-	struct xfs_name		*name,
-	xfs_inode_t		*ip)
-{
-	xfs_mount_t		*mp = dp->i_mount;
-	xfs_trans_t             *tp = NULL;
-	int			is_dir = S_ISDIR(ip->i_d.di_mode);
-	int                     error = 0;
-	xfs_bmap_free_t         free_list;
-	xfs_fsblock_t           first_block;
-	int			cancel_flags;
-	int			committed;
-	int			link_zero;
-	uint			resblks;
-	uint			log_count;
-
-	trace_xfs_remove(dp, name);
-
-	if (XFS_FORCED_SHUTDOWN(mp))
-		return XFS_ERROR(EIO);
-
-	error = xfs_qm_dqattach(dp, 0);
-	if (error)
-		goto std_return;
-
-	error = xfs_qm_dqattach(ip, 0);
-	if (error)
-		goto std_return;
-
-	if (is_dir) {
-		tp = xfs_trans_alloc(mp, XFS_TRANS_RMDIR);
-		log_count = XFS_DEFAULT_LOG_COUNT;
-	} else {
-		tp = xfs_trans_alloc(mp, XFS_TRANS_REMOVE);
-		log_count = XFS_REMOVE_LOG_COUNT;
-	}
-	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
-
-	/*
-	 * We try to get the real space reservation first,
-	 * allowing for directory btree deletion(s) implying
-	 * possible bmap insert(s).  If we can't get the space
-	 * reservation then we use 0 instead, and avoid the bmap
-	 * btree insert(s) in the directory code by, if the bmap
-	 * insert tries to happen, instead trimming the LAST
-	 * block from the directory.
-	 */
-	resblks = XFS_REMOVE_SPACE_RES(mp);
-	error = xfs_trans_reserve(tp, resblks, XFS_REMOVE_LOG_RES(mp), 0,
-				  XFS_TRANS_PERM_LOG_RES, log_count);
-	if (error == ENOSPC) {
-		resblks = 0;
-		error = xfs_trans_reserve(tp, 0, XFS_REMOVE_LOG_RES(mp), 0,
-					  XFS_TRANS_PERM_LOG_RES, log_count);
-	}
-	if (error) {
-		ASSERT(error != ENOSPC);
-		cancel_flags = 0;
-		goto out_trans_cancel;
-	}
-
-	xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL);
-
-	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
-	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
-
-	/*
-	 * If we're removing a directory perform some additional validation.
-	 */
-	if (is_dir) {
-		ASSERT(ip->i_d.di_nlink >= 2);
-		if (ip->i_d.di_nlink != 2) {
-			error = XFS_ERROR(ENOTEMPTY);
-			goto out_trans_cancel;
-		}
-		if (!xfs_dir_isempty(ip)) {
-			error = XFS_ERROR(ENOTEMPTY);
-			goto out_trans_cancel;
-		}
-	}
-
-	xfs_bmap_init(&free_list, &first_block);
-	error = xfs_dir_removename(tp, dp, name, ip->i_ino,
-					&first_block, &free_list, resblks);
-	if (error) {
-		ASSERT(error != ENOENT);
-		goto out_bmap_cancel;
-	}
-	xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
-
-	if (is_dir) {
-		/*
-		 * Drop the link from ip's "..".
-		 */
-		error = xfs_droplink(tp, dp);
-		if (error)
-			goto out_bmap_cancel;
-
-		/*
-		 * Drop the "." link from ip to self.
-		 */
-		error = xfs_droplink(tp, ip);
-		if (error)
-			goto out_bmap_cancel;
-	} else {
-		/*
-		 * When removing a non-directory we need to log the parent
-		 * inode here.  For a directory this is done implicitly
-		 * by the xfs_droplink call for the ".." entry.
-		 */
-		xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
-	}
-
-	/*
-	 * Drop the link from dp to ip.
-	 */
-	error = xfs_droplink(tp, ip);
-	if (error)
-		goto out_bmap_cancel;
-
-	/*
-	 * Determine if this is the last link while
-	 * we are in the transaction.
-	 */
-	link_zero = (ip->i_d.di_nlink == 0);
-
-	/*
-	 * If this is a synchronous mount, make sure that the
-	 * remove transaction goes to disk before returning to
-	 * the user.
-	 */
-	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
-		xfs_trans_set_sync(tp);
-
-	error = xfs_bmap_finish(&tp, &free_list, &committed);
-	if (error)
-		goto out_bmap_cancel;
-
-	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
-	if (error)
-		goto std_return;
-
-	/*
-	 * If we are using filestreams, kill the stream association.
-	 * If the file is still open it may get a new one but that
-	 * will get killed on last close in xfs_close() so we don't
-	 * have to worry about that.
-	 */
-	if (!is_dir && link_zero && xfs_inode_is_filestream(ip))
-		xfs_filestream_deassociate(ip);
-
-	return 0;
-
- out_bmap_cancel:
-	xfs_bmap_cancel(&free_list);
-	cancel_flags |= XFS_TRANS_ABORT;
- out_trans_cancel:
-	xfs_trans_cancel(tp, cancel_flags);
- std_return:
-	return error;
-}
-
-int
-xfs_link(
-	xfs_inode_t		*tdp,
-	xfs_inode_t		*sip,
-	struct xfs_name		*target_name)
-{
-	xfs_mount_t		*mp = tdp->i_mount;
-	xfs_trans_t		*tp;
-	int			error;
-	xfs_bmap_free_t         free_list;
-	xfs_fsblock_t           first_block;
-	int			cancel_flags;
-	int			committed;
-	int			resblks;
-
-	trace_xfs_link(tdp, target_name);
-
-	ASSERT(!S_ISDIR(sip->i_d.di_mode));
-
-	if (XFS_FORCED_SHUTDOWN(mp))
-		return XFS_ERROR(EIO);
-
-	error = xfs_qm_dqattach(sip, 0);
-	if (error)
-		goto std_return;
-
-	error = xfs_qm_dqattach(tdp, 0);
-	if (error)
-		goto std_return;
-
-	tp = xfs_trans_alloc(mp, XFS_TRANS_LINK);
-	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
-	resblks = XFS_LINK_SPACE_RES(mp, target_name->len);
-	error = xfs_trans_reserve(tp, resblks, XFS_LINK_LOG_RES(mp), 0,
-			XFS_TRANS_PERM_LOG_RES, XFS_LINK_LOG_COUNT);
-	if (error == ENOSPC) {
-		resblks = 0;
-		error = xfs_trans_reserve(tp, 0, XFS_LINK_LOG_RES(mp), 0,
-				XFS_TRANS_PERM_LOG_RES, XFS_LINK_LOG_COUNT);
-	}
-	if (error) {
-		cancel_flags = 0;
-		goto error_return;
-	}
-
-	xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL);
-
-	xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL);
-	xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL);
-
-	/*
-	 * If we are using project inheritance, we only allow hard link
-	 * creation in our tree when the project IDs are the same; else
-	 * the tree quota mechanism could be circumvented.
-	 */
-	if (unlikely((tdp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
-		     (xfs_get_projid(tdp) != xfs_get_projid(sip)))) {
-		error = XFS_ERROR(EXDEV);
-		goto error_return;
-	}
-
-	error = xfs_dir_canenter(tp, tdp, target_name, resblks);
-	if (error)
-		goto error_return;
-
-	xfs_bmap_init(&free_list, &first_block);
-
-	error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino,
-					&first_block, &free_list, resblks);
-	if (error)
-		goto abort_return;
-	xfs_trans_ichgtime(tp, tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
-	xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE);
-
-	error = xfs_bumplink(tp, sip);
-	if (error)
-		goto abort_return;
-
-	/*
-	 * If this is a synchronous mount, make sure that the
-	 * link transaction goes to disk before returning to
-	 * the user.
-	 */
-	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
-		xfs_trans_set_sync(tp);
-	}
-
-	error = xfs_bmap_finish (&tp, &free_list, &committed);
-	if (error) {
-		xfs_bmap_cancel(&free_list);
-		goto abort_return;
-	}
-
-	return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
-
- abort_return:
-	cancel_flags |= XFS_TRANS_ABORT;
- error_return:
-	xfs_trans_cancel(tp, cancel_flags);
- std_return:
-	return error;
-}
-
-int
-xfs_set_dmattrs(
-	xfs_inode_t     *ip,
-	u_int		evmask,
-	u_int16_t	state)
-{
-	xfs_mount_t	*mp = ip->i_mount;
-	xfs_trans_t	*tp;
-	int		error;
-
-	if (!capable(CAP_SYS_ADMIN))
-		return XFS_ERROR(EPERM);
-
-	if (XFS_FORCED_SHUTDOWN(mp))
-		return XFS_ERROR(EIO);
-
-	tp = xfs_trans_alloc(mp, XFS_TRANS_SET_DMATTRS);
-	error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES (mp), 0, 0, 0);
-	if (error) {
-		xfs_trans_cancel(tp, 0);
-		return error;
-	}
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
-	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
-
-	ip->i_d.di_dmevmask = evmask;
-	ip->i_d.di_dmstate  = state;
-
-	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-	error = xfs_trans_commit(tp, 0);
-
-	return error;
-}
-
-/*
- * xfs_alloc_file_space()
- *      This routine allocates disk space for the given file.
- *
- *	If alloc_type == 0, this request is for an ALLOCSP type
- *	request which will change the file size.  In this case, no
- *	DMAPI event will be generated by the call.  A TRUNCATE event
- *	will be generated later by xfs_setattr.
- *
- *	If alloc_type != 0, this request is for a RESVSP type
- *	request, and a DMAPI DM_EVENT_WRITE will be generated if the
- *	lower block boundary byte address is less than the file's
- *	length.
- *
- * RETURNS:
- *       0 on success
- *      errno on error
- *
- */
-STATIC int
-xfs_alloc_file_space(
-	xfs_inode_t		*ip,
-	xfs_off_t		offset,
-	xfs_off_t		len,
-	int			alloc_type,
-	int			attr_flags)
-{
-	xfs_mount_t		*mp = ip->i_mount;
-	xfs_off_t		count;
-	xfs_filblks_t		allocated_fsb;
-	xfs_filblks_t		allocatesize_fsb;
-	xfs_extlen_t		extsz, temp;
-	xfs_fileoff_t		startoffset_fsb;
-	xfs_fsblock_t		firstfsb;
-	int			nimaps;
-	int			quota_flag;
-	int			rt;
-	xfs_trans_t		*tp;
-	xfs_bmbt_irec_t		imaps[1], *imapp;
-	xfs_bmap_free_t		free_list;
-	uint			qblocks, resblks, resrtextents;
-	int			committed;
-	int			error;
-
-	trace_xfs_alloc_file_space(ip);
-
-	if (XFS_FORCED_SHUTDOWN(mp))
-		return XFS_ERROR(EIO);
-
-	error = xfs_qm_dqattach(ip, 0);
-	if (error)
-		return error;
-
-	if (len <= 0)
-		return XFS_ERROR(EINVAL);
-
-	rt = XFS_IS_REALTIME_INODE(ip);
-	extsz = xfs_get_extsz_hint(ip);
-
-	count = len;
-	imapp = &imaps[0];
-	nimaps = 1;
-	startoffset_fsb	= XFS_B_TO_FSBT(mp, offset);
-	allocatesize_fsb = XFS_B_TO_FSB(mp, count);
-
-	/*
-	 * Allocate file space until done or until there is an error
-	 */
-	while (allocatesize_fsb && !error) {
-		xfs_fileoff_t	s, e;
-
-		/*
-		 * Determine space reservations for data/realtime.
-		 */
-		if (unlikely(extsz)) {
-			s = startoffset_fsb;
-			do_div(s, extsz);
-			s *= extsz;
-			e = startoffset_fsb + allocatesize_fsb;
-			if ((temp = do_mod(startoffset_fsb, extsz)))
-				e += temp;
-			if ((temp = do_mod(e, extsz)))
-				e += extsz - temp;
-		} else {
-			s = 0;
-			e = allocatesize_fsb;
-		}
-
-		/*
-		 * The transaction reservation is limited to a 32-bit block
-		 * count, hence we need to limit the number of blocks we are
-		 * trying to reserve to avoid an overflow. We can't allocate
-		 * more than @nimaps extents, and an extent is limited on disk
-		 * to MAXEXTLEN (21 bits), so use that to enforce the limit.
-		 */
-		resblks = min_t(xfs_fileoff_t, (e - s), (MAXEXTLEN * nimaps));
-		if (unlikely(rt)) {
-			resrtextents = qblocks = resblks;
-			resrtextents /= mp->m_sb.sb_rextsize;
-			resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
-			quota_flag = XFS_QMOPT_RES_RTBLKS;
-		} else {
-			resrtextents = 0;
-			resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resblks);
-			quota_flag = XFS_QMOPT_RES_REGBLKS;
-		}
-
-		/*
-		 * Allocate and setup the transaction.
-		 */
-		tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
-		error = xfs_trans_reserve(tp, resblks,
-					  XFS_WRITE_LOG_RES(mp), resrtextents,
-					  XFS_TRANS_PERM_LOG_RES,
-					  XFS_WRITE_LOG_COUNT);
-		/*
-		 * Check for running out of space
-		 */
-		if (error) {
-			/*
-			 * Free the transaction structure.
-			 */
-			ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp));
-			xfs_trans_cancel(tp, 0);
-			break;
-		}
-		xfs_ilock(ip, XFS_ILOCK_EXCL);
-		error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks,
-						      0, quota_flag);
-		if (error)
-			goto error1;
-
-		xfs_trans_ijoin(tp, ip, 0);
-
-		xfs_bmap_init(&free_list, &firstfsb);
-		error = xfs_bmapi_write(tp, ip, startoffset_fsb,
-					allocatesize_fsb, alloc_type, &firstfsb,
-					0, imapp, &nimaps, &free_list);
-		if (error) {
-			goto error0;
-		}
-
-		/*
-		 * Complete the transaction
-		 */
-		error = xfs_bmap_finish(&tp, &free_list, &committed);
-		if (error) {
-			goto error0;
-		}
-
-		error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
-		if (error) {
-			break;
-		}
-
-		allocated_fsb = imapp->br_blockcount;
-
-		if (nimaps == 0) {
-			error = XFS_ERROR(ENOSPC);
-			break;
-		}
-
-		startoffset_fsb += allocated_fsb;
-		allocatesize_fsb -= allocated_fsb;
-	}
-
-	return error;
-
-error0:	/* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
-	xfs_bmap_cancel(&free_list);
-	xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag);
-
-error1:	/* Just cancel transaction */
-	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-	return error;
-}
-
-/*
- * Zero file bytes between startoff and endoff inclusive.
- * The iolock is held exclusive and no blocks are buffered.
- *
- * This function is used by xfs_free_file_space() to zero
- * partial blocks when the range to free is not block aligned.
- * When unreserving space with boundaries that are not block
- * aligned we round up the start and round down the end
- * boundaries and then use this function to zero the parts of
- * the blocks that got dropped during the rounding.
- */
-STATIC int
-xfs_zero_remaining_bytes(
-	xfs_inode_t		*ip,
-	xfs_off_t		startoff,
-	xfs_off_t		endoff)
-{
-	xfs_bmbt_irec_t		imap;
-	xfs_fileoff_t		offset_fsb;
-	xfs_off_t		lastoffset;
-	xfs_off_t		offset;
-	xfs_buf_t		*bp;
-	xfs_mount_t		*mp = ip->i_mount;
-	int			nimap;
-	int			error = 0;
-
-	/*
-	 * Avoid doing I/O beyond eof - it's not necessary
-	 * since nothing can read beyond eof.  The space will
-	 * be zeroed when the file is extended anyway.
-	 */
-	if (startoff >= XFS_ISIZE(ip))
-		return 0;
-
-	if (endoff > XFS_ISIZE(ip))
-		endoff = XFS_ISIZE(ip);
-
-	bp = xfs_buf_get_uncached(XFS_IS_REALTIME_INODE(ip) ?
-					mp->m_rtdev_targp : mp->m_ddev_targp,
-				  BTOBB(mp->m_sb.sb_blocksize), 0);
-	if (!bp)
-		return XFS_ERROR(ENOMEM);
-
-	xfs_buf_unlock(bp);
-
-	for (offset = startoff; offset <= endoff; offset = lastoffset + 1) {
-		offset_fsb = XFS_B_TO_FSBT(mp, offset);
-		nimap = 1;
-		error = xfs_bmapi_read(ip, offset_fsb, 1, &imap, &nimap, 0);
-		if (error || nimap < 1)
-			break;
-		ASSERT(imap.br_blockcount >= 1);
-		ASSERT(imap.br_startoff == offset_fsb);
-		lastoffset = XFS_FSB_TO_B(mp, imap.br_startoff + 1) - 1;
-		if (lastoffset > endoff)
-			lastoffset = endoff;
-		if (imap.br_startblock == HOLESTARTBLOCK)
-			continue;
-		ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
-		if (imap.br_state == XFS_EXT_UNWRITTEN)
-			continue;
-		XFS_BUF_UNDONE(bp);
-		XFS_BUF_UNWRITE(bp);
-		XFS_BUF_READ(bp);
-		XFS_BUF_SET_ADDR(bp, xfs_fsb_to_db(ip, imap.br_startblock));
-		xfsbdstrat(mp, bp);
-		error = xfs_buf_iowait(bp);
-		if (error) {
-			xfs_buf_ioerror_alert(bp,
-					"xfs_zero_remaining_bytes(read)");
-			break;
-		}
-		memset(bp->b_addr +
-			(offset - XFS_FSB_TO_B(mp, imap.br_startoff)),
-		      0, lastoffset - offset + 1);
-		XFS_BUF_UNDONE(bp);
-		XFS_BUF_UNREAD(bp);
-		XFS_BUF_WRITE(bp);
-		xfsbdstrat(mp, bp);
-		error = xfs_buf_iowait(bp);
-		if (error) {
-			xfs_buf_ioerror_alert(bp,
-					"xfs_zero_remaining_bytes(write)");
-			break;
-		}
-	}
-	xfs_buf_free(bp);
-	return error;
-}
-
-/*
- * xfs_free_file_space()
- *      This routine frees disk space for the given file.
- *
- *	This routine is only called by xfs_change_file_space
- *	for an UNRESVSP type call.
- *
- * RETURNS:
- *       0 on success
- *      errno on error
- *
- */
-STATIC int
-xfs_free_file_space(
-	xfs_inode_t		*ip,
-	xfs_off_t		offset,
-	xfs_off_t		len,
-	int			attr_flags)
-{
-	int			committed;
-	int			done;
-	xfs_fileoff_t		endoffset_fsb;
-	int			error;
-	xfs_fsblock_t		firstfsb;
-	xfs_bmap_free_t		free_list;
-	xfs_bmbt_irec_t		imap;
-	xfs_off_t		ioffset;
-	xfs_extlen_t		mod=0;
-	xfs_mount_t		*mp;
-	int			nimap;
-	uint			resblks;
-	xfs_off_t		rounding;
-	int			rt;
-	xfs_fileoff_t		startoffset_fsb;
-	xfs_trans_t		*tp;
-	int			need_iolock = 1;
-
-	mp = ip->i_mount;
-
-	trace_xfs_free_file_space(ip);
-
-	error = xfs_qm_dqattach(ip, 0);
-	if (error)
-		return error;
-
-	error = 0;
-	if (len <= 0)	/* if nothing being freed */
-		return error;
-	rt = XFS_IS_REALTIME_INODE(ip);
-	startoffset_fsb	= XFS_B_TO_FSB(mp, offset);
-	endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len);
-
-	if (attr_flags & XFS_ATTR_NOLOCK)
-		need_iolock = 0;
-	if (need_iolock) {
-		xfs_ilock(ip, XFS_IOLOCK_EXCL);
-		/* wait for the completion of any pending DIOs */
-		inode_dio_wait(VFS_I(ip));
-	}
-
-	rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE);
-	ioffset = offset & ~(rounding - 1);
-	error = -filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
-					      ioffset, -1);
-	if (error)
-		goto out_unlock_iolock;
-	truncate_pagecache_range(VFS_I(ip), ioffset, -1);
-
-	/*
-	 * Need to zero the stuff we're not freeing, on disk.
-	 * If it's a realtime file & can't use unwritten extents then we
-	 * actually need to zero the extent edges.  Otherwise xfs_bunmapi
-	 * will take care of it for us.
-	 */
-	if (rt && !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
-		nimap = 1;
-		error = xfs_bmapi_read(ip, startoffset_fsb, 1,
-					&imap, &nimap, 0);
-		if (error)
-			goto out_unlock_iolock;
-		ASSERT(nimap == 0 || nimap == 1);
-		if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
-			xfs_daddr_t	block;
-
-			ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
-			block = imap.br_startblock;
-			mod = do_div(block, mp->m_sb.sb_rextsize);
-			if (mod)
-				startoffset_fsb += mp->m_sb.sb_rextsize - mod;
-		}
-		nimap = 1;
-		error = xfs_bmapi_read(ip, endoffset_fsb - 1, 1,
-					&imap, &nimap, 0);
-		if (error)
-			goto out_unlock_iolock;
-		ASSERT(nimap == 0 || nimap == 1);
-		if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
-			ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
-			mod++;
-			if (mod && (mod != mp->m_sb.sb_rextsize))
-				endoffset_fsb -= mod;
-		}
-	}
-	if ((done = (endoffset_fsb <= startoffset_fsb)))
-		/*
-		 * One contiguous piece to clear
-		 */
-		error = xfs_zero_remaining_bytes(ip, offset, offset + len - 1);
-	else {
-		/*
-		 * Some full blocks, possibly two pieces to clear
-		 */
-		if (offset < XFS_FSB_TO_B(mp, startoffset_fsb))
-			error = xfs_zero_remaining_bytes(ip, offset,
-				XFS_FSB_TO_B(mp, startoffset_fsb) - 1);
-		if (!error &&
-		    XFS_FSB_TO_B(mp, endoffset_fsb) < offset + len)
-			error = xfs_zero_remaining_bytes(ip,
-				XFS_FSB_TO_B(mp, endoffset_fsb),
-				offset + len - 1);
-	}
-
-	/*
-	 * free file space until done or until there is an error
-	 */
-	resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
-	while (!error && !done) {
-
-		/*
-		 * allocate and setup the transaction. Allow this
-		 * transaction to dip into the reserve blocks to ensure
-		 * the freeing of the space succeeds at ENOSPC.
-		 */
-		tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
-		tp->t_flags |= XFS_TRANS_RESERVE;
-		error = xfs_trans_reserve(tp,
-					  resblks,
-					  XFS_WRITE_LOG_RES(mp),
-					  0,
-					  XFS_TRANS_PERM_LOG_RES,
-					  XFS_WRITE_LOG_COUNT);
-
-		/*
-		 * check for running out of space
-		 */
-		if (error) {
-			/*
-			 * Free the transaction structure.
-			 */
-			ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp));
-			xfs_trans_cancel(tp, 0);
-			break;
-		}
-		xfs_ilock(ip, XFS_ILOCK_EXCL);
-		error = xfs_trans_reserve_quota(tp, mp,
-				ip->i_udquot, ip->i_gdquot, ip->i_pdquot,
-				resblks, 0, XFS_QMOPT_RES_REGBLKS);
-		if (error)
-			goto error1;
-
-		xfs_trans_ijoin(tp, ip, 0);
-
-		/*
-		 * issue the bunmapi() call to free the blocks
-		 */
-		xfs_bmap_init(&free_list, &firstfsb);
-		error = xfs_bunmapi(tp, ip, startoffset_fsb,
-				  endoffset_fsb - startoffset_fsb,
-				  0, 2, &firstfsb, &free_list, &done);
-		if (error) {
-			goto error0;
-		}
-
-		/*
-		 * complete the transaction
-		 */
-		error = xfs_bmap_finish(&tp, &free_list, &committed);
-		if (error) {
-			goto error0;
-		}
-
-		error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
-	}
-
- out_unlock_iolock:
-	if (need_iolock)
-		xfs_iunlock(ip, XFS_IOLOCK_EXCL);
-	return error;
-
- error0:
-	xfs_bmap_cancel(&free_list);
- error1:
-	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
-	xfs_iunlock(ip, need_iolock ? (XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL) :
-		    XFS_ILOCK_EXCL);
-	return error;
-}
-
-
-STATIC int
-xfs_zero_file_space(
-	struct xfs_inode	*ip,
-	xfs_off_t		offset,
-	xfs_off_t		len,
-	int			attr_flags)
-{
-	struct xfs_mount	*mp = ip->i_mount;
-	uint			granularity;
-	xfs_off_t		start_boundary;
-	xfs_off_t		end_boundary;
-	int			error;
-
-	granularity = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE);
-
-	/*
-	 * Round the range of extents we are going to convert inwards.  If the
-	 * offset is aligned, then it doesn't get changed so we zero from the
-	 * start of the block offset points to.
-	 */
-	start_boundary = round_up(offset, granularity);
-	end_boundary = round_down(offset + len, granularity);
-
-	ASSERT(start_boundary >= offset);
-	ASSERT(end_boundary <= offset + len);
-
-	if (!(attr_flags & XFS_ATTR_NOLOCK))
-		xfs_ilock(ip, XFS_IOLOCK_EXCL);
-
-	if (start_boundary < end_boundary - 1) {
-		/* punch out the page cache over the conversion range */
-		truncate_pagecache_range(VFS_I(ip), start_boundary,
-					 end_boundary - 1);
-		/* convert the blocks */
-		error = xfs_alloc_file_space(ip, start_boundary,
-					end_boundary - start_boundary - 1,
-					XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT,
-					attr_flags);
-		if (error)
-			goto out_unlock;
-
-		/* We've handled the interior of the range, now for the edges */
-		if (start_boundary != offset)
-			error = xfs_iozero(ip, offset, start_boundary - offset);
-		if (error)
-			goto out_unlock;
-
-		if (end_boundary != offset + len)
-			error = xfs_iozero(ip, end_boundary,
-					   offset + len - end_boundary);
-
-	} else {
-		/*
-		 * It's either a sub-granularity range or the range spanned lies
-		 * partially across two adjacent blocks.
-		 */
-		error = xfs_iozero(ip, offset, len);
-	}
-
-out_unlock:
-	if (!(attr_flags & XFS_ATTR_NOLOCK))
-		xfs_iunlock(ip, XFS_IOLOCK_EXCL);
-	return error;
-
-}
-
-/*
- * xfs_change_file_space()
- *      This routine allocates or frees disk space for the given file.
- *      The user specified parameters are checked for alignment and size
- *      limitations.
- *
- * RETURNS:
- *       0 on success
- *      errno on error
- *
- */
-int
-xfs_change_file_space(
-	xfs_inode_t	*ip,
-	int		cmd,
-	xfs_flock64_t	*bf,
-	xfs_off_t	offset,
-	int		attr_flags)
-{
-	xfs_mount_t	*mp = ip->i_mount;
-	int		clrprealloc;
-	int		error;
-	xfs_fsize_t	fsize;
-	int		setprealloc;
-	xfs_off_t	startoffset;
-	xfs_trans_t	*tp;
-	struct iattr	iattr;
-
-	if (!S_ISREG(ip->i_d.di_mode))
-		return XFS_ERROR(EINVAL);
-
-	switch (bf->l_whence) {
-	case 0: /*SEEK_SET*/
-		break;
-	case 1: /*SEEK_CUR*/
-		bf->l_start += offset;
-		break;
-	case 2: /*SEEK_END*/
-		bf->l_start += XFS_ISIZE(ip);
-		break;
-	default:
-		return XFS_ERROR(EINVAL);
-	}
-
-	/*
-	 * length of <= 0 for resv/unresv/zero is invalid.  length for
-	 * alloc/free is ignored completely and we have no idea what userspace
-	 * might have set it to, so set it to zero to allow range
-	 * checks to pass.
-	 */
-	switch (cmd) {
-	case XFS_IOC_ZERO_RANGE:
-	case XFS_IOC_RESVSP:
-	case XFS_IOC_RESVSP64:
-	case XFS_IOC_UNRESVSP:
-	case XFS_IOC_UNRESVSP64:
-		if (bf->l_len <= 0)
-			return XFS_ERROR(EINVAL);
-		break;
-	default:
-		bf->l_len = 0;
-		break;
-	}
-
-	if (bf->l_start < 0 ||
-	    bf->l_start > mp->m_super->s_maxbytes ||
-	    bf->l_start + bf->l_len < 0 ||
-	    bf->l_start + bf->l_len >= mp->m_super->s_maxbytes)
-		return XFS_ERROR(EINVAL);
-
-	bf->l_whence = 0;
-
-	startoffset = bf->l_start;
-	fsize = XFS_ISIZE(ip);
-
-	setprealloc = clrprealloc = 0;
-	switch (cmd) {
-	case XFS_IOC_ZERO_RANGE:
-		error = xfs_zero_file_space(ip, startoffset, bf->l_len,
-						attr_flags);
-		if (error)
-			return error;
-		setprealloc = 1;
-		break;
-
-	case XFS_IOC_RESVSP:
-	case XFS_IOC_RESVSP64:
-		error = xfs_alloc_file_space(ip, startoffset, bf->l_len,
-						XFS_BMAPI_PREALLOC, attr_flags);
-		if (error)
-			return error;
-		setprealloc = 1;
-		break;
-
-	case XFS_IOC_UNRESVSP:
-	case XFS_IOC_UNRESVSP64:
-		if ((error = xfs_free_file_space(ip, startoffset, bf->l_len,
-								attr_flags)))
-			return error;
-		break;
-
-	case XFS_IOC_ALLOCSP:
-	case XFS_IOC_ALLOCSP64:
-	case XFS_IOC_FREESP:
-	case XFS_IOC_FREESP64:
-		/*
-		 * These operations actually do IO when extending the file, but
-		 * the allocation is done seperately to the zeroing that is
-		 * done. This set of operations need to be serialised against
-		 * other IO operations, such as truncate and buffered IO. We
-		 * need to take the IOLOCK here to serialise the allocation and
-		 * zeroing IO to prevent other IOLOCK holders (e.g. getbmap,
-		 * truncate, direct IO) from racing against the transient
-		 * allocated but not written state we can have here.
-		 */
-		xfs_ilock(ip, XFS_IOLOCK_EXCL);
-		if (startoffset > fsize) {
-			error = xfs_alloc_file_space(ip, fsize,
-					startoffset - fsize, 0,
-					attr_flags | XFS_ATTR_NOLOCK);
-			if (error) {
-				xfs_iunlock(ip, XFS_IOLOCK_EXCL);
-				break;
-			}
-		}
-
-		iattr.ia_valid = ATTR_SIZE;
-		iattr.ia_size = startoffset;
-
-		error = xfs_setattr_size(ip, &iattr,
-					 attr_flags | XFS_ATTR_NOLOCK);
-		xfs_iunlock(ip, XFS_IOLOCK_EXCL);
-
-		if (error)
-			return error;
-
-		clrprealloc = 1;
-		break;
-
-	default:
-		ASSERT(0);
-		return XFS_ERROR(EINVAL);
-	}
-
-	/*
-	 * update the inode timestamp, mode, and prealloc flag bits
-	 */
-	tp = xfs_trans_alloc(mp, XFS_TRANS_WRITEID);
-
-	if ((error = xfs_trans_reserve(tp, 0, XFS_WRITEID_LOG_RES(mp),
-				      0, 0, 0))) {
-		/* ASSERT(0); */
-		xfs_trans_cancel(tp, 0);
-		return error;
-	}
-
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
-	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
-
-	if ((attr_flags & XFS_ATTR_DMI) == 0) {
-		ip->i_d.di_mode &= ~S_ISUID;
-
-		/*
-		 * Note that we don't have to worry about mandatory
-		 * file locking being disabled here because we only
-		 * clear the S_ISGID bit if the Group execute bit is
-		 * on, but if it was on then mandatory locking wouldn't
-		 * have been enabled.
-		 */
-		if (ip->i_d.di_mode & S_IXGRP)
-			ip->i_d.di_mode &= ~S_ISGID;
-
-		xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
-	}
-	if (setprealloc)
-		ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC;
-	else if (clrprealloc)
-		ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC;
-
-	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-	if (attr_flags & XFS_ATTR_SYNC)
-		xfs_trans_set_sync(tp);
-	return xfs_trans_commit(tp, 0);
-}
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
deleted file mode 100644
index 38c67c3..0000000
--- a/fs/xfs/xfs_vnodeops.h
+++ /dev/null
@@ -1,55 +0,0 @@
-#ifndef _XFS_VNODEOPS_H
-#define _XFS_VNODEOPS_H 1
-
-struct attrlist_cursor_kern;
-struct file;
-struct iattr;
-struct inode;
-struct iovec;
-struct kiocb;
-struct pipe_inode_info;
-struct uio;
-struct xfs_inode;
-
-
-int xfs_setattr_nonsize(struct xfs_inode *ip, struct iattr *vap, int flags);
-int xfs_setattr_size(struct xfs_inode *ip, struct iattr *vap, int flags);
-#define	XFS_ATTR_DMI		0x01	/* invocation from a DMI function */
-#define	XFS_ATTR_NONBLOCK	0x02	/* return EAGAIN if operation would block */
-#define XFS_ATTR_NOLOCK		0x04	/* Don't grab any conflicting locks */
-#define XFS_ATTR_NOACL		0x08	/* Don't call xfs_acl_chmod */
-#define XFS_ATTR_SYNC		0x10	/* synchronous operation required */
-
-int xfs_readlink(struct xfs_inode *ip, char *link);
-int xfs_release(struct xfs_inode *ip);
-int xfs_inactive(struct xfs_inode *ip);
-int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name,
-		struct xfs_inode **ipp, struct xfs_name *ci_name);
-int xfs_create(struct xfs_inode *dp, struct xfs_name *name, umode_t mode,
-		xfs_dev_t rdev, struct xfs_inode **ipp);
-int xfs_remove(struct xfs_inode *dp, struct xfs_name *name,
-		struct xfs_inode *ip);
-int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip,
-		struct xfs_name *target_name);
-int xfs_readdir(struct xfs_inode *dp, struct dir_context *ctx, size_t bufsize);
-int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name,
-		const char *target_path, umode_t mode, struct xfs_inode **ipp);
-int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state);
-int xfs_change_file_space(struct xfs_inode *ip, int cmd,
-		xfs_flock64_t *bf, xfs_off_t offset, int attr_flags);
-int xfs_rename(struct xfs_inode *src_dp, struct xfs_name *src_name,
-		struct xfs_inode *src_ip, struct xfs_inode *target_dp,
-		struct xfs_name *target_name, struct xfs_inode *target_ip);
-int xfs_attr_get(struct xfs_inode *ip, const unsigned char *name,
-		unsigned char *value, int *valuelenp, int flags);
-int xfs_attr_set(struct xfs_inode *dp, const unsigned char *name,
-		unsigned char *value, int valuelen, int flags);
-int xfs_attr_remove(struct xfs_inode *dp, const unsigned char *name, int flags);
-int xfs_attr_list(struct xfs_inode *dp, char *buffer, int bufsize,
-		int flags, struct attrlist_cursor_kern *cursor);
-
-int xfs_iozero(struct xfs_inode *, loff_t, size_t);
-int xfs_zero_eof(struct xfs_inode *, xfs_off_t, xfs_fsize_t);
-int xfs_free_eofblocks(struct xfs_mount *, struct xfs_inode *, bool);
-
-#endif /* _XFS_VNODEOPS_H */
diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c
index 87d3e03..e01f35e 100644
--- a/fs/xfs/xfs_xattr.c
+++ b/fs/xfs/xfs_xattr.c
@@ -17,13 +17,13 @@
  */
 
 #include "xfs.h"
+#include "xfs_log_format.h"
 #include "xfs_da_btree.h"
 #include "xfs_bmap_btree.h"
 #include "xfs_inode.h"
 #include "xfs_attr.h"
 #include "xfs_attr_leaf.h"
 #include "xfs_acl.h"
-#include "xfs_vnodeops.h"
 
 #include <linux/posix_acl_xattr.h>
 #include <linux/xattr.h>
diff --git a/include/asm-generic/dma-contiguous.h b/include/asm-generic/dma-contiguous.h
deleted file mode 100644
index 294b1e7..0000000
--- a/include/asm-generic/dma-contiguous.h
+++ /dev/null
@@ -1,28 +0,0 @@
-#ifndef ASM_DMA_CONTIGUOUS_H
-#define ASM_DMA_CONTIGUOUS_H
-
-#ifdef __KERNEL__
-#ifdef CONFIG_CMA
-
-#include <linux/device.h>
-#include <linux/dma-contiguous.h>
-
-static inline struct cma *dev_get_cma_area(struct device *dev)
-{
-	if (dev && dev->cma_area)
-		return dev->cma_area;
-	return dma_contiguous_default_area;
-}
-
-static inline void dev_set_cma_area(struct device *dev, struct cma *cma)
-{
-	if (dev)
-		dev->cma_area = cma;
-	if (!dev && !dma_contiguous_default_area)
-		dma_contiguous_default_area = cma;
-}
-
-#endif
-#endif
-
-#endif
diff --git a/include/dt-bindings/clock/samsung,s3c64xx-clock.h b/include/dt-bindings/clock/samsung,s3c64xx-clock.h
new file mode 100644
index 0000000..ad95c7f
--- /dev/null
+++ b/include/dt-bindings/clock/samsung,s3c64xx-clock.h
@@ -0,0 +1,178 @@
+/*
+ * Copyright (c) 2013 Tomasz Figa <tomasz.figa at gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Device Tree binding constants for Samsung S3C64xx clock controller.
+*/
+
+#ifndef _DT_BINDINGS_CLOCK_SAMSUNG_S3C64XX_CLOCK_H
+#define _DT_BINDINGS_CLOCK_SAMSUNG_S3C64XX_CLOCK_H
+
+/*
+ * Let each exported clock get a unique index, which is used on DT-enabled
+ * platforms to lookup the clock from a clock specifier. These indices are
+ * therefore considered an ABI and so must not be changed. This implies
+ * that new clocks should be added either in free spaces between clock groups
+ * or at the end.
+ */
+
+/* Core clocks. */
+#define CLK27M			1
+#define CLK48M			2
+#define FOUT_APLL		3
+#define FOUT_MPLL		4
+#define FOUT_EPLL		5
+#define ARMCLK			6
+#define HCLKX2			7
+#define HCLK			8
+#define PCLK			9
+
+/* HCLK bus clocks. */
+#define HCLK_3DSE		16
+#define HCLK_UHOST		17
+#define HCLK_SECUR		18
+#define HCLK_SDMA1		19
+#define HCLK_SDMA0		20
+#define HCLK_IROM		21
+#define HCLK_DDR1		22
+#define HCLK_MEM1		23
+#define HCLK_MEM0		24
+#define HCLK_USB		25
+#define HCLK_HSMMC2		26
+#define HCLK_HSMMC1		27
+#define HCLK_HSMMC0		28
+#define HCLK_MDP		29
+#define HCLK_DHOST		30
+#define HCLK_IHOST		31
+#define HCLK_DMA1		32
+#define HCLK_DMA0		33
+#define HCLK_JPEG		34
+#define HCLK_CAMIF		35
+#define HCLK_SCALER		36
+#define HCLK_2D			37
+#define HCLK_TV			38
+#define HCLK_POST0		39
+#define HCLK_ROT		40
+#define HCLK_LCD		41
+#define HCLK_TZIC		42
+#define HCLK_INTC		43
+#define HCLK_MFC		44
+#define HCLK_DDR0		45
+
+/* PCLK bus clocks. */
+#define PCLK_IIC1		48
+#define PCLK_IIS2		49
+#define PCLK_SKEY		50
+#define PCLK_CHIPID		51
+#define PCLK_SPI1		52
+#define PCLK_SPI0		53
+#define PCLK_HSIRX		54
+#define PCLK_HSITX		55
+#define PCLK_GPIO		56
+#define PCLK_IIC0		57
+#define PCLK_IIS1		58
+#define PCLK_IIS0		59
+#define PCLK_AC97		60
+#define PCLK_TZPC		61
+#define PCLK_TSADC		62
+#define PCLK_KEYPAD		63
+#define PCLK_IRDA		64
+#define PCLK_PCM1		65
+#define PCLK_PCM0		66
+#define PCLK_PWM		67
+#define PCLK_RTC		68
+#define PCLK_WDT		69
+#define PCLK_UART3		70
+#define PCLK_UART2		71
+#define PCLK_UART1		72
+#define PCLK_UART0		73
+#define PCLK_MFC		74
+
+/* Special clocks. */
+#define SCLK_UHOST		80
+#define SCLK_MMC2_48		81
+#define SCLK_MMC1_48		82
+#define SCLK_MMC0_48		83
+#define SCLK_MMC2		84
+#define SCLK_MMC1		85
+#define SCLK_MMC0		86
+#define SCLK_SPI1_48		87
+#define SCLK_SPI0_48		88
+#define SCLK_SPI1		89
+#define SCLK_SPI0		90
+#define SCLK_DAC27		91
+#define SCLK_TV27		92
+#define SCLK_SCALER27		93
+#define SCLK_SCALER		94
+#define SCLK_LCD27		95
+#define SCLK_LCD		96
+#define SCLK_FIMC		97
+#define SCLK_POST0_27		98
+#define SCLK_AUDIO2		99
+#define SCLK_POST0		100
+#define SCLK_AUDIO1		101
+#define SCLK_AUDIO0		102
+#define SCLK_SECUR		103
+#define SCLK_IRDA		104
+#define SCLK_UART		105
+#define SCLK_MFC		106
+#define SCLK_CAM		107
+#define SCLK_JPEG		108
+#define SCLK_ONENAND		109
+
+/* MEM0 bus clocks - S3C6410-specific. */
+#define MEM0_CFCON		112
+#define MEM0_ONENAND1		113
+#define MEM0_ONENAND0		114
+#define MEM0_NFCON		115
+#define MEM0_SROM		116
+
+/* Muxes. */
+#define MOUT_APLL		128
+#define MOUT_MPLL		129
+#define MOUT_EPLL		130
+#define MOUT_MFC		131
+#define MOUT_AUDIO0		132
+#define MOUT_AUDIO1		133
+#define MOUT_UART		134
+#define MOUT_SPI0		135
+#define MOUT_SPI1		136
+#define MOUT_MMC0		137
+#define MOUT_MMC1		138
+#define MOUT_MMC2		139
+#define MOUT_UHOST		140
+#define MOUT_IRDA		141
+#define MOUT_LCD		142
+#define MOUT_SCALER		143
+#define MOUT_DAC27		144
+#define MOUT_TV27		145
+#define MOUT_AUDIO2		146
+
+/* Dividers. */
+#define DOUT_MPLL		160
+#define DOUT_SECUR		161
+#define DOUT_CAM		162
+#define DOUT_JPEG		163
+#define DOUT_MFC		164
+#define DOUT_MMC0		165
+#define DOUT_MMC1		166
+#define DOUT_MMC2		167
+#define DOUT_LCD		168
+#define DOUT_SCALER		169
+#define DOUT_UHOST		170
+#define DOUT_SPI0		171
+#define DOUT_SPI1		172
+#define DOUT_AUDIO0		173
+#define DOUT_AUDIO1		174
+#define DOUT_UART		175
+#define DOUT_IRDA		176
+#define DOUT_FIMC		177
+#define DOUT_AUDIO2		178
+
+/* Total number of clocks. */
+#define NR_CLKS			(DOUT_AUDIO2 + 1)
+
+#endif /* _DT_BINDINGS_CLOCK_SAMSUNG_S3C64XX_CLOCK_H */
diff --git a/include/dt-bindings/input/input.h b/include/dt-bindings/input/input.h
new file mode 100644
index 0000000..042e7b3
--- /dev/null
+++ b/include/dt-bindings/input/input.h
@@ -0,0 +1,525 @@
+/*
+ * This header provides constants for most input bindings.
+ *
+ * Most input bindings include key code, matrix key code format.
+ * In most cases, key code and matrix key code format uses
+ * the standard values/macro defined in this header.
+ */
+
+#ifndef _DT_BINDINGS_INPUT_INPUT_H
+#define _DT_BINDINGS_INPUT_INPUT_H
+
+#define KEY_RESERVED		0
+#define KEY_ESC			1
+#define KEY_1			2
+#define KEY_2			3
+#define KEY_3			4
+#define KEY_4			5
+#define KEY_5			6
+#define KEY_6			7
+#define KEY_7			8
+#define KEY_8			9
+#define KEY_9			10
+#define KEY_0			11
+#define KEY_MINUS		12
+#define KEY_EQUAL		13
+#define KEY_BACKSPACE		14
+#define KEY_TAB			15
+#define KEY_Q			16
+#define KEY_W			17
+#define KEY_E			18
+#define KEY_R			19
+#define KEY_T			20
+#define KEY_Y			21
+#define KEY_U			22
+#define KEY_I			23
+#define KEY_O			24
+#define KEY_P			25
+#define KEY_LEFTBRACE		26
+#define KEY_RIGHTBRACE		27
+#define KEY_ENTER		28
+#define KEY_LEFTCTRL		29
+#define KEY_A			30
+#define KEY_S			31
+#define KEY_D			32
+#define KEY_F			33
+#define KEY_G			34
+#define KEY_H			35
+#define KEY_J			36
+#define KEY_K			37
+#define KEY_L			38
+#define KEY_SEMICOLON		39
+#define KEY_APOSTROPHE		40
+#define KEY_GRAVE		41
+#define KEY_LEFTSHIFT		42
+#define KEY_BACKSLASH		43
+#define KEY_Z			44
+#define KEY_X			45
+#define KEY_C			46
+#define KEY_V			47
+#define KEY_B			48
+#define KEY_N			49
+#define KEY_M			50
+#define KEY_COMMA		51
+#define KEY_DOT			52
+#define KEY_SLASH		53
+#define KEY_RIGHTSHIFT		54
+#define KEY_KPASTERISK		55
+#define KEY_LEFTALT		56
+#define KEY_SPACE		57
+#define KEY_CAPSLOCK		58
+#define KEY_F1			59
+#define KEY_F2			60
+#define KEY_F3			61
+#define KEY_F4			62
+#define KEY_F5			63
+#define KEY_F6			64
+#define KEY_F7			65
+#define KEY_F8			66
+#define KEY_F9			67
+#define KEY_F10			68
+#define KEY_NUMLOCK		69
+#define KEY_SCROLLLOCK		70
+#define KEY_KP7			71
+#define KEY_KP8			72
+#define KEY_KP9			73
+#define KEY_KPMINUS		74
+#define KEY_KP4			75
+#define KEY_KP5			76
+#define KEY_KP6			77
+#define KEY_KPPLUS		78
+#define KEY_KP1			79
+#define KEY_KP2			80
+#define KEY_KP3			81
+#define KEY_KP0			82
+#define KEY_KPDOT		83
+
+#define KEY_ZENKAKUHANKAKU	85
+#define KEY_102ND		86
+#define KEY_F11			87
+#define KEY_F12			88
+#define KEY_RO			89
+#define KEY_KATAKANA		90
+#define KEY_HIRAGANA		91
+#define KEY_HENKAN		92
+#define KEY_KATAKANAHIRAGANA	93
+#define KEY_MUHENKAN		94
+#define KEY_KPJPCOMMA		95
+#define KEY_KPENTER		96
+#define KEY_RIGHTCTRL		97
+#define KEY_KPSLASH		98
+#define KEY_SYSRQ		99
+#define KEY_RIGHTALT		100
+#define KEY_LINEFEED		101
+#define KEY_HOME		102
+#define KEY_UP			103
+#define KEY_PAGEUP		104
+#define KEY_LEFT		105
+#define KEY_RIGHT		106
+#define KEY_END			107
+#define KEY_DOWN		108
+#define KEY_PAGEDOWN		109
+#define KEY_INSERT		110
+#define KEY_DELETE		111
+#define KEY_MACRO		112
+#define KEY_MUTE		113
+#define KEY_VOLUMEDOWN		114
+#define KEY_VOLUMEUP		115
+#define KEY_POWER		116	/* SC System Power Down */
+#define KEY_KPEQUAL		117
+#define KEY_KPPLUSMINUS		118
+#define KEY_PAUSE		119
+#define KEY_SCALE		120	/* AL Compiz Scale (Expose) */
+
+#define KEY_KPCOMMA		121
+#define KEY_HANGEUL		122
+#define KEY_HANGUEL		KEY_HANGEUL
+#define KEY_HANJA		123
+#define KEY_YEN			124
+#define KEY_LEFTMETA		125
+#define KEY_RIGHTMETA		126
+#define KEY_COMPOSE		127
+
+#define KEY_STOP		128	/* AC Stop */
+#define KEY_AGAIN		129
+#define KEY_PROPS		130	/* AC Properties */
+#define KEY_UNDO		131	/* AC Undo */
+#define KEY_FRONT		132
+#define KEY_COPY		133	/* AC Copy */
+#define KEY_OPEN		134	/* AC Open */
+#define KEY_PASTE		135	/* AC Paste */
+#define KEY_FIND		136	/* AC Search */
+#define KEY_CUT			137	/* AC Cut */
+#define KEY_HELP		138	/* AL Integrated Help Center */
+#define KEY_MENU		139	/* Menu (show menu) */
+#define KEY_CALC		140	/* AL Calculator */
+#define KEY_SETUP		141
+#define KEY_SLEEP		142	/* SC System Sleep */
+#define KEY_WAKEUP		143	/* System Wake Up */
+#define KEY_FILE		144	/* AL Local Machine Browser */
+#define KEY_SENDFILE		145
+#define KEY_DELETEFILE		146
+#define KEY_XFER		147
+#define KEY_PROG1		148
+#define KEY_PROG2		149
+#define KEY_WWW			150	/* AL Internet Browser */
+#define KEY_MSDOS		151
+#define KEY_COFFEE		152	/* AL Terminal Lock/Screensaver */
+#define KEY_SCREENLOCK		KEY_COFFEE
+#define KEY_DIRECTION		153
+#define KEY_CYCLEWINDOWS	154
+#define KEY_MAIL		155
+#define KEY_BOOKMARKS		156	/* AC Bookmarks */
+#define KEY_COMPUTER		157
+#define KEY_BACK		158	/* AC Back */
+#define KEY_FORWARD		159	/* AC Forward */
+#define KEY_CLOSECD		160
+#define KEY_EJECTCD		161
+#define KEY_EJECTCLOSECD	162
+#define KEY_NEXTSONG		163
+#define KEY_PLAYPAUSE		164
+#define KEY_PREVIOUSSONG	165
+#define KEY_STOPCD		166
+#define KEY_RECORD		167
+#define KEY_REWIND		168
+#define KEY_PHONE		169	/* Media Select Telephone */
+#define KEY_ISO			170
+#define KEY_CONFIG		171	/* AL Consumer Control Configuration */
+#define KEY_HOMEPAGE		172	/* AC Home */
+#define KEY_REFRESH		173	/* AC Refresh */
+#define KEY_EXIT		174	/* AC Exit */
+#define KEY_MOVE		175
+#define KEY_EDIT		176
+#define KEY_SCROLLUP		177
+#define KEY_SCROLLDOWN		178
+#define KEY_KPLEFTPAREN		179
+#define KEY_KPRIGHTPAREN	180
+#define KEY_NEW			181	/* AC New */
+#define KEY_REDO		182	/* AC Redo/Repeat */
+
+#define KEY_F13			183
+#define KEY_F14			184
+#define KEY_F15			185
+#define KEY_F16			186
+#define KEY_F17			187
+#define KEY_F18			188
+#define KEY_F19			189
+#define KEY_F20			190
+#define KEY_F21			191
+#define KEY_F22			192
+#define KEY_F23			193
+#define KEY_F24			194
+
+#define KEY_PLAYCD		200
+#define KEY_PAUSECD		201
+#define KEY_PROG3		202
+#define KEY_PROG4		203
+#define KEY_DASHBOARD		204	/* AL Dashboard */
+#define KEY_SUSPEND		205
+#define KEY_CLOSE		206	/* AC Close */
+#define KEY_PLAY		207
+#define KEY_FASTFORWARD		208
+#define KEY_BASSBOOST		209
+#define KEY_PRINT		210	/* AC Print */
+#define KEY_HP			211
+#define KEY_CAMERA		212
+#define KEY_SOUND		213
+#define KEY_QUESTION		214
+#define KEY_EMAIL		215
+#define KEY_CHAT		216
+#define KEY_SEARCH		217
+#define KEY_CONNECT		218
+#define KEY_FINANCE		219	/* AL Checkbook/Finance */
+#define KEY_SPORT		220
+#define KEY_SHOP		221
+#define KEY_ALTERASE		222
+#define KEY_CANCEL		223	/* AC Cancel */
+#define KEY_BRIGHTNESSDOWN	224
+#define KEY_BRIGHTNESSUP	225
+#define KEY_MEDIA		226
+
+#define KEY_SWITCHVIDEOMODE	227	/* Cycle between available video
+					   outputs (Monitor/LCD/TV-out/etc) */
+#define KEY_KBDILLUMTOGGLE	228
+#define KEY_KBDILLUMDOWN	229
+#define KEY_KBDILLUMUP		230
+
+#define KEY_SEND		231	/* AC Send */
+#define KEY_REPLY		232	/* AC Reply */
+#define KEY_FORWARDMAIL		233	/* AC Forward Msg */
+#define KEY_SAVE		234	/* AC Save */
+#define KEY_DOCUMENTS		235
+
+#define KEY_BATTERY		236
+
+#define KEY_BLUETOOTH		237
+#define KEY_WLAN		238
+#define KEY_UWB			239
+
+#define KEY_UNKNOWN		240
+
+#define KEY_VIDEO_NEXT		241	/* drive next video source */
+#define KEY_VIDEO_PREV		242	/* drive previous video source */
+#define KEY_BRIGHTNESS_CYCLE	243	/* brightness up, after max is min */
+#define KEY_BRIGHTNESS_ZERO	244	/* brightness off, use ambient */
+#define KEY_DISPLAY_OFF		245	/* display device to off state */
+
+#define KEY_WIMAX		246
+#define KEY_RFKILL		247	/* Key that controls all radios */
+
+#define KEY_MICMUTE		248	/* Mute / unmute the microphone */
+
+/* Code 255 is reserved for special needs of AT keyboard driver */
+
+#define BTN_MISC		0x100
+#define BTN_0			0x100
+#define BTN_1			0x101
+#define BTN_2			0x102
+#define BTN_3			0x103
+#define BTN_4			0x104
+#define BTN_5			0x105
+#define BTN_6			0x106
+#define BTN_7			0x107
+#define BTN_8			0x108
+#define BTN_9			0x109
+
+#define BTN_MOUSE		0x110
+#define BTN_LEFT		0x110
+#define BTN_RIGHT		0x111
+#define BTN_MIDDLE		0x112
+#define BTN_SIDE		0x113
+#define BTN_EXTRA		0x114
+#define BTN_FORWARD		0x115
+#define BTN_BACK		0x116
+#define BTN_TASK		0x117
+
+#define BTN_JOYSTICK		0x120
+#define BTN_TRIGGER		0x120
+#define BTN_THUMB		0x121
+#define BTN_THUMB2		0x122
+#define BTN_TOP			0x123
+#define BTN_TOP2		0x124
+#define BTN_PINKIE		0x125
+#define BTN_BASE		0x126
+#define BTN_BASE2		0x127
+#define BTN_BASE3		0x128
+#define BTN_BASE4		0x129
+#define BTN_BASE5		0x12a
+#define BTN_BASE6		0x12b
+#define BTN_DEAD		0x12f
+
+#define BTN_GAMEPAD		0x130
+#define BTN_SOUTH		0x130
+#define BTN_A			BTN_SOUTH
+#define BTN_EAST		0x131
+#define BTN_B			BTN_EAST
+#define BTN_C			0x132
+#define BTN_NORTH		0x133
+#define BTN_X			BTN_NORTH
+#define BTN_WEST		0x134
+#define BTN_Y			BTN_WEST
+#define BTN_Z			0x135
+#define BTN_TL			0x136
+#define BTN_TR			0x137
+#define BTN_TL2			0x138
+#define BTN_TR2			0x139
+#define BTN_SELECT		0x13a
+#define BTN_START		0x13b
+#define BTN_MODE		0x13c
+#define BTN_THUMBL		0x13d
+#define BTN_THUMBR		0x13e
+
+#define BTN_DIGI		0x140
+#define BTN_TOOL_PEN		0x140
+#define BTN_TOOL_RUBBER		0x141
+#define BTN_TOOL_BRUSH		0x142
+#define BTN_TOOL_PENCIL		0x143
+#define BTN_TOOL_AIRBRUSH	0x144
+#define BTN_TOOL_FINGER		0x145
+#define BTN_TOOL_MOUSE		0x146
+#define BTN_TOOL_LENS		0x147
+#define BTN_TOOL_QUINTTAP	0x148	/* Five fingers on trackpad */
+#define BTN_TOUCH		0x14a
+#define BTN_STYLUS		0x14b
+#define BTN_STYLUS2		0x14c
+#define BTN_TOOL_DOUBLETAP	0x14d
+#define BTN_TOOL_TRIPLETAP	0x14e
+#define BTN_TOOL_QUADTAP	0x14f	/* Four fingers on trackpad */
+
+#define BTN_WHEEL		0x150
+#define BTN_GEAR_DOWN		0x150
+#define BTN_GEAR_UP		0x151
+
+#define KEY_OK			0x160
+#define KEY_SELECT		0x161
+#define KEY_GOTO		0x162
+#define KEY_CLEAR		0x163
+#define KEY_POWER2		0x164
+#define KEY_OPTION		0x165
+#define KEY_INFO		0x166	/* AL OEM Features/Tips/Tutorial */
+#define KEY_TIME		0x167
+#define KEY_VENDOR		0x168
+#define KEY_ARCHIVE		0x169
+#define KEY_PROGRAM		0x16a	/* Media Select Program Guide */
+#define KEY_CHANNEL		0x16b
+#define KEY_FAVORITES		0x16c
+#define KEY_EPG			0x16d
+#define KEY_PVR			0x16e	/* Media Select Home */
+#define KEY_MHP			0x16f
+#define KEY_LANGUAGE		0x170
+#define KEY_TITLE		0x171
+#define KEY_SUBTITLE		0x172
+#define KEY_ANGLE		0x173
+#define KEY_ZOOM		0x174
+#define KEY_MODE		0x175
+#define KEY_KEYBOARD		0x176
+#define KEY_SCREEN		0x177
+#define KEY_PC			0x178	/* Media Select Computer */
+#define KEY_TV			0x179	/* Media Select TV */
+#define KEY_TV2			0x17a	/* Media Select Cable */
+#define KEY_VCR			0x17b	/* Media Select VCR */
+#define KEY_VCR2		0x17c	/* VCR Plus */
+#define KEY_SAT			0x17d	/* Media Select Satellite */
+#define KEY_SAT2		0x17e
+#define KEY_CD			0x17f	/* Media Select CD */
+#define KEY_TAPE		0x180	/* Media Select Tape */
+#define KEY_RADIO		0x181
+#define KEY_TUNER		0x182	/* Media Select Tuner */
+#define KEY_PLAYER		0x183
+#define KEY_TEXT		0x184
+#define KEY_DVD			0x185	/* Media Select DVD */
+#define KEY_AUX			0x186
+#define KEY_MP3			0x187
+#define KEY_AUDIO		0x188	/* AL Audio Browser */
+#define KEY_VIDEO		0x189	/* AL Movie Browser */
+#define KEY_DIRECTORY		0x18a
+#define KEY_LIST		0x18b
+#define KEY_MEMO		0x18c	/* Media Select Messages */
+#define KEY_CALENDAR		0x18d
+#define KEY_RED			0x18e
+#define KEY_GREEN		0x18f
+#define KEY_YELLOW		0x190
+#define KEY_BLUE		0x191
+#define KEY_CHANNELUP		0x192	/* Channel Increment */
+#define KEY_CHANNELDOWN		0x193	/* Channel Decrement */
+#define KEY_FIRST		0x194
+#define KEY_LAST		0x195	/* Recall Last */
+#define KEY_AB			0x196
+#define KEY_NEXT		0x197
+#define KEY_RESTART		0x198
+#define KEY_SLOW		0x199
+#define KEY_SHUFFLE		0x19a
+#define KEY_BREAK		0x19b
+#define KEY_PREVIOUS		0x19c
+#define KEY_DIGITS		0x19d
+#define KEY_TEEN		0x19e
+#define KEY_TWEN		0x19f
+#define KEY_VIDEOPHONE		0x1a0	/* Media Select Video Phone */
+#define KEY_GAMES		0x1a1	/* Media Select Games */
+#define KEY_ZOOMIN		0x1a2	/* AC Zoom In */
+#define KEY_ZOOMOUT		0x1a3	/* AC Zoom Out */
+#define KEY_ZOOMRESET		0x1a4	/* AC Zoom */
+#define KEY_WORDPROCESSOR	0x1a5	/* AL Word Processor */
+#define KEY_EDITOR		0x1a6	/* AL Text Editor */
+#define KEY_SPREADSHEET		0x1a7	/* AL Spreadsheet */
+#define KEY_GRAPHICSEDITOR	0x1a8	/* AL Graphics Editor */
+#define KEY_PRESENTATION	0x1a9	/* AL Presentation App */
+#define KEY_DATABASE		0x1aa	/* AL Database App */
+#define KEY_NEWS		0x1ab	/* AL Newsreader */
+#define KEY_VOICEMAIL		0x1ac	/* AL Voicemail */
+#define KEY_ADDRESSBOOK		0x1ad	/* AL Contacts/Address Book */
+#define KEY_MESSENGER		0x1ae	/* AL Instant Messaging */
+#define KEY_DISPLAYTOGGLE	0x1af	/* Turn display (LCD) on and off */
+#define KEY_SPELLCHECK		0x1b0   /* AL Spell Check */
+#define KEY_LOGOFF		0x1b1   /* AL Logoff */
+
+#define KEY_DOLLAR		0x1b2
+#define KEY_EURO		0x1b3
+
+#define KEY_FRAMEBACK		0x1b4	/* Consumer - transport controls */
+#define KEY_FRAMEFORWARD	0x1b5
+#define KEY_CONTEXT_MENU	0x1b6	/* GenDesc - system context menu */
+#define KEY_MEDIA_REPEAT	0x1b7	/* Consumer - transport control */
+#define KEY_10CHANNELSUP	0x1b8	/* 10 channels up (10+) */
+#define KEY_10CHANNELSDOWN	0x1b9	/* 10 channels down (10-) */
+#define KEY_IMAGES		0x1ba	/* AL Image Browser */
+
+#define KEY_DEL_EOL		0x1c0
+#define KEY_DEL_EOS		0x1c1
+#define KEY_INS_LINE		0x1c2
+#define KEY_DEL_LINE		0x1c3
+
+#define KEY_FN			0x1d0
+#define KEY_FN_ESC		0x1d1
+#define KEY_FN_F1		0x1d2
+#define KEY_FN_F2		0x1d3
+#define KEY_FN_F3		0x1d4
+#define KEY_FN_F4		0x1d5
+#define KEY_FN_F5		0x1d6
+#define KEY_FN_F6		0x1d7
+#define KEY_FN_F7		0x1d8
+#define KEY_FN_F8		0x1d9
+#define KEY_FN_F9		0x1da
+#define KEY_FN_F10		0x1db
+#define KEY_FN_F11		0x1dc
+#define KEY_FN_F12		0x1dd
+#define KEY_FN_1		0x1de
+#define KEY_FN_2		0x1df
+#define KEY_FN_D		0x1e0
+#define KEY_FN_E		0x1e1
+#define KEY_FN_F		0x1e2
+#define KEY_FN_S		0x1e3
+#define KEY_FN_B		0x1e4
+
+#define KEY_BRL_DOT1		0x1f1
+#define KEY_BRL_DOT2		0x1f2
+#define KEY_BRL_DOT3		0x1f3
+#define KEY_BRL_DOT4		0x1f4
+#define KEY_BRL_DOT5		0x1f5
+#define KEY_BRL_DOT6		0x1f6
+#define KEY_BRL_DOT7		0x1f7
+#define KEY_BRL_DOT8		0x1f8
+#define KEY_BRL_DOT9		0x1f9
+#define KEY_BRL_DOT10		0x1fa
+
+#define KEY_NUMERIC_0		0x200	/* used by phones, remote controls, */
+#define KEY_NUMERIC_1		0x201	/* and other keypads */
+#define KEY_NUMERIC_2		0x202
+#define KEY_NUMERIC_3		0x203
+#define KEY_NUMERIC_4		0x204
+#define KEY_NUMERIC_5		0x205
+#define KEY_NUMERIC_6		0x206
+#define KEY_NUMERIC_7		0x207
+#define KEY_NUMERIC_8		0x208
+#define KEY_NUMERIC_9		0x209
+#define KEY_NUMERIC_STAR	0x20a
+#define KEY_NUMERIC_POUND	0x20b
+
+#define KEY_CAMERA_FOCUS	0x210
+#define KEY_WPS_BUTTON		0x211	/* WiFi Protected Setup key */
+
+#define KEY_TOUCHPAD_TOGGLE	0x212	/* Request switch touchpad on or off */
+#define KEY_TOUCHPAD_ON		0x213
+#define KEY_TOUCHPAD_OFF	0x214
+
+#define KEY_CAMERA_ZOOMIN	0x215
+#define KEY_CAMERA_ZOOMOUT	0x216
+#define KEY_CAMERA_UP		0x217
+#define KEY_CAMERA_DOWN		0x218
+#define KEY_CAMERA_LEFT		0x219
+#define KEY_CAMERA_RIGHT	0x21a
+
+#define KEY_ATTENDANT_ON	0x21b
+#define KEY_ATTENDANT_OFF	0x21c
+#define KEY_ATTENDANT_TOGGLE	0x21d	/* Attendant call on or off */
+#define KEY_LIGHTS_TOGGLE	0x21e	/* Reading light on or off */
+
+#define BTN_DPAD_UP		0x220
+#define BTN_DPAD_DOWN		0x221
+#define BTN_DPAD_LEFT		0x222
+#define BTN_DPAD_RIGHT		0x223
+
+#define MATRIX_KEY(row, col, code)	\
+	((((row) & 0xFF) << 24) | (((col) & 0xFF) << 16) | ((code) & 0xFFFF))
+
+#endif /* _DT_BINDINGS_INPUT_INPUT_H */
diff --git a/include/linux/amba/pl080.h b/include/linux/amba/pl080.h
index 3e7b62f..91b84a7 100644
--- a/include/linux/amba/pl080.h
+++ b/include/linux/amba/pl080.h
@@ -87,6 +87,7 @@
 #define PL080_CONTROL_SB_SIZE_MASK		(0x7 << 12)
 #define PL080_CONTROL_SB_SIZE_SHIFT		(12)
 #define PL080_CONTROL_TRANSFER_SIZE_MASK	(0xfff << 0)
+#define PL080S_CONTROL_TRANSFER_SIZE_MASK	(0x1ffffff << 0)
 #define PL080_CONTROL_TRANSFER_SIZE_SHIFT	(0)
 
 #define PL080_BSIZE_1				(0x0)
diff --git a/include/linux/clk-private.h b/include/linux/clk-private.h
index dd7adff..8138c94 100644
--- a/include/linux/clk-private.h
+++ b/include/linux/clk-private.h
@@ -33,8 +33,11 @@
 	const char		**parent_names;
 	struct clk		**parents;
 	u8			num_parents;
+	u8			new_parent_index;
 	unsigned long		rate;
 	unsigned long		new_rate;
+	struct clk		*new_parent;
+	struct clk		*new_child;
 	unsigned long		flags;
 	unsigned int		enable_count;
 	unsigned int		prepare_count;
diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h
index 1ec14a7..73bdb69 100644
--- a/include/linux/clk-provider.h
+++ b/include/linux/clk-provider.h
@@ -12,6 +12,7 @@
 #define __LINUX_CLK_PROVIDER_H
 
 #include <linux/clk.h>
+#include <linux/io.h>
 
 #ifdef CONFIG_COMMON_CLK
 
@@ -27,6 +28,7 @@
 #define CLK_IS_ROOT		BIT(4) /* root clk, has no parent */
 #define CLK_IS_BASIC		BIT(5) /* Basic clk, can't do a to_clk_foo() */
 #define CLK_GET_RATE_NOCACHE	BIT(6) /* do not use the cached clk rate */
+#define CLK_SET_RATE_NO_REPARENT BIT(7) /* don't re-parent on rate change */
 
 struct clk_hw;
 
@@ -79,6 +81,10 @@
  * @round_rate:	Given a target rate as input, returns the closest rate actually
  * 		supported by the clock.
  *
+ * @determine_rate: Given a target rate as input, returns the closest rate
+ *		actually supported by the clock, and optionally the parent clock
+ *		that should be used to provide the clock rate.
+ *
  * @get_parent:	Queries the hardware to determine the parent of a clock.  The
  * 		return value is a u8 which specifies the index corresponding to
  * 		the parent clock.  This index can be applied to either the
@@ -126,6 +132,9 @@
 					unsigned long parent_rate);
 	long		(*round_rate)(struct clk_hw *hw, unsigned long,
 					unsigned long *);
+	long		(*determine_rate)(struct clk_hw *hw, unsigned long rate,
+					unsigned long *best_parent_rate,
+					struct clk **best_parent_clk);
 	int		(*set_parent)(struct clk_hw *hw, u8 index);
 	u8		(*get_parent)(struct clk_hw *hw);
 	int		(*set_rate)(struct clk_hw *hw, unsigned long,
@@ -327,8 +336,10 @@
 #define CLK_MUX_INDEX_ONE		BIT(0)
 #define CLK_MUX_INDEX_BIT		BIT(1)
 #define CLK_MUX_HIWORD_MASK		BIT(2)
+#define CLK_MUX_READ_ONLY	BIT(3) /* mux setting cannot be changed */
 
 extern const struct clk_ops clk_mux_ops;
+extern const struct clk_ops clk_mux_ro_ops;
 
 struct clk *clk_register_mux(struct device *dev, const char *name,
 		const char **parent_names, u8 num_parents, unsigned long flags,
@@ -418,6 +429,7 @@
 struct clk_hw *__clk_get_hw(struct clk *clk);
 u8 __clk_get_num_parents(struct clk *clk);
 struct clk *__clk_get_parent(struct clk *clk);
+struct clk *clk_get_parent_by_index(struct clk *clk, u8 index);
 unsigned int __clk_get_enable_count(struct clk *clk);
 unsigned int __clk_get_prepare_count(struct clk *clk);
 unsigned long __clk_get_rate(struct clk *clk);
@@ -425,6 +437,9 @@
 bool __clk_is_prepared(struct clk *clk);
 bool __clk_is_enabled(struct clk *clk);
 struct clk *__clk_lookup(const char *name);
+long __clk_mux_determine_rate(struct clk_hw *hw, unsigned long rate,
+			      unsigned long *best_parent_rate,
+			      struct clk **best_parent_p);
 
 /*
  * FIXME clock api without lock protection
@@ -490,5 +505,21 @@
 #define of_clk_init(matches) \
 	{ while (0); }
 #endif /* CONFIG_OF */
+
+/*
+ * wrap access to peripherals in accessor routines
+ * for improved portability across platforms
+ */
+
+static inline u32 clk_readl(u32 __iomem *reg)
+{
+	return readl(reg);
+}
+
+static inline void clk_writel(u32 val, u32 __iomem *reg)
+{
+	writel(val, reg);
+}
+
 #endif /* CONFIG_COMMON_CLK */
 #endif /* CLK_PROVIDER_H */
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index e151d4c..653073d 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -10,6 +10,7 @@
 
 #include <linux/bio.h>
 #include <linux/blkdev.h>
+#include <linux/math64.h>
 #include <linux/ratelimit.h>
 
 struct dm_dev;
@@ -550,6 +551,14 @@
 #define DM_MAPIO_REMAPPED	1
 #define DM_MAPIO_REQUEUE	DM_ENDIO_REQUEUE
 
+#define dm_sector_div64(x, y)( \
+{ \
+	u64 _res; \
+	(x) = div64_u64_rem(x, y, &_res); \
+	_res; \
+} \
+)
+
 /*
  * Ceiling(n / sz)
  */
diff --git a/include/linux/device.h b/include/linux/device.h
index f46646e..2a9d6ed 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -737,7 +737,7 @@
 
 	struct dma_coherent_mem	*dma_mem; /* internal for coherent mem
 					     override */
-#ifdef CONFIG_CMA
+#ifdef CONFIG_DMA_CMA
 	struct cma *cma_area;		/* contiguous memory area for dma
 					   allocations */
 #endif
diff --git a/include/linux/dma-contiguous.h b/include/linux/dma-contiguous.h
index 00141d3..3b28f93 100644
--- a/include/linux/dma-contiguous.h
+++ b/include/linux/dma-contiguous.h
@@ -67,9 +67,53 @@
 
 extern struct cma *dma_contiguous_default_area;
 
+static inline struct cma *dev_get_cma_area(struct device *dev)
+{
+	if (dev && dev->cma_area)
+		return dev->cma_area;
+	return dma_contiguous_default_area;
+}
+
+static inline void dev_set_cma_area(struct device *dev, struct cma *cma)
+{
+	if (dev)
+		dev->cma_area = cma;
+}
+
+static inline void dma_contiguous_set_default(struct cma *cma)
+{
+	dma_contiguous_default_area = cma;
+}
+
 void dma_contiguous_reserve(phys_addr_t addr_limit);
-int dma_declare_contiguous(struct device *dev, phys_addr_t size,
-			   phys_addr_t base, phys_addr_t limit);
+
+int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base,
+				       phys_addr_t limit, struct cma **res_cma);
+
+/**
+ * dma_declare_contiguous() - reserve area for contiguous memory handling
+ *			      for particular device
+ * @dev:   Pointer to device structure.
+ * @size:  Size of the reserved memory.
+ * @base:  Start address of the reserved memory (optional, 0 for any).
+ * @limit: End address of the reserved memory (optional, 0 for any).
+ *
+ * This function reserves memory for specified device. It should be
+ * called by board specific code when early allocator (memblock or bootmem)
+ * is still activate.
+ */
+
+static inline int dma_declare_contiguous(struct device *dev, phys_addr_t size,
+					 phys_addr_t base, phys_addr_t limit)
+{
+	struct cma *cma;
+	int ret;
+	ret = dma_contiguous_reserve_area(size, base, limit, &cma);
+	if (ret == 0)
+		dev_set_cma_area(dev, cma);
+
+	return ret;
+}
 
 struct page *dma_alloc_from_contiguous(struct device *dev, int count,
 				       unsigned int order);
@@ -80,8 +124,22 @@
 
 #define MAX_CMA_AREAS	(0)
 
+static inline struct cma *dev_get_cma_area(struct device *dev)
+{
+	return NULL;
+}
+
+static inline void dev_set_cma_area(struct device *dev, struct cma *cma) { }
+
+static inline void dma_contiguous_set_default(struct cma *cma) { }
+
 static inline void dma_contiguous_reserve(phys_addr_t limit) { }
 
+static inline int dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base,
+				       phys_addr_t limit, struct cma **res_cma) {
+	return -ENOSYS;
+}
+
 static inline
 int dma_declare_contiguous(struct device *dev, phys_addr_t size,
 			   phys_addr_t base, phys_addr_t limit)
diff --git a/include/linux/dma/mmp-pdma.h b/include/linux/dma/mmp-pdma.h
new file mode 100644
index 0000000..2dc9b2b
--- /dev/null
+++ b/include/linux/dma/mmp-pdma.h
@@ -0,0 +1,15 @@
+#ifndef _MMP_PDMA_H_
+#define _MMP_PDMA_H_
+
+struct dma_chan;
+
+#ifdef CONFIG_MMP_PDMA
+bool mmp_pdma_filter_fn(struct dma_chan *chan, void *param);
+#else
+static inline bool mmp_pdma_filter_fn(struct dma_chan *chan, void *param)
+{
+	return false;
+}
+#endif
+
+#endif /* _MMP_PDMA_H_ */
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index cb286b1..0bc7275 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -38,7 +38,10 @@
 #define DMA_MIN_COOKIE	1
 #define DMA_MAX_COOKIE	INT_MAX
 
-#define dma_submit_error(cookie) ((cookie) < 0 ? 1 : 0)
+static inline int dma_submit_error(dma_cookie_t cookie)
+{
+	return cookie < 0 ? cookie : 0;
+}
 
 /**
  * enum dma_status - DMA transaction status
@@ -370,6 +373,25 @@
 	unsigned int slave_id;
 };
 
+/* struct dma_slave_caps - expose capabilities of a slave channel only
+ *
+ * @src_addr_widths: bit mask of src addr widths the channel supports
+ * @dstn_addr_widths: bit mask of dstn addr widths the channel supports
+ * @directions: bit mask of slave direction the channel supported
+ * 	since the enum dma_transfer_direction is not defined as bits for each
+ * 	type of direction, the dma controller should fill (1 << <TYPE>) and same
+ * 	should be checked by controller as well
+ * @cmd_pause: true, if pause and thereby resume is supported
+ * @cmd_terminate: true, if terminate cmd is supported
+ */
+struct dma_slave_caps {
+	u32 src_addr_widths;
+	u32 dstn_addr_widths;
+	u32 directions;
+	bool cmd_pause;
+	bool cmd_terminate;
+};
+
 static inline const char *dma_chan_name(struct dma_chan *chan)
 {
 	return dev_name(&chan->dev->device);
@@ -532,6 +554,7 @@
  *	struct with auxiliary transfer status information, otherwise the call
  *	will just return a simple status code
  * @device_issue_pending: push pending transactions to hardware
+ * @device_slave_caps: return the slave channel capabilities
  */
 struct dma_device {
 
@@ -597,6 +620,7 @@
 					    dma_cookie_t cookie,
 					    struct dma_tx_state *txstate);
 	void (*device_issue_pending)(struct dma_chan *chan);
+	int (*device_slave_caps)(struct dma_chan *chan, struct dma_slave_caps *caps);
 };
 
 static inline int dmaengine_device_control(struct dma_chan *chan,
@@ -670,6 +694,21 @@
 	return chan->device->device_prep_interleaved_dma(chan, xt, flags);
 }
 
+static inline int dma_get_slave_caps(struct dma_chan *chan, struct dma_slave_caps *caps)
+{
+	if (!chan || !caps)
+		return -EINVAL;
+
+	/* check if the channel supports slave transactions */
+	if (!test_bit(DMA_SLAVE, chan->device->cap_mask.bits))
+		return -ENXIO;
+
+	if (chan->device->device_slave_caps)
+		return chan->device->device_slave_caps(chan, caps);
+
+	return -ENXIO;
+}
+
 static inline int dmaengine_terminate_all(struct dma_chan *chan)
 {
 	return dmaengine_device_control(chan, DMA_TERMINATE_ALL, 0);
@@ -958,8 +997,9 @@
 	}
 }
 
-enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie);
 #ifdef CONFIG_DMA_ENGINE
+struct dma_chan *dma_find_channel(enum dma_transaction_type tx_type);
+enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie);
 enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx);
 void dma_issue_pending_all(void);
 struct dma_chan *__dma_request_channel(const dma_cap_mask_t *mask,
@@ -967,6 +1007,14 @@
 struct dma_chan *dma_request_slave_channel(struct device *dev, const char *name);
 void dma_release_channel(struct dma_chan *chan);
 #else
+static inline struct dma_chan *dma_find_channel(enum dma_transaction_type tx_type)
+{
+	return NULL;
+}
+static inline enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie)
+{
+	return DMA_SUCCESS;
+}
 static inline enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx)
 {
 	return DMA_SUCCESS;
@@ -994,7 +1042,7 @@
 int dma_async_device_register(struct dma_device *device);
 void dma_async_device_unregister(struct dma_device *device);
 void dma_run_dependencies(struct dma_async_tx_descriptor *tx);
-struct dma_chan *dma_find_channel(enum dma_transaction_type tx_type);
+struct dma_chan *dma_get_slave_channel(struct dma_chan *chan);
 struct dma_chan *net_dma_find_channel(void);
 #define dma_request_channel(mask, x, y) __dma_request_channel(&(mask), x, y)
 #define dma_request_slave_channel_compat(mask, x, y, dev, name) \
diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h
index cf5d2af61..ff0b981 100644
--- a/include/linux/eventfd.h
+++ b/include/linux/eventfd.h
@@ -9,7 +9,6 @@
 #define _LINUX_EVENTFD_H
 
 #include <linux/fcntl.h>
-#include <linux/file.h>
 #include <linux/wait.h>
 
 /*
@@ -26,6 +25,8 @@
 #define EFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK)
 #define EFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS | EFD_SEMAPHORE)
 
+struct file;
+
 #ifdef CONFIG_EVENTFD
 
 struct file *eventfd_file_create(unsigned int count, int flags);
diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index a9ff9a3..7823e9e 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -251,6 +251,10 @@
 	/* unpin an object in the cache */
 	void (*unpin_object)(struct fscache_object *object);
 
+	/* check the consistency between the backing cache and the FS-Cache
+	 * cookie */
+	bool (*check_consistency)(struct fscache_operation *op);
+
 	/* store the updated auxiliary data on an object */
 	void (*update_object)(struct fscache_object *object);
 
diff --git a/include/linux/fscache.h b/include/linux/fscache.h
index 7a08623..19b4645 100644
--- a/include/linux/fscache.h
+++ b/include/linux/fscache.h
@@ -183,6 +183,7 @@
 	const struct fscache_cookie_def *,
 	void *);
 extern void __fscache_relinquish_cookie(struct fscache_cookie *, int);
+extern int __fscache_check_consistency(struct fscache_cookie *);
 extern void __fscache_update_cookie(struct fscache_cookie *);
 extern int __fscache_attr_changed(struct fscache_cookie *);
 extern void __fscache_invalidate(struct fscache_cookie *);
@@ -208,6 +209,8 @@
 					 gfp_t);
 extern void __fscache_uncache_all_inode_pages(struct fscache_cookie *,
 					      struct inode *);
+extern void __fscache_readpages_cancel(struct fscache_cookie *cookie,
+				       struct list_head *pages);
 
 /**
  * fscache_register_netfs - Register a filesystem as desiring caching services
@@ -326,6 +329,25 @@
 }
 
 /**
+ * fscache_check_consistency - Request that if the cache is updated
+ * @cookie: The cookie representing the cache object
+ *
+ * Request an consistency check from fscache, which passes the request
+ * to the backing cache.
+ *
+ * Returns 0 if consistent and -ESTALE if inconsistent.  May also
+ * return -ENOMEM and -ERESTARTSYS.
+ */
+static inline
+int fscache_check_consistency(struct fscache_cookie *cookie)
+{
+	if (fscache_cookie_valid(cookie))
+		return __fscache_check_consistency(cookie);
+	else
+		return 0;
+}
+
+/**
  * fscache_update_cookie - Request that a cache object be updated
  * @cookie: The cookie representing the cache object
  *
@@ -570,6 +592,26 @@
 }
 
 /**
+ * fscache_readpages_cancel - Cancel read/alloc on pages
+ * @cookie: The cookie representing the inode's cache object.
+ * @pages: The netfs pages that we canceled write on in readpages()
+ *
+ * Uncache/unreserve the pages reserved earlier in readpages() via
+ * fscache_readpages_or_alloc() and similar.  In most successful caches in
+ * readpages() this doesn't do anything.  In cases when the underlying netfs's
+ * readahead failed we need to clean up the pagelist (unmark and uncache).
+ *
+ * This function may sleep as it may have to clean up disk state.
+ */
+static inline
+void fscache_readpages_cancel(struct fscache_cookie *cookie,
+			      struct list_head *pages)
+{
+	if (fscache_cookie_valid(cookie))
+		__fscache_readpages_cancel(cookie, pages);
+}
+
+/**
  * fscache_write_page - Request storage of a page in the cache
  * @cookie: The cookie representing the cache object
  * @page: The netfs page to store
diff --git a/include/linux/fsl/mxs-dma.h b/include/linux/fsl/mxs-dma.h
deleted file mode 100644
index 55d8702..0000000
--- a/include/linux/fsl/mxs-dma.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/*
- * Copyright 2011 Freescale Semiconductor, Inc. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef __MACH_MXS_DMA_H__
-#define __MACH_MXS_DMA_H__
-
-#include <linux/dmaengine.h>
-
-struct mxs_dma_data {
-	int chan_irq;
-};
-
-extern int mxs_dma_is_apbh(struct dma_chan *chan);
-extern int mxs_dma_is_apbx(struct dma_chan *chan);
-#endif /* __MACH_MXS_DMA_H__ */
diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index 3e203eb..0e5d9ec 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -66,6 +66,7 @@
 void gic_init_bases(unsigned int, int, void __iomem *, void __iomem *,
 		    u32 offset, struct device_node *);
 void gic_cascade_irq(unsigned int gic_nr, unsigned int irq);
+void gic_cpu_if_down(void);
 
 static inline void gic_init(unsigned int nr, int start,
 			    void __iomem *dist , void __iomem *cpu)
diff --git a/include/linux/irqchip/mmp.h b/include/linux/irqchip/mmp.h
new file mode 100644
index 0000000..c78a892
--- /dev/null
+++ b/include/linux/irqchip/mmp.h
@@ -0,0 +1,6 @@
+#ifndef	__IRQCHIP_MMP_H
+#define	__IRQCHIP_MMP_H
+
+extern struct irq_chip icu_irq_chip;
+
+#endif	/* __IRQCHIP_MMP_H */
diff --git a/include/linux/math64.h b/include/linux/math64.h
index 2913b86..69ed5f5 100644
--- a/include/linux/math64.h
+++ b/include/linux/math64.h
@@ -31,6 +31,15 @@
 }
 
 /**
+ * div64_u64_rem - unsigned 64bit divide with 64bit divisor and remainder
+ */
+static inline u64 div64_u64_rem(u64 dividend, u64 divisor, u64 *remainder)
+{
+	*remainder = dividend % divisor;
+	return dividend / divisor;
+}
+
+/**
  * div64_u64 - unsigned 64bit divide with 64bit divisor
  */
 static inline u64 div64_u64(u64 dividend, u64 divisor)
@@ -63,6 +72,10 @@
 extern s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder);
 #endif
 
+#ifndef div64_u64_rem
+extern u64 div64_u64_rem(u64 dividend, u64 divisor, u64 *remainder);
+#endif
+
 #ifndef div64_u64
 extern u64 div64_u64(u64 dividend, u64 divisor);
 #endif
diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h
index ce35113..b22883d 100644
--- a/include/linux/mfd/tmio.h
+++ b/include/linux/mfd/tmio.h
@@ -108,7 +108,6 @@
 	unsigned int			cd_gpio;
 	void (*set_pwr)(struct platform_device *host, int state);
 	void (*set_clk_div)(struct platform_device *host, int state);
-	int (*get_cd)(struct platform_device *host);
 	int (*write16_hook)(struct tmio_mmc_host *host, int addr);
 	/* clock management callbacks */
 	int (*clk_enable)(struct platform_device *pdev, unsigned int *f);
diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h
index 443243b..da51bec 100644
--- a/include/linux/mmc/core.h
+++ b/include/linux/mmc/core.h
@@ -208,6 +208,8 @@
 	__mmc_claim_host(host, NULL);
 }
 
+struct device_node;
 extern u32 mmc_vddrange_to_ocrmask(int vdd_min, int vdd_max);
+extern int mmc_of_parse_voltage(struct device_node *np, u32 *mask);
 
 #endif /* LINUX_MMC_CORE_H */
diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h
index e3c6a74..3e781b8 100644
--- a/include/linux/mmc/sdhci.h
+++ b/include/linux/mmc/sdhci.h
@@ -171,6 +171,7 @@
 	unsigned int            ocr_avail_sdio;	/* OCR bit masks */
 	unsigned int            ocr_avail_sd;
 	unsigned int            ocr_avail_mmc;
+	u32 ocr_mask;		/* available voltages */
 
 	wait_queue_head_t	buf_ready_int;	/* Waitqueue for Buffer Read Ready interrupt */
 	unsigned int		tuning_done;	/* Condition flag set when CMD19 succeeds */
diff --git a/include/linux/mmc/sh_mmcif.h b/include/linux/mmc/sh_mmcif.h
index e7d5dd6..ccd8fb2 100644
--- a/include/linux/mmc/sh_mmcif.h
+++ b/include/linux/mmc/sh_mmcif.h
@@ -16,7 +16,6 @@
 
 #include <linux/io.h>
 #include <linux/platform_device.h>
-#include <linux/sh_dma.h>
 
 /*
  * MMCIF : CE_CLK_CTRL [19:16]
@@ -33,12 +32,12 @@
  */
 
 struct sh_mmcif_plat_data {
-	void (*set_pwr)(struct platform_device *pdev, int state);
-	void (*down_pwr)(struct platform_device *pdev);
 	int (*get_cd)(struct platform_device *pdef);
 	unsigned int		slave_id_tx;	/* embedded slave_id_[tr]x */
 	unsigned int		slave_id_rx;
 	bool			use_cd_gpio : 1;
+	bool			ccs_unsupported : 1;
+	bool			clk_ctrl2_present : 1;
 	unsigned int		cd_gpio;
 	u8			sup_pclk;	/* 1 :SH7757, 0: SH7724/SH7372 */
 	unsigned long		caps;
@@ -62,6 +61,7 @@
 #define MMCIF_CE_INT_MASK	0x00000044
 #define MMCIF_CE_HOST_STS1	0x00000048
 #define MMCIF_CE_HOST_STS2	0x0000004C
+#define MMCIF_CE_CLK_CTRL2	0x00000070
 #define MMCIF_CE_VERSION	0x0000007C
 
 /* CE_BUF_ACC */
diff --git a/include/linux/mmc/sh_mobile_sdhi.h b/include/linux/mmc/sh_mobile_sdhi.h
index b76bcf0..68927ae 100644
--- a/include/linux/mmc/sh_mobile_sdhi.h
+++ b/include/linux/mmc/sh_mobile_sdhi.h
@@ -25,8 +25,6 @@
 	unsigned long tmio_caps2;
 	u32 tmio_ocr_mask;	/* available MMC voltages */
 	unsigned int cd_gpio;
-	void (*set_pwr)(struct platform_device *pdev, int state);
-	int (*get_cd)(struct platform_device *pdev);
 
 	/* callbacks for board specific setup code */
 	int (*init)(struct platform_device *pdev,
diff --git a/include/linux/mmc/slot-gpio.h b/include/linux/mmc/slot-gpio.h
index 7d88d27..b0c73e4 100644
--- a/include/linux/mmc/slot-gpio.h
+++ b/include/linux/mmc/slot-gpio.h
@@ -18,7 +18,8 @@
 void mmc_gpio_free_ro(struct mmc_host *host);
 
 int mmc_gpio_get_cd(struct mmc_host *host);
-int mmc_gpio_request_cd(struct mmc_host *host, unsigned int gpio);
+int mmc_gpio_request_cd(struct mmc_host *host, unsigned int gpio,
+			unsigned int debounce);
 void mmc_gpio_free_cd(struct mmc_host *host);
 
 #endif
diff --git a/include/linux/mtd/bbm.h b/include/linux/mtd/bbm.h
index 211ff67..95fc482 100644
--- a/include/linux/mtd/bbm.h
+++ b/include/linux/mtd/bbm.h
@@ -93,8 +93,6 @@
 #define NAND_BBT_CREATE_EMPTY	0x00000400
 /* Search good / bad pattern through all pages of a block */
 #define NAND_BBT_SCANALLPAGES	0x00000800
-/* Scan block empty during good / bad block scan */
-#define NAND_BBT_SCANEMPTY	0x00001000
 /* Write bbt if neccecary */
 #define NAND_BBT_WRITE		0x00002000
 /* Read and write back block contents when writing bbt */
diff --git a/include/linux/mtd/fsmc.h b/include/linux/mtd/fsmc.h
index d6ed61e..c8be32e 100644
--- a/include/linux/mtd/fsmc.h
+++ b/include/linux/mtd/fsmc.h
@@ -137,6 +137,7 @@
 
 /**
  * fsmc_nand_platform_data - platform specific NAND controller config
+ * @nand_timings: timing setup for the physical NAND interface
  * @partitions: partition table for the platform, use a default fallback
  * if this is NULL
  * @nr_partitions: the number of partitions in the previous entry
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index a5cf4e8..f9bfe52 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -173,6 +173,9 @@
 	/* ECC layout structure pointer - read only! */
 	struct nand_ecclayout *ecclayout;
 
+	/* the ecc step size. */
+	unsigned int ecc_step_size;
+
 	/* max number of correctible bit errors per ecc step */
 	unsigned int ecc_strength;
 
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index ab63634..ac8e89d 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -56,7 +56,7 @@
  * is supported now. If you add a chip with bigger oobsize/page
  * adjust this accordingly.
  */
-#define NAND_MAX_OOBSIZE	640
+#define NAND_MAX_OOBSIZE	744
 #define NAND_MAX_PAGESIZE	8192
 
 /*
@@ -202,6 +202,10 @@
 /* Keep gcc happy */
 struct nand_chip;
 
+/* ONFI features */
+#define ONFI_FEATURE_16_BIT_BUS		(1 << 0)
+#define ONFI_FEATURE_EXT_PARAM_PAGE	(1 << 7)
+
 /* ONFI timing mode, used in both asynchronous and synchronous mode */
 #define ONFI_TIMING_MODE_0		(1 << 0)
 #define ONFI_TIMING_MODE_1		(1 << 1)
@@ -217,6 +221,9 @@
 /* ONFI subfeature parameters length */
 #define ONFI_SUBFEATURE_PARAM_LEN	4
 
+/* ONFI optional commands SET/GET FEATURES supported? */
+#define ONFI_OPT_CMD_SET_GET_FEATURES	(1 << 2)
+
 struct nand_onfi_params {
 	/* rev info and features block */
 	/* 'O' 'N' 'F' 'I'  */
@@ -224,7 +231,10 @@
 	__le16 revision;
 	__le16 features;
 	__le16 opt_cmd;
-	u8 reserved[22];
+	u8 reserved0[2];
+	__le16 ext_param_page_length; /* since ONFI 2.1 */
+	u8 num_of_param_pages;        /* since ONFI 2.1 */
+	u8 reserved1[17];
 
 	/* manufacturer information block */
 	char manufacturer[12];
@@ -281,6 +291,40 @@
 
 #define ONFI_CRC_BASE	0x4F4E
 
+/* Extended ECC information Block Definition (since ONFI 2.1) */
+struct onfi_ext_ecc_info {
+	u8 ecc_bits;
+	u8 codeword_size;
+	__le16 bb_per_lun;
+	__le16 block_endurance;
+	u8 reserved[2];
+} __packed;
+
+#define ONFI_SECTION_TYPE_0	0	/* Unused section. */
+#define ONFI_SECTION_TYPE_1	1	/* for additional sections. */
+#define ONFI_SECTION_TYPE_2	2	/* for ECC information. */
+struct onfi_ext_section {
+	u8 type;
+	u8 length;
+} __packed;
+
+#define ONFI_EXT_SECTION_MAX 8
+
+/* Extended Parameter Page Definition (since ONFI 2.1) */
+struct onfi_ext_param_page {
+	__le16 crc;
+	u8 sig[4];             /* 'E' 'P' 'P' 'S' */
+	u8 reserved0[10];
+	struct onfi_ext_section sections[ONFI_EXT_SECTION_MAX];
+
+	/*
+	 * The actual size of the Extended Parameter Page is in
+	 * @ext_param_page_length of nand_onfi_params{}.
+	 * The following are the variable length sections.
+	 * So we do not add any fields below. Please see the ONFI spec.
+	 */
+} __packed;
+
 /**
  * struct nand_hw_control - Control structure for hardware controller (e.g ECC generator) shared among independent devices
  * @lock:               protection lock
@@ -390,8 +434,8 @@
  * @write_buf:		[REPLACEABLE] write data from the buffer to the chip
  * @read_buf:		[REPLACEABLE] read data from the chip into the buffer
  * @select_chip:	[REPLACEABLE] select chip nr
- * @block_bad:		[REPLACEABLE] check, if the block is bad
- * @block_markbad:	[REPLACEABLE] mark the block bad
+ * @block_bad:		[REPLACEABLE] check if a block is bad, using OOB markers
+ * @block_markbad:	[REPLACEABLE] mark a block bad
  * @cmd_ctrl:		[BOARDSPECIFIC] hardwarespecific function for controlling
  *			ALE/CLE/nCE. Also used to write command and address
  * @init_size:		[BOARDSPECIFIC] hardwarespecific function for setting
@@ -434,6 +478,12 @@
  *			bad block marker position; i.e., BBM == 11110111b is
  *			not bad when badblockbits == 7
  * @cellinfo:		[INTERN] MLC/multichip data from chip ident
+ * @ecc_strength_ds:	[INTERN] ECC correctability from the datasheet.
+ *			Minimum amount of bit errors per @ecc_step_ds guaranteed
+ *			to be correctable. If unknown, set to zero.
+ * @ecc_step_ds:	[INTERN] ECC step required by the @ecc_strength_ds,
+ *                      also from the datasheet. It is the recommended ECC step
+ *			size, if known; if unknown, set to zero.
  * @numchips:		[INTERN] number of physical chips
  * @chipsize:		[INTERN] the size of one chip for multichip arrays
  * @pagemask:		[INTERN] page number mask = number of (pages / chip) - 1
@@ -510,6 +560,8 @@
 	unsigned int pagebuf_bitflips;
 	int subpagesize;
 	uint8_t cellinfo;
+	uint16_t ecc_strength_ds;
+	uint16_t ecc_step_ds;
 	int badblockpos;
 	int badblockbits;
 
@@ -576,6 +628,11 @@
 	{ .name = (nm), {{ .dev_id = (devid) }}, .chipsize = (chipsz), \
 	  .options = (opts) }
 
+#define NAND_ECC_INFO(_strength, _step)	\
+			{ .strength_ds = (_strength), .step_ds = (_step) }
+#define NAND_ECC_STRENGTH(type)		((type)->ecc.strength_ds)
+#define NAND_ECC_STEP(type)		((type)->ecc.step_ds)
+
 /**
  * struct nand_flash_dev - NAND Flash Device ID Structure
  * @name: a human-readable name of the NAND chip
@@ -593,6 +650,12 @@
  * @options: stores various chip bit options
  * @id_len: The valid length of the @id.
  * @oobsize: OOB size
+ * @ecc.strength_ds: The ECC correctability from the datasheet, same as the
+ *                   @ecc_strength_ds in nand_chip{}.
+ * @ecc.step_ds: The ECC step required by the @ecc.strength_ds, same as the
+ *               @ecc_step_ds in nand_chip{}, also from the datasheet.
+ *               For example, the "4bit ECC for each 512Byte" can be set with
+ *               NAND_ECC_INFO(4, 512).
  */
 struct nand_flash_dev {
 	char *name;
@@ -609,6 +672,10 @@
 	unsigned int options;
 	uint16_t id_len;
 	uint16_t oobsize;
+	struct {
+		uint16_t strength_ds;
+		uint16_t step_ds;
+	} ecc;
 };
 
 /**
@@ -625,8 +692,8 @@
 extern struct nand_manufacturers nand_manuf_ids[];
 
 extern int nand_scan_bbt(struct mtd_info *mtd, struct nand_bbt_descr *bd);
-extern int nand_update_bbt(struct mtd_info *mtd, loff_t offs);
 extern int nand_default_bbt(struct mtd_info *mtd);
+extern int nand_markbad_bbt(struct mtd_info *mtd, loff_t offs);
 extern int nand_isbad_bbt(struct mtd_info *mtd, loff_t offs, int allowbbt);
 extern int nand_erase_nand(struct mtd_info *mtd, struct erase_info *instr,
 			   int allowbbt);
@@ -708,6 +775,12 @@
 	return chip->priv;
 }
 
+/* return the supported features. */
+static inline int onfi_feature(struct nand_chip *chip)
+{
+	return chip->onfi_version ? le16_to_cpu(chip->onfi_params.features) : 0;
+}
+
 /* return the supported asynchronous timing mode. */
 static inline int onfi_get_async_timing_mode(struct nand_chip *chip)
 {
diff --git a/include/linux/namei.h b/include/linux/namei.h
index cd09751..8e47bc7 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -58,7 +58,6 @@
 
 extern int user_path_at(int, const char __user *, unsigned, struct path *);
 extern int user_path_at_empty(int, const char __user *, unsigned, struct path *, int *empty);
-extern int user_path_umountat(int, const char __user *, unsigned int, struct path *);
 
 #define user_path(name, path) user_path_at(AT_FDCWD, name, LOOKUP_FOLLOW, path)
 #define user_lpath(name, path) user_path_at(AT_FDCWD, name, 0, path)
@@ -71,8 +70,7 @@
 extern struct dentry *user_path_create(int, const char __user *, struct path *, unsigned int);
 extern void done_path_create(struct path *, struct dentry *);
 extern struct dentry *kern_path_locked(const char *, struct path *);
-extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
-			   const char *, unsigned int, struct path *);
+extern int kern_path_mountpoint(int, const char *, struct path *, unsigned int);
 
 extern struct dentry *lookup_one_len(const char *, struct dentry *, int);
 
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 7125cef..3ea4cde 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -524,6 +524,7 @@
  * linux/fs/nfs/unlink.c
  */
 extern void nfs_complete_unlink(struct dentry *dentry, struct inode *);
+extern void nfs_wait_on_sillyrename(struct dentry *dentry);
 extern void nfs_block_sillyrename(struct dentry *dentry);
 extern void nfs_unblock_sillyrename(struct dentry *dentry);
 extern int  nfs_sillyrename(struct inode *dir, struct dentry *dentry);
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index d221243..b8cedce 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -56,6 +56,7 @@
 	struct rpc_cred		*cl_machine_cred;
 
 #if IS_ENABLED(CONFIG_NFS_V4)
+	struct list_head	cl_ds_clients; /* auth flavor data servers */
 	u64			cl_clientid;	/* constant */
 	nfs4_verifier		cl_confirm;	/* Clientid verifier */
 	unsigned long		cl_state;
@@ -78,6 +79,9 @@
 	u32			cl_cb_ident;	/* v4.0 callback identifier */
 	const struct nfs4_minor_version_ops *cl_mvops;
 
+	/* NFSv4.0 transport blocking */
+	struct nfs4_slot_table	*cl_slot_tbl;
+
 	/* The sequence id to use for the next CREATE_SESSION */
 	u32			cl_seqid;
 	/* The flags used for obtaining the clientid during EXCHANGE_ID */
@@ -87,6 +91,15 @@
 	struct nfs41_server_owner *cl_serverowner;
 	struct nfs41_server_scope *cl_serverscope;
 	struct nfs41_impl_id	*cl_implid;
+	/* nfs 4.1+ state protection modes: */
+	unsigned long		cl_sp4_flags;
+#define NFS_SP4_MACH_CRED_MINIMAL  1	/* Minimal sp4_mach_cred - state ops
+					 * must use machine cred */
+#define NFS_SP4_MACH_CRED_CLEANUP  2	/* CLOSE and LOCKU */
+#define NFS_SP4_MACH_CRED_SECINFO  3	/* SECINFO and SECINFO_NO_NAME */
+#define NFS_SP4_MACH_CRED_STATEID  4	/* TEST_STATEID and FREE_STATEID */
+#define NFS_SP4_MACH_CRED_WRITE    5	/* WRITE */
+#define NFS_SP4_MACH_CRED_COMMIT   6	/* COMMIT */
 #endif /* CONFIG_NFS_V4 */
 
 #ifdef CONFIG_NFS_FSCACHE
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 8651574..01fd84b 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1107,6 +1107,23 @@
 	struct pnfs_commit_bucket *buckets;
 };
 
+#define NFS4_OP_MAP_NUM_LONGS \
+	DIV_ROUND_UP(LAST_NFS4_OP, 8 * sizeof(unsigned long))
+#define NFS4_OP_MAP_NUM_WORDS \
+	(NFS4_OP_MAP_NUM_LONGS * sizeof(unsigned long) / sizeof(u32))
+struct nfs4_op_map {
+	union {
+		unsigned long longs[NFS4_OP_MAP_NUM_LONGS];
+		u32 words[NFS4_OP_MAP_NUM_WORDS];
+	} u;
+};
+
+struct nfs41_state_protection {
+	u32 how;
+	struct nfs4_op_map enforce;
+	struct nfs4_op_map allow;
+};
+
 #define NFS4_EXCHANGE_ID_LEN	(48)
 struct nfs41_exchange_id_args {
 	struct nfs_client		*client;
@@ -1114,6 +1131,7 @@
 	unsigned int 			id_len;
 	char 				id[NFS4_EXCHANGE_ID_LEN];
 	u32				flags;
+	struct nfs41_state_protection	state_protect;
 };
 
 struct nfs41_server_owner {
@@ -1146,6 +1164,7 @@
 	struct nfs41_server_owner	*server_owner;
 	struct nfs41_server_scope	*server_scope;
 	struct nfs41_impl_id		*impl_id;
+	struct nfs41_state_protection	state_protect;
 };
 
 struct nfs41_create_session_args {
@@ -1419,12 +1438,12 @@
 	void	(*read_setup)   (struct nfs_read_data *, struct rpc_message *);
 	void	(*read_pageio_init)(struct nfs_pageio_descriptor *, struct inode *,
 				    const struct nfs_pgio_completion_ops *);
-	void	(*read_rpc_prepare)(struct rpc_task *, struct nfs_read_data *);
+	int	(*read_rpc_prepare)(struct rpc_task *, struct nfs_read_data *);
 	int	(*read_done)  (struct rpc_task *, struct nfs_read_data *);
 	void	(*write_setup)  (struct nfs_write_data *, struct rpc_message *);
 	void	(*write_pageio_init)(struct nfs_pageio_descriptor *, struct inode *, int,
 				     const struct nfs_pgio_completion_ops *);
-	void	(*write_rpc_prepare)(struct rpc_task *, struct nfs_write_data *);
+	int	(*write_rpc_prepare)(struct rpc_task *, struct nfs_write_data *);
 	int	(*write_done)  (struct rpc_task *, struct nfs_write_data *);
 	void	(*commit_setup) (struct nfs_commit_data *, struct rpc_message *);
 	void	(*commit_rpc_prepare)(struct rpc_task *, struct nfs_commit_data *);
@@ -1442,7 +1461,7 @@
 	struct nfs_client *(*alloc_client) (const struct nfs_client_initdata *);
 	struct nfs_client *
 		(*init_client) (struct nfs_client *, const struct rpc_timeout *,
-				const char *, rpc_authflavor_t);
+				const char *);
 	void	(*free_client) (struct nfs_client *);
 	struct nfs_server *(*create_server)(struct nfs_mount_info *, struct nfs_subversion *);
 	struct nfs_server *(*clone_server)(struct nfs_server *, struct nfs_fh *,
diff --git a/include/linux/of.h b/include/linux/of.h
index 3a45c4f..f95aee3 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -281,6 +281,9 @@
 extern int of_parse_phandle_with_args(const struct device_node *np,
 	const char *list_name, const char *cells_name, int index,
 	struct of_phandle_args *out_args);
+extern int of_parse_phandle_with_fixed_args(const struct device_node *np,
+	const char *list_name, int cells_count, int index,
+	struct of_phandle_args *out_args);
 extern int of_count_phandle_with_args(const struct device_node *np,
 	const char *list_name, const char *cells_name);
 
@@ -324,12 +327,6 @@
  */
 const __be32 *of_prop_next_u32(struct property *prop, const __be32 *cur,
 			       u32 *pu);
-#define of_property_for_each_u32(np, propname, prop, p, u)	\
-	for (prop = of_find_property(np, propname, NULL),	\
-		p = of_prop_next_u32(prop, NULL, &u);		\
-		p;						\
-		p = of_prop_next_u32(prop, p, &u))
-
 /*
  * struct property *prop;
  * const char *s;
@@ -338,11 +335,6 @@
  *         printk("String value: %s\n", s);
  */
 const char *of_prop_next_string(struct property *prop, const char *cur);
-#define of_property_for_each_string(np, propname, prop, s)	\
-	for (prop = of_find_property(np, propname, NULL),	\
-		s = of_prop_next_string(prop, NULL);		\
-		s;						\
-		s = of_prop_next_string(prop, s))
 
 int of_device_is_stdout_path(struct device_node *dn);
 
@@ -497,6 +489,13 @@
 	return -ENOSYS;
 }
 
+static inline int of_parse_phandle_with_fixed_args(const struct device_node *np,
+	const char *list_name, int cells_count, int index,
+	struct of_phandle_args *out_args)
+{
+	return -ENOSYS;
+}
+
 static inline int of_count_phandle_with_args(struct device_node *np,
 					     const char *list_name,
 					     const char *cells_name)
@@ -519,12 +518,20 @@
 	return 0;
 }
 
+static inline const __be32 *of_prop_next_u32(struct property *prop,
+		const __be32 *cur, u32 *pu)
+{
+	return NULL;
+}
+
+static inline const char *of_prop_next_string(struct property *prop,
+		const char *cur)
+{
+	return NULL;
+}
+
 #define of_match_ptr(_ptr)	NULL
 #define of_match_node(_matches, _node)	NULL
-#define of_property_for_each_u32(np, propname, prop, p, u) \
-	while (0)
-#define of_property_for_each_string(np, propname, prop, s) \
-	while (0)
 #endif /* CONFIG_OF */
 
 #ifndef of_node_to_nid
@@ -573,6 +580,18 @@
 	return of_property_read_u32_array(np, propname, out_value, 1);
 }
 
+#define of_property_for_each_u32(np, propname, prop, p, u)	\
+	for (prop = of_find_property(np, propname, NULL),	\
+		p = of_prop_next_u32(prop, NULL, &u);		\
+		p;						\
+		p = of_prop_next_u32(prop, p, &u))
+
+#define of_property_for_each_string(np, propname, prop, s)	\
+	for (prop = of_find_property(np, propname, NULL),	\
+		s = of_prop_next_string(prop, NULL);		\
+		s;						\
+		s = of_prop_next_string(prop, s))
+
 #if defined(CONFIG_PROC_FS) && defined(CONFIG_PROC_DEVICETREE)
 extern void proc_device_tree_add_node(struct device_node *, struct proc_dir_entry *);
 extern void proc_device_tree_add_prop(struct proc_dir_entry *pde, struct property *prop);
diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index ed136ad..a478c62 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -90,6 +90,9 @@
 extern int of_flat_dt_is_compatible(unsigned long node, const char *name);
 extern int of_flat_dt_match(unsigned long node, const char *const *matches);
 extern unsigned long of_get_flat_dt_root(void);
+extern int of_scan_flat_dt_by_path(const char *path,
+	int (*it)(unsigned long node, const char *name, int depth, void *data),
+	void *data);
 
 extern int early_init_dt_scan_chosen(unsigned long node, const char *uname,
 				     int depth, void *data);
@@ -106,8 +109,7 @@
  * physical addresses.
  */
 #ifdef CONFIG_BLK_DEV_INITRD
-extern void early_init_dt_setup_initrd_arch(unsigned long start,
-					    unsigned long end);
+extern void early_init_dt_setup_initrd_arch(u64 start, u64 end);
 #endif
 
 /* Early flat tree scan hooks */
diff --git a/include/linux/of_net.h b/include/linux/of_net.h
index 61bf53b..34597c8 100644
--- a/include/linux/of_net.h
+++ b/include/linux/of_net.h
@@ -9,10 +9,10 @@
 
 #ifdef CONFIG_OF_NET
 #include <linux/of.h>
-extern const int of_get_phy_mode(struct device_node *np);
+extern int of_get_phy_mode(struct device_node *np);
 extern const void *of_get_mac_address(struct device_node *np);
 #else
-static inline const int of_get_phy_mode(struct device_node *np)
+static inline int of_get_phy_mode(struct device_node *np)
 {
 	return -ENODEV;
 }
diff --git a/include/linux/of_reserved_mem.h b/include/linux/of_reserved_mem.h
new file mode 100644
index 0000000..c841282
--- /dev/null
+++ b/include/linux/of_reserved_mem.h
@@ -0,0 +1,14 @@
+#ifndef __OF_RESERVED_MEM_H
+#define __OF_RESERVED_MEM_H
+
+#ifdef CONFIG_OF_RESERVED_MEM
+void of_reserved_mem_device_init(struct device *dev);
+void of_reserved_mem_device_release(struct device *dev);
+void early_init_dt_scan_reserved_mem(void);
+#else
+static inline void of_reserved_mem_device_init(struct device *dev) { }
+static inline void of_reserved_mem_device_release(struct device *dev) { }
+static inline void early_init_dt_scan_reserved_mem(void) { }
+#endif
+
+#endif /* __OF_RESERVED_MEM_H */
diff --git a/include/linux/platform_data/atmel.h b/include/linux/platform_data/atmel.h
index 6a293b7..cea9f701 100644
--- a/include/linux/platform_data/atmel.h
+++ b/include/linux/platform_data/atmel.h
@@ -71,6 +71,10 @@
 	u8		on_flash_bbt;		/* bbt on flash */
 	struct mtd_partition *parts;
 	unsigned int	num_parts;
+	bool		has_dma;		/* support dma transfer */
+
+	/* default is false, only for at32ap7000 chip is true */
+	bool		need_reset_workaround;
 };
 
  /* Serial */
diff --git a/include/linux/platform_data/dma-rcar-hpbdma.h b/include/linux/platform_data/dma-rcar-hpbdma.h
new file mode 100644
index 0000000..648b8ea
--- /dev/null
+++ b/include/linux/platform_data/dma-rcar-hpbdma.h
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2011-2013 Renesas Electronics Corporation
+ * Copyright (C) 2013 Cogent Embedded, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ */
+
+#ifndef __DMA_RCAR_HPBDMA_H
+#define __DMA_RCAR_HPBDMA_H
+
+#include <linux/bitops.h>
+#include <linux/types.h>
+
+/* Transmit sizes and respective register values */
+enum {
+	XMIT_SZ_8BIT	= 0,
+	XMIT_SZ_16BIT	= 1,
+	XMIT_SZ_32BIT	= 2,
+	XMIT_SZ_MAX
+};
+
+/* DMA control register (DCR) bits */
+#define HPB_DMAE_DCR_DTAMD		(1u << 26)
+#define HPB_DMAE_DCR_DTAC		(1u << 25)
+#define HPB_DMAE_DCR_DTAU		(1u << 24)
+#define HPB_DMAE_DCR_DTAU1		(1u << 23)
+#define HPB_DMAE_DCR_SWMD		(1u << 22)
+#define HPB_DMAE_DCR_BTMD		(1u << 21)
+#define HPB_DMAE_DCR_PKMD		(1u << 20)
+#define HPB_DMAE_DCR_CT			(1u << 18)
+#define HPB_DMAE_DCR_ACMD		(1u << 17)
+#define HPB_DMAE_DCR_DIP		(1u << 16)
+#define HPB_DMAE_DCR_SMDL		(1u << 13)
+#define HPB_DMAE_DCR_SPDAM		(1u << 12)
+#define HPB_DMAE_DCR_SDRMD_MASK		(3u << 10)
+#define HPB_DMAE_DCR_SDRMD_MOD		(0u << 10)
+#define HPB_DMAE_DCR_SDRMD_AUTO		(1u << 10)
+#define HPB_DMAE_DCR_SDRMD_TIMER	(2u << 10)
+#define HPB_DMAE_DCR_SPDS_MASK		(3u << 8)
+#define HPB_DMAE_DCR_SPDS_8BIT		(0u << 8)
+#define HPB_DMAE_DCR_SPDS_16BIT		(1u << 8)
+#define HPB_DMAE_DCR_SPDS_32BIT		(2u << 8)
+#define HPB_DMAE_DCR_DMDL		(1u << 5)
+#define HPB_DMAE_DCR_DPDAM		(1u << 4)
+#define HPB_DMAE_DCR_DDRMD_MASK		(3u << 2)
+#define HPB_DMAE_DCR_DDRMD_MOD		(0u << 2)
+#define HPB_DMAE_DCR_DDRMD_AUTO		(1u << 2)
+#define HPB_DMAE_DCR_DDRMD_TIMER	(2u << 2)
+#define HPB_DMAE_DCR_DPDS_MASK		(3u << 0)
+#define HPB_DMAE_DCR_DPDS_8BIT		(0u << 0)
+#define HPB_DMAE_DCR_DPDS_16BIT		(1u << 0)
+#define HPB_DMAE_DCR_DPDS_32BIT		(2u << 0)
+
+/* Asynchronous reset register (ASYNCRSTR) bits */
+#define HPB_DMAE_ASYNCRSTR_ASRST41	BIT(10)
+#define HPB_DMAE_ASYNCRSTR_ASRST40	BIT(9)
+#define HPB_DMAE_ASYNCRSTR_ASRST39	BIT(8)
+#define HPB_DMAE_ASYNCRSTR_ASRST27	BIT(7)
+#define HPB_DMAE_ASYNCRSTR_ASRST26	BIT(6)
+#define HPB_DMAE_ASYNCRSTR_ASRST25	BIT(5)
+#define HPB_DMAE_ASYNCRSTR_ASRST24	BIT(4)
+#define HPB_DMAE_ASYNCRSTR_ASRST23	BIT(3)
+#define HPB_DMAE_ASYNCRSTR_ASRST22	BIT(2)
+#define HPB_DMAE_ASYNCRSTR_ASRST21	BIT(1)
+#define HPB_DMAE_ASYNCRSTR_ASRST20	BIT(0)
+
+struct hpb_dmae_slave_config {
+	unsigned int	id;
+	dma_addr_t	addr;
+	u32		dcr;
+	u32		port;
+	u32		rstr;
+	u32		mdr;
+	u32		mdm;
+	u32		flags;
+#define	HPB_DMAE_SET_ASYNC_RESET	BIT(0)
+#define	HPB_DMAE_SET_ASYNC_MODE		BIT(1)
+	u32		dma_ch;
+};
+
+#define HPB_DMAE_CHANNEL(_irq, _s_id)	\
+{					\
+	.ch_irq		= _irq,		\
+	.s_id		= _s_id,	\
+}
+
+struct hpb_dmae_channel {
+	unsigned int	ch_irq;
+	unsigned int	s_id;
+};
+
+struct hpb_dmae_pdata {
+	const struct hpb_dmae_slave_config *slaves;
+	int num_slaves;
+	const struct hpb_dmae_channel *channels;
+	int num_channels;
+	const unsigned int ts_shift[XMIT_SZ_MAX];
+	int num_hw_channels;
+};
+
+#endif
diff --git a/include/linux/platform_data/edma.h b/include/linux/platform_data/edma.h
index 57300fd..179fb91b 100644
--- a/include/linux/platform_data/edma.h
+++ b/include/linux/platform_data/edma.h
@@ -180,4 +180,6 @@
 	const s16	(*xbar_chans)[2];
 };
 
+int edma_trigger_channel(unsigned);
+
 #endif
diff --git a/include/linux/platform_data/mtd-nand-pxa3xx.h b/include/linux/platform_data/mtd-nand-pxa3xx.h
index c42f39f..ffb8019 100644
--- a/include/linux/platform_data/mtd-nand-pxa3xx.h
+++ b/include/linux/platform_data/mtd-nand-pxa3xx.h
@@ -16,19 +16,6 @@
 	unsigned int	tAR;  /* ND_ALE low to ND_nRE low delay */
 };
 
-struct pxa3xx_nand_cmdset {
-	uint16_t	read1;
-	uint16_t	read2;
-	uint16_t	program;
-	uint16_t	read_status;
-	uint16_t	read_id;
-	uint16_t	erase;
-	uint16_t	reset;
-	uint16_t	lock;
-	uint16_t	unlock;
-	uint16_t	lock_status;
-};
-
 struct pxa3xx_nand_flash {
 	char		*name;
 	uint32_t	chip_id;
diff --git a/include/linux/quota.h b/include/linux/quota.h
index d133711..cc7494a 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -328,6 +328,7 @@
 	int (*set_dqblk)(struct super_block *, struct kqid, struct fs_disk_quota *);
 	int (*get_xstate)(struct super_block *, struct fs_quota_stat *);
 	int (*set_xstate)(struct super_block *, unsigned int, int);
+	int (*get_xstatev)(struct super_block *, struct fs_quota_statv *);
 };
 
 struct quota_format_type {
diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h
index 0f42469..73069cb 100644
--- a/include/linux/raid/pq.h
+++ b/include/linux/raid/pq.h
@@ -101,6 +101,7 @@
 extern const struct raid6_calls raid6_avx2x1;
 extern const struct raid6_calls raid6_avx2x2;
 extern const struct raid6_calls raid6_avx2x4;
+extern const struct raid6_calls raid6_tilegx8;
 
 struct raid6_recov_calls {
 	void (*data2)(int, size_t, int, int, void **);
diff --git a/include/linux/sh_dma.h b/include/linux/sh_dma.h
index 4e83f3e..b7b43b82 100644
--- a/include/linux/sh_dma.h
+++ b/include/linux/sh_dma.h
@@ -33,13 +33,44 @@
 	char		mid_rid;
 };
 
+/**
+ * struct sh_dmae_channel - DMAC channel platform data
+ * @offset:		register offset within the main IOMEM resource
+ * @dmars:		channel DMARS register offset
+ * @chclr_offset:	channel CHCLR register offset
+ * @dmars_bit:		channel DMARS field offset within the register
+ * @chclr_bit:		bit position, to be set to reset the channel
+ */
 struct sh_dmae_channel {
 	unsigned int	offset;
 	unsigned int	dmars;
-	unsigned int	dmars_bit;
 	unsigned int	chclr_offset;
+	unsigned char	dmars_bit;
+	unsigned char	chclr_bit;
 };
 
+/**
+ * struct sh_dmae_pdata - DMAC platform data
+ * @slave:		array of slaves
+ * @slave_num:		number of slaves in the above array
+ * @channel:		array of DMA channels
+ * @channel_num:	number of channels in the above array
+ * @ts_low_shift:	shift of the low part of the TS field
+ * @ts_low_mask:	low TS field mask
+ * @ts_high_shift:	additional shift of the high part of the TS field
+ * @ts_high_mask:	high TS field mask
+ * @ts_shift:		array of Transfer Size shifts, indexed by TS value
+ * @ts_shift_num:	number of shifts in the above array
+ * @dmaor_init:		DMAOR initialisation value
+ * @chcr_offset:	CHCR address offset
+ * @chcr_ie_bit:	CHCR Interrupt Enable bit
+ * @dmaor_is_32bit:	DMAOR is a 32-bit register
+ * @needs_tend_set:	the TEND register has to be set
+ * @no_dmars:		DMAC has no DMARS registers
+ * @chclr_present:	DMAC has one or several CHCLR registers
+ * @chclr_bitwise:	channel CHCLR registers are bitwise
+ * @slave_only:		DMAC cannot be used for MEMCPY
+ */
 struct sh_dmae_pdata {
 	const struct sh_dmae_slave_config *slave;
 	int slave_num;
@@ -59,42 +90,22 @@
 	unsigned int needs_tend_set:1;
 	unsigned int no_dmars:1;
 	unsigned int chclr_present:1;
+	unsigned int chclr_bitwise:1;
 	unsigned int slave_only:1;
 };
 
-/* DMA register */
-#define SAR	0x00
-#define DAR	0x04
-#define TCR	0x08
-#define CHCR	0x0C
-#define DMAOR	0x40
-
-#define TEND	0x18 /* USB-DMAC */
-
 /* DMAOR definitions */
 #define DMAOR_AE	0x00000004
 #define DMAOR_NMIF	0x00000002
 #define DMAOR_DME	0x00000001
 
 /* Definitions for the SuperH DMAC */
-#define REQ_L	0x00000000
-#define REQ_E	0x00080000
-#define RACK_H	0x00000000
-#define RACK_L	0x00040000
-#define ACK_R	0x00000000
-#define ACK_W	0x00020000
-#define ACK_H	0x00000000
-#define ACK_L	0x00010000
 #define DM_INC	0x00004000
 #define DM_DEC	0x00008000
 #define DM_FIX	0x0000c000
 #define SM_INC	0x00001000
 #define SM_DEC	0x00002000
 #define SM_FIX	0x00003000
-#define RS_IN	0x00000200
-#define RS_OUT	0x00000300
-#define TS_BLK	0x00000040
-#define TM_BUR	0x00000020
 #define CHCR_DE	0x00000001
 #define CHCR_TE	0x00000002
 #define CHCR_IE	0x00000004
diff --git a/include/linux/shdma-base.h b/include/linux/shdma-base.h
index 5b1c984..f92c0a4 100644
--- a/include/linux/shdma-base.h
+++ b/include/linux/shdma-base.h
@@ -96,7 +96,7 @@
 	dma_addr_t (*slave_addr)(struct shdma_chan *);
 	int (*desc_setup)(struct shdma_chan *, struct shdma_desc *,
 			  dma_addr_t, dma_addr_t, size_t *);
-	int (*set_slave)(struct shdma_chan *, int, bool);
+	int (*set_slave)(struct shdma_chan *, int, dma_addr_t, bool);
 	void (*setup_xfer)(struct shdma_chan *, int);
 	void (*start_xfer)(struct shdma_chan *, struct shdma_desc *);
 	struct shdma_desc *(*embedded_desc)(void *, int);
@@ -116,7 +116,6 @@
 
 int shdma_request_irq(struct shdma_chan *, int,
 			   unsigned long, const char *);
-void shdma_free_irq(struct shdma_chan *);
 bool shdma_reset(struct shdma_dev *sdev);
 void shdma_chan_probe(struct shdma_dev *sdev,
 			   struct shdma_chan *schan, int id);
diff --git a/include/linux/spi/mmc_spi.h b/include/linux/spi/mmc_spi.h
index 32be8db..274bc0f 100644
--- a/include/linux/spi/mmc_spi.h
+++ b/include/linux/spi/mmc_spi.h
@@ -7,6 +7,11 @@
 struct device;
 struct mmc_host;
 
+#define MMC_SPI_USE_CD_GPIO			(1 << 0)
+#define MMC_SPI_USE_RO_GPIO			(1 << 1)
+#define MMC_SPI_CD_GPIO_ACTIVE_LOW		(1 << 2)
+#define MMC_SPI_RO_GPIO_ACTIVE_LOW		(1 << 3)
+
 /* Put this in platform_data of a device being used to manage an MMC/SD
  * card slot.  (Modeled after PXA mmc glue; see that for usage examples.)
  *
@@ -21,17 +26,19 @@
 		void *);
 	void (*exit)(struct device *, void *);
 
-	/* sense switch on sd cards */
-	int (*get_ro)(struct device *);
-
 	/*
-	 * If board does not use CD interrupts, driver can optimize polling
-	 * using this function.
+	 * Card Detect and Read Only GPIOs. To enable debouncing on the card
+	 * detect GPIO, set the cd_debounce to the debounce time in
+	 * microseconds.
 	 */
-	int (*get_cd)(struct device *);
+	unsigned int flags;
+	unsigned int cd_gpio;
+	unsigned int cd_debounce;
+	unsigned int ro_gpio;
 
 	/* Capabilities to pass into mmc core (e.g. MMC_CAP_NEEDS_POLL). */
 	unsigned long caps;
+	unsigned long caps2;
 
 	/* how long to debounce card detect, in msecs */
 	u16 detect_delay;
diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index 0dd00f4..790be14 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -24,12 +24,21 @@
 
 struct rpcsec_gss_info;
 
+/* auth_cred ac_flags bits */
+enum {
+	RPC_CRED_NO_CRKEY_TIMEOUT = 0, /* underlying cred has no key timeout */
+	RPC_CRED_KEY_EXPIRE_SOON = 1, /* underlying cred key will expire soon */
+	RPC_CRED_NOTIFY_TIMEOUT = 2,   /* nofity generic cred when underlying
+					key will expire soon */
+};
+
 /* Work around the lack of a VFS credential */
 struct auth_cred {
 	kuid_t	uid;
 	kgid_t	gid;
 	struct group_info *group_info;
 	const char *principal;
+	unsigned long ac_flags;
 	unsigned char machine_cred : 1;
 };
 
@@ -87,6 +96,11 @@
 	/* per-flavor data */
 };
 
+struct rpc_auth_create_args {
+	rpc_authflavor_t pseudoflavor;
+	const char *target_name;
+};
+
 /* Flags for rpcauth_lookupcred() */
 #define RPCAUTH_LOOKUP_NEW		0x01	/* Accept an uninitialised cred */
 
@@ -97,17 +111,17 @@
 	struct module		*owner;
 	rpc_authflavor_t	au_flavor;	/* flavor (RPC_AUTH_*) */
 	char *			au_name;
-	struct rpc_auth *	(*create)(struct rpc_clnt *, rpc_authflavor_t);
+	struct rpc_auth *	(*create)(struct rpc_auth_create_args *, struct rpc_clnt *);
 	void			(*destroy)(struct rpc_auth *);
 
 	struct rpc_cred *	(*lookup_cred)(struct rpc_auth *, struct auth_cred *, int);
 	struct rpc_cred *	(*crcreate)(struct rpc_auth*, struct auth_cred *, int);
-	int			(*pipes_create)(struct rpc_auth *);
-	void			(*pipes_destroy)(struct rpc_auth *);
 	int			(*list_pseudoflavors)(rpc_authflavor_t *, int);
 	rpc_authflavor_t	(*info2flavor)(struct rpcsec_gss_info *);
 	int			(*flavor2info)(rpc_authflavor_t,
 						struct rpcsec_gss_info *);
+	int			(*key_timeout)(struct rpc_auth *,
+						struct rpc_cred *);
 };
 
 struct rpc_credops {
@@ -124,6 +138,8 @@
 						void *, __be32 *, void *);
 	int			(*crunwrap_resp)(struct rpc_task *, kxdrdproc_t,
 						void *, __be32 *, void *);
+	int			(*crkey_timeout)(struct rpc_cred *);
+	bool			(*crkey_to_expire)(struct rpc_cred *);
 };
 
 extern const struct rpc_authops	authunix_ops;
@@ -140,7 +156,8 @@
 struct rpc_cred *	rpc_lookup_machine_cred(const char *service_name);
 int			rpcauth_register(const struct rpc_authops *);
 int			rpcauth_unregister(const struct rpc_authops *);
-struct rpc_auth *	rpcauth_create(rpc_authflavor_t, struct rpc_clnt *);
+struct rpc_auth *	rpcauth_create(struct rpc_auth_create_args *,
+				struct rpc_clnt *);
 void			rpcauth_release(struct rpc_auth *);
 rpc_authflavor_t	rpcauth_get_pseudoflavor(rpc_authflavor_t,
 				struct rpcsec_gss_info *);
@@ -162,6 +179,9 @@
 int			rpcauth_init_credcache(struct rpc_auth *);
 void			rpcauth_destroy_credcache(struct rpc_auth *);
 void			rpcauth_clear_credcache(struct rpc_cred_cache *);
+int			rpcauth_key_timeout_notify(struct rpc_auth *,
+						struct rpc_cred *);
+bool			rpcauth_cred_key_to_expire(struct rpc_cred *);
 
 static inline
 struct rpc_cred *	get_rpccred(struct rpc_cred *cred)
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index bfe11be..6740801 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -21,6 +21,7 @@
 #include <linux/sunrpc/stats.h>
 #include <linux/sunrpc/xdr.h>
 #include <linux/sunrpc/timer.h>
+#include <linux/sunrpc/rpc_pipe_fs.h>
 #include <asm/signal.h>
 #include <linux/path.h>
 #include <net/ipv6.h>
@@ -32,6 +33,7 @@
  */
 struct rpc_clnt {
 	atomic_t		cl_count;	/* Number of references */
+	unsigned int		cl_clid;	/* client id */
 	struct list_head	cl_clients;	/* Global list of clients */
 	struct list_head	cl_tasks;	/* List of tasks */
 	spinlock_t		cl_lock;	/* spinlock */
@@ -41,7 +43,6 @@
 				cl_vers,	/* RPC version number */
 				cl_maxproc;	/* max procedure number */
 
-	const char *		cl_protname;	/* protocol name */
 	struct rpc_auth *	cl_auth;	/* authenticator */
 	struct rpc_stat *	cl_stats;	/* per-program statistics */
 	struct rpc_iostats *	cl_metrics;	/* per-client statistics */
@@ -56,12 +57,11 @@
 
 	int			cl_nodelen;	/* nodename length */
 	char 			cl_nodename[UNX_MAXNODENAME];
-	struct dentry *		cl_dentry;
+	struct rpc_pipe_dir_head cl_pipedir_objects;
 	struct rpc_clnt *	cl_parent;	/* Points to parent of clones */
 	struct rpc_rtt		cl_rtt_default;
 	struct rpc_timeout	cl_timeout_default;
 	const struct rpc_program *cl_program;
-	char			*cl_principal;	/* target to authenticate to */
 };
 
 /*
diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h
index aa5b582..a353e03 100644
--- a/include/linux/sunrpc/rpc_pipe_fs.h
+++ b/include/linux/sunrpc/rpc_pipe_fs.h
@@ -5,6 +5,26 @@
 
 #include <linux/workqueue.h>
 
+struct rpc_pipe_dir_head {
+	struct list_head pdh_entries;
+	struct dentry *pdh_dentry;
+};
+
+struct rpc_pipe_dir_object_ops;
+struct rpc_pipe_dir_object {
+	struct list_head pdo_head;
+	const struct rpc_pipe_dir_object_ops *pdo_ops;
+
+	void *pdo_data;
+};
+
+struct rpc_pipe_dir_object_ops {
+	int (*create)(struct dentry *dir,
+			struct rpc_pipe_dir_object *pdo);
+	void (*destroy)(struct dentry *dir,
+			struct rpc_pipe_dir_object *pdo);
+};
+
 struct rpc_pipe_msg {
 	struct list_head list;
 	void *data;
@@ -74,7 +94,24 @@
 
 struct rpc_clnt;
 extern struct dentry *rpc_create_client_dir(struct dentry *, const char *, struct rpc_clnt *);
-extern int rpc_remove_client_dir(struct dentry *);
+extern int rpc_remove_client_dir(struct rpc_clnt *);
+
+extern void rpc_init_pipe_dir_head(struct rpc_pipe_dir_head *pdh);
+extern void rpc_init_pipe_dir_object(struct rpc_pipe_dir_object *pdo,
+		const struct rpc_pipe_dir_object_ops *pdo_ops,
+		void *pdo_data);
+extern int rpc_add_pipe_dir_object(struct net *net,
+		struct rpc_pipe_dir_head *pdh,
+		struct rpc_pipe_dir_object *pdo);
+extern void rpc_remove_pipe_dir_object(struct net *net,
+		struct rpc_pipe_dir_head *pdh,
+		struct rpc_pipe_dir_object *pdo);
+extern struct rpc_pipe_dir_object *rpc_find_or_alloc_pipe_dir_object(
+		struct net *net,
+		struct rpc_pipe_dir_head *pdh,
+		int (*match)(struct rpc_pipe_dir_object *, void *),
+		struct rpc_pipe_dir_object *(*alloc)(void *),
+		void *data);
 
 struct cache_detail;
 extern struct dentry *rpc_create_cache_dir(struct dentry *,
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 1821445..096ee58 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -79,7 +79,7 @@
 	unsigned short		tk_flags;	/* misc flags */
 	unsigned short		tk_timeouts;	/* maj timeouts */
 
-#ifdef RPC_DEBUG
+#if defined(RPC_DEBUG) || defined(RPC_TRACEPOINTS)
 	unsigned short		tk_pid;		/* debugging aid */
 #endif
 	unsigned char		tk_priority : 2,/* Task priority */
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index ac8d488..24579a0 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -90,4 +90,11 @@
 	TYPE tmp;						\
 	offsetof(TYPE, MEMBER) + sizeof(tmp.MEMBER); })		\
 
+/*
+ * External user API
+ */
+extern struct vfio_group *vfio_group_get_external_user(struct file *filep);
+extern void vfio_group_put_external_user(struct vfio_group *group);
+extern int vfio_external_user_iommu_id(struct vfio_group *group);
+
 #endif /* VFIO_H */
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index 43be87d..d51d16c 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -6,6 +6,8 @@
 
 #include <linux/sunrpc/sched.h>
 #include <linux/sunrpc/clnt.h>
+#include <net/tcp_states.h>
+#include <linux/net.h>
 #include <linux/tracepoint.h>
 
 DECLARE_EVENT_CLASS(rpc_task_status,
@@ -15,18 +17,20 @@
 	TP_ARGS(task),
 
 	TP_STRUCT__entry(
-		__field(const struct rpc_task *, task)
-		__field(const struct rpc_clnt *, clnt)
+		__field(unsigned int, task_id)
+		__field(unsigned int, client_id)
 		__field(int, status)
 	),
 
 	TP_fast_assign(
-		__entry->task = task;
-		__entry->clnt = task->tk_client;
+		__entry->task_id = task->tk_pid;
+		__entry->client_id = task->tk_client->cl_clid;
 		__entry->status = task->tk_status;
 	),
 
-	TP_printk("task:%p@%p, status %d",__entry->task, __entry->clnt, __entry->status)
+	TP_printk("task:%u@%u, status %d",
+		__entry->task_id, __entry->client_id,
+		__entry->status)
 );
 
 DEFINE_EVENT(rpc_task_status, rpc_call_status,
@@ -47,18 +51,20 @@
 	TP_ARGS(task, status),
 
 	TP_STRUCT__entry(
-		__field(const struct rpc_task *, task)
-		__field(const struct rpc_clnt *, clnt)
+		__field(unsigned int, task_id)
+		__field(unsigned int, client_id)
 		__field(int, status)
 	),
 
 	TP_fast_assign(
-		__entry->task = task;
-		__entry->clnt = task->tk_client;
+		__entry->task_id = task->tk_pid;
+		__entry->client_id = task->tk_client->cl_clid;
 		__entry->status = status;
 	),
 
-	TP_printk("task:%p@%p, status %d",__entry->task, __entry->clnt, __entry->status)
+	TP_printk("task:%u@%u, status %d",
+		__entry->task_id, __entry->client_id,
+		__entry->status)
 );
 
 DECLARE_EVENT_CLASS(rpc_task_running,
@@ -68,8 +74,8 @@
 	TP_ARGS(clnt, task, action),
 
 	TP_STRUCT__entry(
-		__field(const struct rpc_clnt *, clnt)
-		__field(const struct rpc_task *, task)
+		__field(unsigned int, task_id)
+		__field(unsigned int, client_id)
 		__field(const void *, action)
 		__field(unsigned long, runstate)
 		__field(int, status)
@@ -77,17 +83,16 @@
 		),
 
 	TP_fast_assign(
-		__entry->clnt = clnt;
-		__entry->task = task;
+		__entry->client_id = clnt->cl_clid;
+		__entry->task_id = task->tk_pid;
 		__entry->action = action;
 		__entry->runstate = task->tk_runstate;
 		__entry->status = task->tk_status;
 		__entry->flags = task->tk_flags;
 		),
 
-	TP_printk("task:%p@%p flags=%4.4x state=%4.4lx status=%d action=%pf",
-		__entry->task,
-		__entry->clnt,
+	TP_printk("task:%u@%u flags=%4.4x state=%4.4lx status=%d action=%pf",
+		__entry->task_id, __entry->client_id,
 		__entry->flags,
 		__entry->runstate,
 		__entry->status,
@@ -126,8 +131,8 @@
 	TP_ARGS(clnt, task, q),
 
 	TP_STRUCT__entry(
-		__field(const struct rpc_clnt *, clnt)
-		__field(const struct rpc_task *, task)
+		__field(unsigned int, task_id)
+		__field(unsigned int, client_id)
 		__field(unsigned long, timeout)
 		__field(unsigned long, runstate)
 		__field(int, status)
@@ -136,8 +141,8 @@
 		),
 
 	TP_fast_assign(
-		__entry->clnt = clnt;
-		__entry->task = task;
+		__entry->client_id = clnt->cl_clid;
+		__entry->task_id = task->tk_pid;
 		__entry->timeout = task->tk_timeout;
 		__entry->runstate = task->tk_runstate;
 		__entry->status = task->tk_status;
@@ -145,9 +150,8 @@
 		__assign_str(q_name, rpc_qname(q));
 		),
 
-	TP_printk("task:%p@%p flags=%4.4x state=%4.4lx status=%d timeout=%lu queue=%s",
-		__entry->task,
-		__entry->clnt,
+	TP_printk("task:%u@%u flags=%4.4x state=%4.4lx status=%d timeout=%lu queue=%s",
+		__entry->task_id, __entry->client_id,
 		__entry->flags,
 		__entry->runstate,
 		__entry->status,
@@ -172,6 +176,135 @@
 
 );
 
+#define rpc_show_socket_state(state) \
+	__print_symbolic(state, \
+		{ SS_FREE, "FREE" }, \
+		{ SS_UNCONNECTED, "UNCONNECTED" }, \
+		{ SS_CONNECTING, "CONNECTING," }, \
+		{ SS_CONNECTED, "CONNECTED," }, \
+		{ SS_DISCONNECTING, "DISCONNECTING" })
+
+#define rpc_show_sock_state(state) \
+	__print_symbolic(state, \
+		{ TCP_ESTABLISHED, "ESTABLISHED" }, \
+		{ TCP_SYN_SENT, "SYN_SENT" }, \
+		{ TCP_SYN_RECV, "SYN_RECV" }, \
+		{ TCP_FIN_WAIT1, "FIN_WAIT1" }, \
+		{ TCP_FIN_WAIT2, "FIN_WAIT2" }, \
+		{ TCP_TIME_WAIT, "TIME_WAIT" }, \
+		{ TCP_CLOSE, "CLOSE" }, \
+		{ TCP_CLOSE_WAIT, "CLOSE_WAIT" }, \
+		{ TCP_LAST_ACK, "LAST_ACK" }, \
+		{ TCP_LISTEN, "LISTEN" }, \
+		{ TCP_CLOSING, "CLOSING" })
+
+DECLARE_EVENT_CLASS(xs_socket_event,
+
+		TP_PROTO(
+			struct rpc_xprt *xprt,
+			struct socket *socket
+		),
+
+		TP_ARGS(xprt, socket),
+
+		TP_STRUCT__entry(
+			__field(unsigned int, socket_state)
+			__field(unsigned int, sock_state)
+			__field(unsigned long long, ino)
+			__string(dstaddr,
+				xprt->address_strings[RPC_DISPLAY_ADDR])
+			__string(dstport,
+				xprt->address_strings[RPC_DISPLAY_PORT])
+		),
+
+		TP_fast_assign(
+			struct inode *inode = SOCK_INODE(socket);
+			__entry->socket_state = socket->state;
+			__entry->sock_state = socket->sk->sk_state;
+			__entry->ino = (unsigned long long)inode->i_ino;
+			__assign_str(dstaddr,
+				xprt->address_strings[RPC_DISPLAY_ADDR]);
+			__assign_str(dstport,
+				xprt->address_strings[RPC_DISPLAY_PORT]);
+		),
+
+		TP_printk(
+			"socket:[%llu] dstaddr=%s/%s "
+			"state=%u (%s) sk_state=%u (%s)",
+			__entry->ino, __get_str(dstaddr), __get_str(dstport),
+			__entry->socket_state,
+			rpc_show_socket_state(__entry->socket_state),
+			__entry->sock_state,
+			rpc_show_sock_state(__entry->sock_state)
+		)
+);
+#define DEFINE_RPC_SOCKET_EVENT(name) \
+	DEFINE_EVENT(xs_socket_event, name, \
+			TP_PROTO( \
+				struct rpc_xprt *xprt, \
+				struct socket *socket \
+			), \
+			TP_ARGS(xprt, socket))
+
+DECLARE_EVENT_CLASS(xs_socket_event_done,
+
+		TP_PROTO(
+			struct rpc_xprt *xprt,
+			struct socket *socket,
+			int error
+		),
+
+		TP_ARGS(xprt, socket, error),
+
+		TP_STRUCT__entry(
+			__field(int, error)
+			__field(unsigned int, socket_state)
+			__field(unsigned int, sock_state)
+			__field(unsigned long long, ino)
+			__string(dstaddr,
+				xprt->address_strings[RPC_DISPLAY_ADDR])
+			__string(dstport,
+				xprt->address_strings[RPC_DISPLAY_PORT])
+		),
+
+		TP_fast_assign(
+			struct inode *inode = SOCK_INODE(socket);
+			__entry->socket_state = socket->state;
+			__entry->sock_state = socket->sk->sk_state;
+			__entry->ino = (unsigned long long)inode->i_ino;
+			__entry->error = error;
+			__assign_str(dstaddr,
+				xprt->address_strings[RPC_DISPLAY_ADDR]);
+			__assign_str(dstport,
+				xprt->address_strings[RPC_DISPLAY_PORT]);
+		),
+
+		TP_printk(
+			"error=%d socket:[%llu] dstaddr=%s/%s "
+			"state=%u (%s) sk_state=%u (%s)",
+			__entry->error,
+			__entry->ino, __get_str(dstaddr), __get_str(dstport),
+			__entry->socket_state,
+			rpc_show_socket_state(__entry->socket_state),
+			__entry->sock_state,
+			rpc_show_sock_state(__entry->sock_state)
+		)
+);
+#define DEFINE_RPC_SOCKET_EVENT_DONE(name) \
+	DEFINE_EVENT(xs_socket_event_done, name, \
+			TP_PROTO( \
+				struct rpc_xprt *xprt, \
+				struct socket *socket, \
+				int error \
+			), \
+			TP_ARGS(xprt, socket, error))
+
+DEFINE_RPC_SOCKET_EVENT(rpc_socket_state_change);
+DEFINE_RPC_SOCKET_EVENT_DONE(rpc_socket_connect);
+DEFINE_RPC_SOCKET_EVENT_DONE(rpc_socket_reset_connection);
+DEFINE_RPC_SOCKET_EVENT(rpc_socket_close);
+DEFINE_RPC_SOCKET_EVENT(rpc_socket_shutdown);
+
 #endif /* _TRACE_SUNRPC_H */
 
 #include <trace/define_trace.h>
diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h
index afd0cbd..f1e12bd 100644
--- a/include/uapi/linux/dm-ioctl.h
+++ b/include/uapi/linux/dm-ioctl.h
@@ -267,9 +267,9 @@
 #define DM_DEV_SET_GEOMETRY	_IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
 
 #define DM_VERSION_MAJOR	4
-#define DM_VERSION_MINOR	25
+#define DM_VERSION_MINOR	26
 #define DM_VERSION_PATCHLEVEL	0
-#define DM_VERSION_EXTRA	"-ioctl (2013-06-26)"
+#define DM_VERSION_EXTRA	"-ioctl (2013-08-15)"
 
 /* Status bits */
 #define DM_READONLY_FLAG	(1 << 0) /* In/Out */
diff --git a/include/uapi/linux/dqblk_xfs.h b/include/uapi/linux/dqblk_xfs.h
index 8655280..dcd75cc 100644
--- a/include/uapi/linux/dqblk_xfs.h
+++ b/include/uapi/linux/dqblk_xfs.h
@@ -38,6 +38,7 @@
 #define Q_XGETQSTAT	XQM_CMD(5)	/* get quota subsystem status */
 #define Q_XQUOTARM	XQM_CMD(6)	/* free disk space used by dquots */
 #define Q_XQUOTASYNC	XQM_CMD(7)	/* delalloc flush, updates dquots */
+#define Q_XGETQSTATV	XQM_CMD(8)	/* newer version of get quota */
 
 /*
  * fs_disk_quota structure:
@@ -163,4 +164,50 @@
 	__u16		qs_iwarnlimit;	/* limit for num warnings */
 } fs_quota_stat_t;
 
+/*
+ * fs_quota_statv is used by Q_XGETQSTATV for a given file system. It provides
+ * a centralized way to get meta information about the quota subsystem. eg.
+ * space taken up for user, group, and project quotas, number of dquots
+ * currently incore.
+ *
+ * This version has proper versioning support with appropriate padding for
+ * future expansions, and ability to expand for future without creating any
+ * backward compatibility issues.
+ *
+ * Q_XGETQSTATV uses the passed in value of the requested version via
+ * fs_quota_statv.qs_version to determine the return data layout of
+ * fs_quota_statv.  The kernel will fill the data fields relevant to that
+ * version.
+ *
+ * If kernel does not support user space caller specified version, EINVAL will
+ * be returned. User space caller can then reduce the version number and retry
+ * the same command.
+ */
+#define FS_QSTATV_VERSION1	1	/* fs_quota_statv.qs_version */
+/*
+ * Some basic information about 'quota files' for Q_XGETQSTATV command
+ */
+struct fs_qfilestatv {
+	__u64		qfs_ino;	/* inode number */
+	__u64		qfs_nblks;	/* number of BBs 512-byte-blks */
+	__u32		qfs_nextents;	/* number of extents */
+	__u32		qfs_pad;	/* pad for 8-byte alignment */
+};
+
+struct fs_quota_statv {
+	__s8			qs_version;	/* version for future changes */
+	__u8			qs_pad1;	/* pad for 16bit alignment */
+	__u16			qs_flags;	/* FS_QUOTA_.* flags */
+	__u32			qs_incoredqs;	/* number of dquots incore */
+	struct fs_qfilestatv	qs_uquota;	/* user quota information */
+	struct fs_qfilestatv	qs_gquota;	/* group quota information */
+	struct fs_qfilestatv	qs_pquota;	/* project quota information */
+	__s32			qs_btimelimit;  /* limit for blks timer */
+	__s32			qs_itimelimit;  /* limit for inodes timer */
+	__s32			qs_rtbtimelimit;/* limit for rt blks timer */
+	__u16			qs_bwarnlimit;	/* limit for num warnings */
+	__u16			qs_iwarnlimit;	/* limit for num warnings */
+	__u64			qs_pad2[8];	/* for future proofing */
+};
+
 #endif	/* _LINUX_DQBLK_XFS_H */
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 916e444..0fd47f5 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -324,6 +324,44 @@
 	VFIO_PCI_NUM_IRQS
 };
 
+/**
+ * VFIO_DEVICE_GET_PCI_HOT_RESET_INFO - _IORW(VFIO_TYPE, VFIO_BASE + 12,
+ *					      struct vfio_pci_hot_reset_info)
+ *
+ * Return: 0 on success, -errno on failure:
+ *	-enospc = insufficient buffer, -enodev = unsupported for device.
+ */
+struct vfio_pci_dependent_device {
+	__u32	group_id;
+	__u16	segment;
+	__u8	bus;
+	__u8	devfn; /* Use PCI_SLOT/PCI_FUNC */
+};
+
+struct vfio_pci_hot_reset_info {
+	__u32	argsz;
+	__u32	flags;
+	__u32	count;
+	struct vfio_pci_dependent_device	devices[];
+};
+
+#define VFIO_DEVICE_GET_PCI_HOT_RESET_INFO	_IO(VFIO_TYPE, VFIO_BASE + 12)
+
+/**
+ * VFIO_DEVICE_PCI_HOT_RESET - _IOW(VFIO_TYPE, VFIO_BASE + 13,
+ *				    struct vfio_pci_hot_reset)
+ *
+ * Return: 0 on success, -errno on failure.
+ */
+struct vfio_pci_hot_reset {
+	__u32	argsz;
+	__u32	flags;
+	__u32	count;
+	__s32	group_fds[];
+};
+
+#define VFIO_DEVICE_PCI_HOT_RESET	_IO(VFIO_TYPE, VFIO_BASE + 13)
+
 /* -------- API for Type1 VFIO IOMMU -------- */
 
 /**
diff --git a/init/Kconfig b/init/Kconfig
index 0a2c4bc..bfa9e13 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1123,7 +1123,6 @@
 
 config USER_NS
 	bool "User namespace"
-	depends on UIDGID_CONVERTED
 	select UIDGID_STRICT_TYPE_CHECKS
 
 	default n
@@ -1157,20 +1156,8 @@
 
 endif # NAMESPACES
 
-config UIDGID_CONVERTED
-	# True if all of the selected software conmponents are known
-	# to have uid_t and gid_t converted to kuid_t and kgid_t
-	# where appropriate and are otherwise safe to use with
-	# the user namespace.
-	bool
-	default y
-
-	# Filesystems
-	depends on XFS_FS = n
-
 config UIDGID_STRICT_TYPE_CHECKS
 	bool "Require conversions between uid/gids and their internal representation"
-	depends on UIDGID_CONVERTED
 	default n
 	help
 	 While the nececessary conversions are being added to all subsystems this option allows
diff --git a/kernel/capability.c b/kernel/capability.c
index 6fc1c8a..4e66bf9 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -452,3 +452,4 @@
 
 	return ns_capable(ns, cap) && kuid_has_mapping(ns, inode->i_uid);
 }
+EXPORT_SYMBOL(inode_capable);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index e0aeb32..2418b6e 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -60,6 +60,7 @@
 #include <linux/poll.h>
 #include <linux/flex_array.h> /* used in cgroup_attach_task */
 #include <linux/kthread.h>
+#include <linux/file.h>
 
 #include <linux/atomic.h>
 
@@ -4034,8 +4035,8 @@
 	struct cgroup_event *event;
 	struct cgroup_subsys_state *cfile_css;
 	unsigned int efd, cfd;
-	struct file *efile;
-	struct file *cfile;
+	struct fd efile;
+	struct fd cfile;
 	char *endp;
 	int ret;
 
@@ -4058,31 +4059,31 @@
 	init_waitqueue_func_entry(&event->wait, cgroup_event_wake);
 	INIT_WORK(&event->remove, cgroup_event_remove);
 
-	efile = eventfd_fget(efd);
-	if (IS_ERR(efile)) {
-		ret = PTR_ERR(efile);
+	efile = fdget(efd);
+	if (!efile.file) {
+		ret = -EBADF;
 		goto out_kfree;
 	}
 
-	event->eventfd = eventfd_ctx_fileget(efile);
+	event->eventfd = eventfd_ctx_fileget(efile.file);
 	if (IS_ERR(event->eventfd)) {
 		ret = PTR_ERR(event->eventfd);
 		goto out_put_efile;
 	}
 
-	cfile = fget(cfd);
-	if (!cfile) {
+	cfile = fdget(cfd);
+	if (!cfile.file) {
 		ret = -EBADF;
 		goto out_put_eventfd;
 	}
 
 	/* the process need read permission on control file */
 	/* AV: shouldn't we check that it's been opened for read instead? */
-	ret = inode_permission(file_inode(cfile), MAY_READ);
+	ret = inode_permission(file_inode(cfile.file), MAY_READ);
 	if (ret < 0)
 		goto out_put_cfile;
 
-	event->cft = __file_cft(cfile);
+	event->cft = __file_cft(cfile.file);
 	if (IS_ERR(event->cft)) {
 		ret = PTR_ERR(event->cft);
 		goto out_put_cfile;
@@ -4103,7 +4104,7 @@
 
 	ret = -EINVAL;
 	event->css = cgroup_css(cgrp, event->cft->ss);
-	cfile_css = css_from_dir(cfile->f_dentry->d_parent, event->cft->ss);
+	cfile_css = css_from_dir(cfile.file->f_dentry->d_parent, event->cft->ss);
 	if (event->css && event->css == cfile_css && css_tryget(event->css))
 		ret = 0;
 
@@ -4121,25 +4122,25 @@
 	if (ret)
 		goto out_put_css;
 
-	efile->f_op->poll(efile, &event->pt);
+	efile.file->f_op->poll(efile.file, &event->pt);
 
 	spin_lock(&cgrp->event_list_lock);
 	list_add(&event->list, &cgrp->event_list);
 	spin_unlock(&cgrp->event_list_lock);
 
-	fput(cfile);
-	fput(efile);
+	fdput(cfile);
+	fdput(efile);
 
 	return 0;
 
 out_put_css:
 	css_put(event->css);
 out_put_cfile:
-	fput(cfile);
+	fdput(cfile);
 out_put_eventfd:
 	eventfd_ctx_put(event->eventfd);
 out_put_efile:
-	fput(efile);
+	fdput(efile);
 out_kfree:
 	kfree(event);
 
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 33eb462..b02a339 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -122,7 +122,7 @@
 	STATIC_LOCKDEP_MAP_INIT("rcu_read_lock_sched", &rcu_sched_lock_key);
 EXPORT_SYMBOL_GPL(rcu_sched_lock_map);
 
-int debug_lockdep_rcu_enabled(void)
+int notrace debug_lockdep_rcu_enabled(void)
 {
 	return rcu_scheduler_active && debug_locks &&
 	       current->lockdep_recursion == 0;
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index a6d098c..03cf44a 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1978,12 +1978,27 @@
 
 void ftrace_modify_all_code(int command)
 {
+	int update = command & FTRACE_UPDATE_TRACE_FUNC;
+
+	/*
+	 * If the ftrace_caller calls a ftrace_ops func directly,
+	 * we need to make sure that it only traces functions it
+	 * expects to trace. When doing the switch of functions,
+	 * we need to update to the ftrace_ops_list_func first
+	 * before the transition between old and new calls are set,
+	 * as the ftrace_ops_list_func will check the ops hashes
+	 * to make sure the ops are having the right functions
+	 * traced.
+	 */
+	if (update)
+		ftrace_update_ftrace_func(ftrace_ops_list_func);
+
 	if (command & FTRACE_UPDATE_CALLS)
 		ftrace_replace_code(1);
 	else if (command & FTRACE_DISABLE_CALLS)
 		ftrace_replace_code(0);
 
-	if (command & FTRACE_UPDATE_TRACE_FUNC)
+	if (update && ftrace_trace_function != ftrace_ops_list_func)
 		ftrace_update_ftrace_func(ftrace_trace_function);
 
 	if (command & FTRACE_START_FUNC_RET)
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 496f94d..7974ba2 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3166,11 +3166,6 @@
 };
 
 /*
- * Only trace on a CPU if the bitmask is set:
- */
-static cpumask_var_t tracing_cpumask;
-
-/*
  * The tracer itself will not take this lock, but still we want
  * to provide a consistent cpumask to user-space:
  */
@@ -3186,11 +3181,12 @@
 tracing_cpumask_read(struct file *filp, char __user *ubuf,
 		     size_t count, loff_t *ppos)
 {
+	struct trace_array *tr = file_inode(filp)->i_private;
 	int len;
 
 	mutex_lock(&tracing_cpumask_update_lock);
 
-	len = cpumask_scnprintf(mask_str, count, tracing_cpumask);
+	len = cpumask_scnprintf(mask_str, count, tr->tracing_cpumask);
 	if (count - len < 2) {
 		count = -EINVAL;
 		goto out_err;
@@ -3208,7 +3204,7 @@
 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
 		      size_t count, loff_t *ppos)
 {
-	struct trace_array *tr = filp->private_data;
+	struct trace_array *tr = file_inode(filp)->i_private;
 	cpumask_var_t tracing_cpumask_new;
 	int err, cpu;
 
@@ -3228,12 +3224,12 @@
 		 * Increase/decrease the disabled counter if we are
 		 * about to flip a bit in the cpumask:
 		 */
-		if (cpumask_test_cpu(cpu, tracing_cpumask) &&
+		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
 			atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
 			ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
 		}
-		if (!cpumask_test_cpu(cpu, tracing_cpumask) &&
+		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
 			atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
 			ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
@@ -3242,7 +3238,7 @@
 	arch_spin_unlock(&ftrace_max_lock);
 	local_irq_enable();
 
-	cpumask_copy(tracing_cpumask, tracing_cpumask_new);
+	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
 
 	mutex_unlock(&tracing_cpumask_update_lock);
 	free_cpumask_var(tracing_cpumask_new);
@@ -3256,9 +3252,10 @@
 }
 
 static const struct file_operations tracing_cpumask_fops = {
-	.open		= tracing_open_generic,
+	.open		= tracing_open_generic_tr,
 	.read		= tracing_cpumask_read,
 	.write		= tracing_cpumask_write,
+	.release	= tracing_release_generic_tr,
 	.llseek		= generic_file_llseek,
 };
 
@@ -5938,6 +5935,11 @@
 	if (!tr->name)
 		goto out_free_tr;
 
+	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
+		goto out_free_tr;
+
+	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
+
 	raw_spin_lock_init(&tr->start_lock);
 
 	tr->current_trace = &nop_trace;
@@ -5969,6 +5971,7 @@
  out_free_tr:
 	if (tr->trace_buffer.buffer)
 		ring_buffer_free(tr->trace_buffer.buffer);
+	free_cpumask_var(tr->tracing_cpumask);
 	kfree(tr->name);
 	kfree(tr);
 
@@ -6098,6 +6101,9 @@
 {
 	int cpu;
 
+	trace_create_file("tracing_cpumask", 0644, d_tracer,
+			  tr, &tracing_cpumask_fops);
+
 	trace_create_file("trace_options", 0644, d_tracer,
 			  tr, &tracing_iter_fops);
 
@@ -6147,9 +6153,6 @@
 
 	init_tracer_debugfs(&global_trace, d_tracer);
 
-	trace_create_file("tracing_cpumask", 0644, d_tracer,
-			&global_trace, &tracing_cpumask_fops);
-
 	trace_create_file("available_tracers", 0444, d_tracer,
 			&global_trace, &show_traces_fops);
 
@@ -6371,7 +6374,7 @@
 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
 		goto out;
 
-	if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL))
+	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
 		goto out_free_buffer_mask;
 
 	/* Only allocate trace_printk buffers if a trace_printk exists */
@@ -6386,7 +6389,7 @@
 		ring_buf_size = 1;
 
 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
-	cpumask_copy(tracing_cpumask, cpu_all_mask);
+	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
 
 	raw_spin_lock_init(&global_trace.start_lock);
 
@@ -6441,7 +6444,7 @@
 #ifdef CONFIG_TRACER_MAX_TRACE
 	free_percpu(global_trace.max_buffer.data);
 #endif
-	free_cpumask_var(tracing_cpumask);
+	free_cpumask_var(global_trace.tracing_cpumask);
 out_free_buffer_mask:
 	free_cpumask_var(tracing_buffer_mask);
 out:
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index fe39acd..10c86fb 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -206,6 +206,7 @@
 	struct dentry		*event_dir;
 	struct list_head	systems;
 	struct list_head	events;
+	cpumask_var_t		tracing_cpumask; /* only trace on set CPUs */
 	int			ref;
 };
 
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 29a7ebc..368a4d5 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -1489,12 +1489,7 @@
 }
 
 static int
-event_create_dir(struct dentry *parent,
-		 struct ftrace_event_file *file,
-		 const struct file_operations *id,
-		 const struct file_operations *enable,
-		 const struct file_operations *filter,
-		 const struct file_operations *format)
+event_create_dir(struct dentry *parent, struct ftrace_event_file *file)
 {
 	struct ftrace_event_call *call = file->event_call;
 	struct trace_array *tr = file->tr;
@@ -1522,12 +1517,13 @@
 
 	if (call->class->reg && !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE))
 		trace_create_file("enable", 0644, file->dir, file,
-				  enable);
+				  &ftrace_enable_fops);
 
 #ifdef CONFIG_PERF_EVENTS
 	if (call->event.type && call->class->reg)
 		trace_create_file("id", 0444, file->dir,
-				  (void *)(long)call->event.type, id);
+				  (void *)(long)call->event.type,
+				  &ftrace_event_id_fops);
 #endif
 
 	/*
@@ -1544,10 +1540,10 @@
 		}
 	}
 	trace_create_file("filter", 0644, file->dir, call,
-			  filter);
+			  &ftrace_event_filter_fops);
 
 	trace_create_file("format", 0444, file->dir, call,
-			  format);
+			  &ftrace_event_format_fops);
 
 	return 0;
 }
@@ -1648,12 +1644,7 @@
 
 /* Add an event to a trace directory */
 static int
-__trace_add_new_event(struct ftrace_event_call *call,
-		      struct trace_array *tr,
-		      const struct file_operations *id,
-		      const struct file_operations *enable,
-		      const struct file_operations *filter,
-		      const struct file_operations *format)
+__trace_add_new_event(struct ftrace_event_call *call, struct trace_array *tr)
 {
 	struct ftrace_event_file *file;
 
@@ -1661,7 +1652,7 @@
 	if (!file)
 		return -ENOMEM;
 
-	return event_create_dir(tr->event_dir, file, id, enable, filter, format);
+	return event_create_dir(tr->event_dir, file);
 }
 
 /*
@@ -1683,8 +1674,7 @@
 }
 
 struct ftrace_module_file_ops;
-static void __add_event_to_tracers(struct ftrace_event_call *call,
-				   struct ftrace_module_file_ops *file_ops);
+static void __add_event_to_tracers(struct ftrace_event_call *call);
 
 /* Add an additional event_call dynamically */
 int trace_add_event_call(struct ftrace_event_call *call)
@@ -1695,7 +1685,7 @@
 
 	ret = __register_event(call, NULL);
 	if (ret >= 0)
-		__add_event_to_tracers(call, NULL);
+		__add_event_to_tracers(call);
 
 	mutex_unlock(&event_mutex);
 	mutex_unlock(&trace_types_lock);
@@ -1769,100 +1759,21 @@
 
 #ifdef CONFIG_MODULES
 
-static LIST_HEAD(ftrace_module_file_list);
-
-/*
- * Modules must own their file_operations to keep up with
- * reference counting.
- */
-struct ftrace_module_file_ops {
-	struct list_head		list;
-	struct module			*mod;
-	struct file_operations		id;
-	struct file_operations		enable;
-	struct file_operations		format;
-	struct file_operations		filter;
-};
-
-static struct ftrace_module_file_ops *
-find_ftrace_file_ops(struct ftrace_module_file_ops *file_ops, struct module *mod)
-{
-	/*
-	 * As event_calls are added in groups by module,
-	 * when we find one file_ops, we don't need to search for
-	 * each call in that module, as the rest should be the
-	 * same. Only search for a new one if the last one did
-	 * not match.
-	 */
-	if (file_ops && mod == file_ops->mod)
-		return file_ops;
-
-	list_for_each_entry(file_ops, &ftrace_module_file_list, list) {
-		if (file_ops->mod == mod)
-			return file_ops;
-	}
-	return NULL;
-}
-
-static struct ftrace_module_file_ops *
-trace_create_file_ops(struct module *mod)
-{
-	struct ftrace_module_file_ops *file_ops;
-
-	/*
-	 * This is a bit of a PITA. To allow for correct reference
-	 * counting, modules must "own" their file_operations.
-	 * To do this, we allocate the file operations that will be
-	 * used in the event directory.
-	 */
-
-	file_ops = kmalloc(sizeof(*file_ops), GFP_KERNEL);
-	if (!file_ops)
-		return NULL;
-
-	file_ops->mod = mod;
-
-	file_ops->id = ftrace_event_id_fops;
-	file_ops->id.owner = mod;
-
-	file_ops->enable = ftrace_enable_fops;
-	file_ops->enable.owner = mod;
-
-	file_ops->filter = ftrace_event_filter_fops;
-	file_ops->filter.owner = mod;
-
-	file_ops->format = ftrace_event_format_fops;
-	file_ops->format.owner = mod;
-
-	list_add(&file_ops->list, &ftrace_module_file_list);
-
-	return file_ops;
-}
-
 static void trace_module_add_events(struct module *mod)
 {
-	struct ftrace_module_file_ops *file_ops = NULL;
 	struct ftrace_event_call **call, **start, **end;
 
 	start = mod->trace_events;
 	end = mod->trace_events + mod->num_trace_events;
 
-	if (start == end)
-		return;
-
-	file_ops = trace_create_file_ops(mod);
-	if (!file_ops)
-		return;
-
 	for_each_event(call, start, end) {
 		__register_event(*call, mod);
-		__add_event_to_tracers(*call, file_ops);
+		__add_event_to_tracers(*call);
 	}
 }
 
 static void trace_module_remove_events(struct module *mod)
 {
-	struct ftrace_module_file_ops *file_ops;
 	struct ftrace_event_call *call, *p;
 	bool clear_trace = false;
 
@@ -1874,16 +1785,6 @@
 			__trace_remove_event_call(call);
 		}
 	}
-
-	/* Now free the file_operations */
-	list_for_each_entry(file_ops, &ftrace_module_file_list, list) {
-		if (file_ops->mod == mod)
-			break;
-	}
-	if (&file_ops->list != &ftrace_module_file_list) {
-		list_del(&file_ops->list);
-		kfree(file_ops);
-	}
 	up_write(&trace_event_sem);
 
 	/*
@@ -1919,67 +1820,21 @@
 	return 0;
 }
 
-static int
-__trace_add_new_mod_event(struct ftrace_event_call *call,
-			  struct trace_array *tr,
-			  struct ftrace_module_file_ops *file_ops)
-{
-	return __trace_add_new_event(call, tr,
-				     &file_ops->id, &file_ops->enable,
-				     &file_ops->filter, &file_ops->format);
-}
-
-#else
-static inline struct ftrace_module_file_ops *
-find_ftrace_file_ops(struct ftrace_module_file_ops *file_ops, struct module *mod)
-{
-	return NULL;
-}
-static inline int trace_module_notify(struct notifier_block *self,
-				      unsigned long val, void *data)
-{
-	return 0;
-}
-static inline int
-__trace_add_new_mod_event(struct ftrace_event_call *call,
-			  struct trace_array *tr,
-			  struct ftrace_module_file_ops *file_ops)
-{
-	return -ENODEV;
-}
+static struct notifier_block trace_module_nb = {
+	.notifier_call = trace_module_notify,
+	.priority = 0,
+};
 #endif /* CONFIG_MODULES */
 
 /* Create a new event directory structure for a trace directory. */
 static void
 __trace_add_event_dirs(struct trace_array *tr)
 {
-	struct ftrace_module_file_ops *file_ops = NULL;
 	struct ftrace_event_call *call;
 	int ret;
 
 	list_for_each_entry(call, &ftrace_events, list) {
-		if (call->mod) {
-			/*
-			 * Directories for events by modules need to
-			 * keep module ref counts when opened (as we don't
-			 * want the module to disappear when reading one
-			 * of these files). The file_ops keep account of
-			 * the module ref count.
-			 */
-			file_ops = find_ftrace_file_ops(file_ops, call->mod);
-			if (!file_ops)
-				continue; /* Warn? */
-			ret = __trace_add_new_mod_event(call, tr, file_ops);
-			if (ret < 0)
-				pr_warning("Could not create directory for event %s\n",
-					   call->name);
-			continue;
-		}
-		ret = __trace_add_new_event(call, tr,
-					    &ftrace_event_id_fops,
-					    &ftrace_enable_fops,
-					    &ftrace_event_filter_fops,
-					    &ftrace_event_format_fops);
+		ret = __trace_add_new_event(call, tr);
 		if (ret < 0)
 			pr_warning("Could not create directory for event %s\n",
 				   call->name);
@@ -2287,11 +2142,7 @@
 
 
 	list_for_each_entry(file, &tr->events, list) {
-		ret = event_create_dir(tr->event_dir, file,
-				       &ftrace_event_id_fops,
-				       &ftrace_enable_fops,
-				       &ftrace_event_filter_fops,
-				       &ftrace_event_format_fops);
+		ret = event_create_dir(tr->event_dir, file);
 		if (ret < 0)
 			pr_warning("Could not create directory for event %s\n",
 				   file->event_call->name);
@@ -2332,29 +2183,14 @@
 		remove_event_file_dir(file);
 }
 
-static void
-__add_event_to_tracers(struct ftrace_event_call *call,
-		       struct ftrace_module_file_ops *file_ops)
+static void __add_event_to_tracers(struct ftrace_event_call *call)
 {
 	struct trace_array *tr;
 
-	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
-		if (file_ops)
-			__trace_add_new_mod_event(call, tr, file_ops);
-		else
-			__trace_add_new_event(call, tr,
-					      &ftrace_event_id_fops,
-					      &ftrace_enable_fops,
-					      &ftrace_event_filter_fops,
-					      &ftrace_event_format_fops);
-	}
+	list_for_each_entry(tr, &ftrace_trace_arrays, list)
+		__trace_add_new_event(call, tr);
 }
 
-static struct notifier_block trace_module_nb = {
-	.notifier_call = trace_module_notify,
-	.priority = 0,
-};
-
 extern struct ftrace_event_call *__start_ftrace_events[];
 extern struct ftrace_event_call *__stop_ftrace_events[];
 
@@ -2559,10 +2395,11 @@
 	if (ret)
 		return ret;
 
+#ifdef CONFIG_MODULES
 	ret = register_module_notifier(&trace_module_nb);
 	if (ret)
 		pr_warning("Failed to register trace events module notifier\n");
-
+#endif
 	return 0;
 }
 early_initcall(event_trace_memsetup);
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 8fd0365..559329d 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -200,8 +200,8 @@
 		#type, #name, offsetof(typeof(trace), name),		\
 		sizeof(trace.name), is_signed_type(type)
 
-static
-int  __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len)
+static int __init
+__set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len)
 {
 	int i;
 	int pos = 0;
@@ -228,7 +228,7 @@
 	return pos;
 }
 
-static int set_syscall_print_fmt(struct ftrace_event_call *call)
+static int __init set_syscall_print_fmt(struct ftrace_event_call *call)
 {
 	char *print_fmt;
 	int len;
@@ -253,7 +253,7 @@
 	return 0;
 }
 
-static void free_syscall_print_fmt(struct ftrace_event_call *call)
+static void __init free_syscall_print_fmt(struct ftrace_event_call *call)
 {
 	struct syscall_metadata *entry = call->data;
 
@@ -459,7 +459,7 @@
 	mutex_unlock(&syscall_trace_lock);
 }
 
-static int init_syscall_trace(struct ftrace_event_call *call)
+static int __init init_syscall_trace(struct ftrace_event_call *call)
 {
 	int id;
 	int num;
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 444e1c1..652bea9 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -908,7 +908,7 @@
 	bool
 	depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
 	select STACKTRACE
-	select FRAME_POINTER if !MIPS && !PPC && !ARM_UNWIND && !S390 && !MICROBLAZE
+	select FRAME_POINTER if !MIPS && !PPC && !ARM_UNWIND && !S390 && !MICROBLAZE && !ARC
 	select KALLSYMS
 	select KALLSYMS_ALL
 
@@ -1366,7 +1366,7 @@
 	depends on FAULT_INJECTION_DEBUG_FS && STACKTRACE_SUPPORT
 	depends on !X86_64
 	select STACKTRACE
-	select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !ARM_UNWIND
+	select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !ARM_UNWIND && !ARC
 	help
 	  Provide stacktrace filter for fault-injection capabilities
 
@@ -1376,7 +1376,7 @@
 	depends on DEBUG_KERNEL
 	depends on STACKTRACE_SUPPORT
 	depends on PROC_FS
-	select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !ARM_UNWIND
+	select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !ARM_UNWIND && !ARC
 	select KALLSYMS
 	select KALLSYMS_ALL
 	select STACKTRACE
diff --git a/lib/div64.c b/lib/div64.c
index a163b6c..4382ad7 100644
--- a/lib/div64.c
+++ b/lib/div64.c
@@ -79,6 +79,46 @@
 #endif
 
 /**
+ * div64_u64_rem - unsigned 64bit divide with 64bit divisor and remainder
+ * @dividend:	64bit dividend
+ * @divisor:	64bit divisor
+ * @remainder:  64bit remainder
+ *
+ * This implementation is a comparable to algorithm used by div64_u64.
+ * But this operation, which includes math for calculating the remainder,
+ * is kept distinct to avoid slowing down the div64_u64 operation on 32bit
+ * systems.
+ */
+#ifndef div64_u64_rem
+u64 div64_u64_rem(u64 dividend, u64 divisor, u64 *remainder)
+{
+	u32 high = divisor >> 32;
+	u64 quot;
+
+	if (high == 0) {
+		u32 rem32;
+		quot = div_u64_rem(dividend, divisor, &rem32);
+		*remainder = rem32;
+	} else {
+		int n = 1 + fls(high);
+		quot = div_u64(dividend >> n, divisor >> n);
+
+		if (quot != 0)
+			quot--;
+
+		*remainder = dividend - quot * divisor;
+		if (*remainder >= divisor) {
+			quot++;
+			*remainder -= divisor;
+		}
+	}
+
+	return quot;
+}
+EXPORT_SYMBOL(div64_u64_rem);
+#endif
+
+/**
  * div64_u64 - unsigned 64bit divide with 64bit divisor
  * @dividend:	64bit dividend
  * @divisor:	64bit divisor
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile
index b462578..c7dab06 100644
--- a/lib/raid6/Makefile
+++ b/lib/raid6/Makefile
@@ -6,6 +6,7 @@
 raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o
 raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o
 raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o
+raid6_pq-$(CONFIG_TILEGX) += tilegx8.o
 
 hostprogs-y	+= mktables
 
@@ -110,6 +111,11 @@
 $(obj)/neon8.c:   $(src)/neon.uc $(src)/unroll.awk FORCE
 	$(call if_changed,unroll)
 
+targets += tilegx8.c
+$(obj)/tilegx8.c:   UNROLL := 8
+$(obj)/tilegx8.c:   $(src)/tilegx.uc $(src)/unroll.awk FORCE
+	$(call if_changed,unroll)
+
 quiet_cmd_mktable = TABLE   $@
       cmd_mktable = $(obj)/mktables > $@ || ( rm -f $@ && exit 1 )
 
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index 74e6f56..f0b1aa3 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -66,6 +66,9 @@
 	&raid6_altivec4,
 	&raid6_altivec8,
 #endif
+#if defined(CONFIG_TILEGX)
+	&raid6_tilegx8,
+#endif
 	&raid6_intx1,
 	&raid6_intx2,
 	&raid6_intx4,
diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile
index 28afa1a..29090f3 100644
--- a/lib/raid6/test/Makefile
+++ b/lib/raid6/test/Makefile
@@ -40,13 +40,16 @@
         OBJS   += neon.o neon1.o neon2.o neon4.o neon8.o
         CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1
 else
-        HAS_ALTIVEC := $(shell echo -e '\#include <altivec.h>\nvector int a;' |\
+        HAS_ALTIVEC := $(shell printf '\#include <altivec.h>\nvector int a;\n' |\
                          gcc -c -x c - >&/dev/null && \
                          rm ./-.o && echo yes)
         ifeq ($(HAS_ALTIVEC),yes)
                 OBJS += altivec1.o altivec2.o altivec4.o altivec8.o
         endif
 endif
+ifeq ($(ARCH),tilegx)
+OBJS += tilegx8.o
+endif
 
 .c.o:
 	$(CC) $(CFLAGS) -c -o $@ $<
@@ -109,11 +112,15 @@
 int32.c: int.uc ../unroll.awk
 	$(AWK) ../unroll.awk -vN=32 < int.uc > $@
 
+tilegx8.c: tilegx.uc ../unroll.awk
+	$(AWK) ../unroll.awk -vN=8 < tilegx.uc > $@
+
 tables.c: mktables
 	./mktables > tables.c
 
 clean:
 	rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c neon*.c tables.c raid6test
+	rm -f tilegx*.c
 
 spotless: clean
 	rm -f *~
diff --git a/lib/raid6/tilegx.uc b/lib/raid6/tilegx.uc
new file mode 100644
index 0000000..e7c2945
--- /dev/null
+++ b/lib/raid6/tilegx.uc
@@ -0,0 +1,86 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ *   Copyright 2002 H. Peter Anvin - All Rights Reserved
+ *   Copyright 2012 Tilera Corporation - All Rights Reserved
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, Inc., 53 Temple Place Ste 330,
+ *   Boston MA 02111-1307, USA; either version 2 of the License, or
+ *   (at your option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * tilegx$#.c
+ *
+ * $#-way unrolled TILE-Gx SIMD for RAID-6 math.
+ *
+ * This file is postprocessed using unroll.awk.
+ *
+ */
+
+#include <linux/raid/pq.h>
+
+/* Create 8 byte copies of constant byte */
+# define NBYTES(x) (__insn_v1addi(0, x))
+# define NSIZE  8
+
+/*
+ * The SHLBYTE() operation shifts each byte left by 1, *not*
+ * rolling over into the next byte
+ */
+static inline __attribute_const__ u64 SHLBYTE(u64 v)
+{
+	/* Vector One Byte Shift Left Immediate. */
+	return __insn_v1shli(v, 1);
+}
+
+/*
+ * The MASK() operation returns 0xFF in any byte for which the high
+ * bit is 1, 0x00 for any byte for which the high bit is 0.
+ */
+static inline __attribute_const__ u64 MASK(u64 v)
+{
+	/* Vector One Byte Shift Right Signed Immediate. */
+	return __insn_v1shrsi(v, 7);
+}
+
+
+void raid6_tilegx$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+	u8 **dptr = (u8 **)ptrs;
+	u64 *p, *q;
+	int d, z, z0;
+
+	u64 wd$$, wq$$, wp$$, w1$$, w2$$;
+	u64 x1d = NBYTES(0x1d);
+	u64 * z0ptr;
+
+	z0 = disks - 3;			/* Highest data disk */
+	p = (u64 *)dptr[z0+1];	/* XOR parity */
+	q = (u64 *)dptr[z0+2];	/* RS syndrome */
+
+	z0ptr = (u64 *)&dptr[z0][0];
+	for ( d = 0 ; d < bytes ; d += NSIZE*$# ) {
+		wq$$ = wp$$ = *z0ptr++;
+		for ( z = z0-1 ; z >= 0 ; z-- ) {
+			wd$$ = *(u64 *)&dptr[z][d+$$*NSIZE];
+			wp$$ = wp$$ ^ wd$$;
+			w2$$ = MASK(wq$$);
+			w1$$ = SHLBYTE(wq$$);
+			w2$$ = w2$$ & x1d;
+			w1$$ = w1$$ ^ w2$$;
+			wq$$ = w1$$ ^ wd$$;
+		}
+		*p++ = wp$$;
+		*q++ = wq$$;
+	}
+}
+
+const struct raid6_calls raid6_tilegx$# = {
+	raid6_tilegx$#_gen_syndrome,
+	NULL,
+	"tilegx$#",
+	0
+};
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 3be308e..4a5df7b 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -290,7 +290,7 @@
 	if (ceph_msgr_slab_init())
 		return -ENOMEM;
 
-	ceph_msgr_wq = alloc_workqueue("ceph-msgr", WQ_NON_REENTRANT, 0);
+	ceph_msgr_wq = alloc_workqueue("ceph-msgr", 0, 0);
 	if (ceph_msgr_wq)
 		return 0;
 
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index dd47889..1606f74 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -503,7 +503,9 @@
 	struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode);
 	size_t payload_len = 0;
 
-	BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE);
+	BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE &&
+	       opcode != CEPH_OSD_OP_DELETE && opcode != CEPH_OSD_OP_ZERO &&
+	       opcode != CEPH_OSD_OP_TRUNCATE);
 
 	op->extent.offset = offset;
 	op->extent.length = length;
@@ -631,6 +633,9 @@
 		break;
 	case CEPH_OSD_OP_READ:
 	case CEPH_OSD_OP_WRITE:
+	case CEPH_OSD_OP_ZERO:
+	case CEPH_OSD_OP_DELETE:
+	case CEPH_OSD_OP_TRUNCATE:
 		if (src->op == CEPH_OSD_OP_WRITE)
 			request_data_len = src->extent.length;
 		dst->extent.offset = cpu_to_le64(src->extent.offset);
@@ -715,7 +720,9 @@
 	u64 object_base;
 	int r;
 
-	BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE);
+	BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE &&
+	       opcode != CEPH_OSD_OP_DELETE && opcode != CEPH_OSD_OP_ZERO &&
+	       opcode != CEPH_OSD_OP_TRUNCATE);
 
 	req = ceph_osdc_alloc_request(osdc, snapc, num_ops, use_mempool,
 					GFP_NOFS);
@@ -1488,14 +1495,14 @@
 	dout("handle_reply %p tid %llu req %p result %d\n", msg, tid,
 	     req, result);
 
-	ceph_decode_need(&p, end, 4, bad);
+	ceph_decode_need(&p, end, 4, bad_put);
 	numops = ceph_decode_32(&p);
 	if (numops > CEPH_OSD_MAX_OP)
 		goto bad_put;
 	if (numops != req->r_num_ops)
 		goto bad_put;
 	payload_len = 0;
-	ceph_decode_need(&p, end, numops * sizeof(struct ceph_osd_op), bad);
+	ceph_decode_need(&p, end, numops * sizeof(struct ceph_osd_op), bad_put);
 	for (i = 0; i < numops; i++) {
 		struct ceph_osd_op *op = p;
 		int len;
@@ -1513,7 +1520,7 @@
 		goto bad_put;
 	}
 
-	ceph_decode_need(&p, end, 4 + numops * 4, bad);
+	ceph_decode_need(&p, end, 4 + numops * 4, bad_put);
 	retry_attempt = ceph_decode_32(&p);
 	for (i = 0; i < numops; i++)
 		req->r_reply_op_result[i] = ceph_decode_32(&p);
@@ -1786,6 +1793,8 @@
 		nr_maps--;
 	}
 
+	if (!osdc->osdmap)
+		goto bad;
 done:
 	downgrade_write(&osdc->map_sem);
 	ceph_monc_got_osdmap(&osdc->client->monc, osdc->osdmap->epoch);
@@ -2129,6 +2138,8 @@
 			dout("osdc_start_request failed map, "
 				" will retry %lld\n", req->r_tid);
 			rc = 0;
+		} else {
+			__unregister_request(osdc, req);
 		}
 		goto out_unlock;
 	}
@@ -2253,12 +2264,10 @@
 	if (err < 0)
 		goto out_msgpool;
 
+	err = -ENOMEM;
 	osdc->notify_wq = create_singlethread_workqueue("ceph-watch-notify");
-	if (IS_ERR(osdc->notify_wq)) {
-		err = PTR_ERR(osdc->notify_wq);
-		osdc->notify_wq = NULL;
+	if (!osdc->notify_wq)
 		goto out_msgpool;
-	}
 	return 0;
 
 out_msgpool:
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 603ddd9..dbd9a47 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -1129,7 +1129,7 @@
 
 	/* pg_temp? */
 	pgid.seed = ceph_stable_mod(pgid.seed, pool->pg_num,
-				    pool->pgp_num_mask);
+				    pool->pg_num_mask);
 	pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid);
 	if (pg) {
 		*num = pg->len;
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index ed2fdd2..4151590 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -250,11 +250,11 @@
 EXPORT_SYMBOL_GPL(rpcauth_list_flavors);
 
 struct rpc_auth *
-rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt)
+rpcauth_create(struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
 {
 	struct rpc_auth		*auth;
 	const struct rpc_authops *ops;
-	u32			flavor = pseudoflavor_to_flavor(pseudoflavor);
+	u32			flavor = pseudoflavor_to_flavor(args->pseudoflavor);
 
 	auth = ERR_PTR(-EINVAL);
 	if (flavor >= RPC_AUTH_MAXFLAVOR)
@@ -269,7 +269,7 @@
 		goto out;
 	}
 	spin_unlock(&rpc_authflavor_lock);
-	auth = ops->create(clnt, pseudoflavor);
+	auth = ops->create(args, clnt);
 	module_put(ops->owner);
 	if (IS_ERR(auth))
 		return auth;
@@ -343,6 +343,27 @@
 EXPORT_SYMBOL_GPL(rpcauth_init_credcache);
 
 /*
+ * Setup a credential key lifetime timeout notification
+ */
+int
+rpcauth_key_timeout_notify(struct rpc_auth *auth, struct rpc_cred *cred)
+{
+	if (!cred->cr_auth->au_ops->key_timeout)
+		return 0;
+	return cred->cr_auth->au_ops->key_timeout(auth, cred);
+}
+EXPORT_SYMBOL_GPL(rpcauth_key_timeout_notify);
+
+bool
+rpcauth_cred_key_to_expire(struct rpc_cred *cred)
+{
+	if (!cred->cr_ops->crkey_to_expire)
+		return false;
+	return cred->cr_ops->crkey_to_expire(cred);
+}
+EXPORT_SYMBOL_GPL(rpcauth_cred_key_to_expire);
+
+/*
  * Destroy a list of credentials
  */
 static inline
diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c
index b6badaf..f6d84be 100644
--- a/net/sunrpc/auth_generic.c
+++ b/net/sunrpc/auth_generic.c
@@ -89,6 +89,7 @@
 	gcred->acred.uid = acred->uid;
 	gcred->acred.gid = acred->gid;
 	gcred->acred.group_info = acred->group_info;
+	gcred->acred.ac_flags = 0;
 	if (gcred->acred.group_info != NULL)
 		get_group_info(gcred->acred.group_info);
 	gcred->acred.machine_cred = acred->machine_cred;
@@ -182,11 +183,78 @@
 	rpcauth_destroy_credcache(&generic_auth);
 }
 
+/*
+ * Test the the current time (now) against the underlying credential key expiry
+ * minus a timeout and setup notification.
+ *
+ * The normal case:
+ * If 'now' is before the key expiry minus RPC_KEY_EXPIRE_TIMEO, set
+ * the RPC_CRED_NOTIFY_TIMEOUT flag to setup the underlying credential
+ * rpc_credops crmatch routine to notify this generic cred when it's key
+ * expiration is within RPC_KEY_EXPIRE_TIMEO, and return 0.
+ *
+ * The error case:
+ * If the underlying cred lookup fails, return -EACCES.
+ *
+ * The 'almost' error case:
+ * If 'now' is within key expiry minus RPC_KEY_EXPIRE_TIMEO, but not within
+ * key expiry minus RPC_KEY_EXPIRE_FAIL, set the RPC_CRED_EXPIRE_SOON bit
+ * on the acred ac_flags and return 0.
+ */
+static int
+generic_key_timeout(struct rpc_auth *auth, struct rpc_cred *cred)
+{
+	struct auth_cred *acred = &container_of(cred, struct generic_cred,
+						gc_base)->acred;
+	struct rpc_cred *tcred;
+	int ret = 0;
+
+
+	/* Fast track for non crkey_timeout (no key) underlying credentials */
+	if (test_bit(RPC_CRED_NO_CRKEY_TIMEOUT, &acred->ac_flags))
+		return 0;
+
+	/* Fast track for the normal case */
+	if (test_bit(RPC_CRED_NOTIFY_TIMEOUT, &acred->ac_flags))
+		return 0;
+
+	/* lookup_cred either returns a valid referenced rpc_cred, or PTR_ERR */
+	tcred = auth->au_ops->lookup_cred(auth, acred, 0);
+	if (IS_ERR(tcred))
+		return -EACCES;
+
+	if (!tcred->cr_ops->crkey_timeout) {
+		set_bit(RPC_CRED_NO_CRKEY_TIMEOUT, &acred->ac_flags);
+		ret = 0;
+		goto out_put;
+	}
+
+	/* Test for the almost error case */
+	ret = tcred->cr_ops->crkey_timeout(tcred);
+	if (ret != 0) {
+		set_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags);
+		ret = 0;
+	} else {
+		/* In case underlying cred key has been reset */
+		if (test_and_clear_bit(RPC_CRED_KEY_EXPIRE_SOON,
+					&acred->ac_flags))
+			dprintk("RPC:        UID %d Credential key reset\n",
+				tcred->cr_uid);
+		/* set up fasttrack for the normal case */
+		set_bit(RPC_CRED_NOTIFY_TIMEOUT, &acred->ac_flags);
+	}
+
+out_put:
+	put_rpccred(tcred);
+	return ret;
+}
+
 static const struct rpc_authops generic_auth_ops = {
 	.owner = THIS_MODULE,
 	.au_name = "Generic",
 	.lookup_cred = generic_lookup_cred,
 	.crcreate = generic_create_cred,
+	.key_timeout = generic_key_timeout,
 };
 
 static struct rpc_auth generic_auth = {
@@ -194,9 +262,23 @@
 	.au_count = ATOMIC_INIT(0),
 };
 
+static bool generic_key_to_expire(struct rpc_cred *cred)
+{
+	struct auth_cred *acred = &container_of(cred, struct generic_cred,
+						gc_base)->acred;
+	bool ret;
+
+	get_rpccred(cred);
+	ret = test_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags);
+	put_rpccred(cred);
+
+	return ret;
+}
+
 static const struct rpc_credops generic_credops = {
 	.cr_name = "Generic cred",
 	.crdestroy = generic_destroy_cred,
 	.crbind = generic_bind_cred,
 	.crmatch = generic_match,
+	.crkey_to_expire = generic_key_to_expire,
 };
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index fc2f78d..30eb502 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -51,6 +51,7 @@
 #include <linux/sunrpc/rpc_pipe_fs.h>
 #include <linux/sunrpc/gss_api.h>
 #include <asm/uaccess.h>
+#include <linux/hashtable.h>
 
 #include "../netns.h"
 
@@ -62,6 +63,9 @@
 #define GSS_RETRY_EXPIRED 5
 static unsigned int gss_expired_cred_retry_delay = GSS_RETRY_EXPIRED;
 
+#define GSS_KEY_EXPIRE_TIMEO 240
+static unsigned int gss_key_expire_timeo = GSS_KEY_EXPIRE_TIMEO;
+
 #ifdef RPC_DEBUG
 # define RPCDBG_FACILITY	RPCDBG_AUTH
 #endif
@@ -71,19 +75,33 @@
  * using integrity (two 4-byte integers): */
 #define GSS_VERF_SLACK		100
 
+static DEFINE_HASHTABLE(gss_auth_hash_table, 16);
+static DEFINE_SPINLOCK(gss_auth_hash_lock);
+
+struct gss_pipe {
+	struct rpc_pipe_dir_object pdo;
+	struct rpc_pipe *pipe;
+	struct rpc_clnt *clnt;
+	const char *name;
+	struct kref kref;
+};
+
 struct gss_auth {
 	struct kref kref;
+	struct hlist_node hash;
 	struct rpc_auth rpc_auth;
 	struct gss_api_mech *mech;
 	enum rpc_gss_svc service;
 	struct rpc_clnt *client;
+	struct net *net;
 	/*
 	 * There are two upcall pipes; dentry[1], named "gssd", is used
 	 * for the new text-based upcall; dentry[0] is named after the
 	 * mechanism (for example, "krb5") and exists for
 	 * backwards-compatibility with older gssd's.
 	 */
-	struct rpc_pipe *pipe[2];
+	struct gss_pipe *gss_pipe[2];
+	const char *target_name;
 };
 
 /* pipe_version >= 0 if and only if someone has a pipe open. */
@@ -294,7 +312,7 @@
 static void
 gss_release_msg(struct gss_upcall_msg *gss_msg)
 {
-	struct net *net = rpc_net_ns(gss_msg->auth->client);
+	struct net *net = gss_msg->auth->net;
 	if (!atomic_dec_and_test(&gss_msg->count))
 		return;
 	put_pipe_version(net);
@@ -406,8 +424,8 @@
 }
 
 static void gss_encode_v1_msg(struct gss_upcall_msg *gss_msg,
-				struct rpc_clnt *clnt,
-				const char *service_name)
+				const char *service_name,
+				const char *target_name)
 {
 	struct gss_api_mech *mech = gss_msg->auth->mech;
 	char *p = gss_msg->databuf;
@@ -417,8 +435,8 @@
 				   mech->gm_name,
 				   from_kuid(&init_user_ns, gss_msg->uid));
 	p += gss_msg->msg.len;
-	if (clnt->cl_principal) {
-		len = sprintf(p, "target=%s ", clnt->cl_principal);
+	if (target_name) {
+		len = sprintf(p, "target=%s ", target_name);
 		p += len;
 		gss_msg->msg.len += len;
 	}
@@ -439,21 +457,8 @@
 	BUG_ON(gss_msg->msg.len > UPCALL_BUF_LEN);
 }
 
-static void gss_encode_msg(struct gss_upcall_msg *gss_msg,
-				struct rpc_clnt *clnt,
-				const char *service_name)
-{
-	struct net *net = rpc_net_ns(clnt);
-	struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
-
-	if (sn->pipe_version == 0)
-		gss_encode_v0_msg(gss_msg);
-	else /* pipe_version == 1 */
-		gss_encode_v1_msg(gss_msg, clnt, service_name);
-}
-
 static struct gss_upcall_msg *
-gss_alloc_msg(struct gss_auth *gss_auth, struct rpc_clnt *clnt,
+gss_alloc_msg(struct gss_auth *gss_auth,
 		kuid_t uid, const char *service_name)
 {
 	struct gss_upcall_msg *gss_msg;
@@ -462,31 +467,36 @@
 	gss_msg = kzalloc(sizeof(*gss_msg), GFP_NOFS);
 	if (gss_msg == NULL)
 		return ERR_PTR(-ENOMEM);
-	vers = get_pipe_version(rpc_net_ns(clnt));
+	vers = get_pipe_version(gss_auth->net);
 	if (vers < 0) {
 		kfree(gss_msg);
 		return ERR_PTR(vers);
 	}
-	gss_msg->pipe = gss_auth->pipe[vers];
+	gss_msg->pipe = gss_auth->gss_pipe[vers]->pipe;
 	INIT_LIST_HEAD(&gss_msg->list);
 	rpc_init_wait_queue(&gss_msg->rpc_waitqueue, "RPCSEC_GSS upcall waitq");
 	init_waitqueue_head(&gss_msg->waitqueue);
 	atomic_set(&gss_msg->count, 1);
 	gss_msg->uid = uid;
 	gss_msg->auth = gss_auth;
-	gss_encode_msg(gss_msg, clnt, service_name);
+	switch (vers) {
+	case 0:
+		gss_encode_v0_msg(gss_msg);
+	default:
+		gss_encode_v1_msg(gss_msg, service_name, gss_auth->target_name);
+	};
 	return gss_msg;
 }
 
 static struct gss_upcall_msg *
-gss_setup_upcall(struct rpc_clnt *clnt, struct gss_auth *gss_auth, struct rpc_cred *cred)
+gss_setup_upcall(struct gss_auth *gss_auth, struct rpc_cred *cred)
 {
 	struct gss_cred *gss_cred = container_of(cred,
 			struct gss_cred, gc_base);
 	struct gss_upcall_msg *gss_new, *gss_msg;
 	kuid_t uid = cred->cr_uid;
 
-	gss_new = gss_alloc_msg(gss_auth, clnt, uid, gss_cred->gc_principal);
+	gss_new = gss_alloc_msg(gss_auth, uid, gss_cred->gc_principal);
 	if (IS_ERR(gss_new))
 		return gss_new;
 	gss_msg = gss_add_msg(gss_new);
@@ -527,7 +537,7 @@
 
 	dprintk("RPC: %5u %s for uid %u\n",
 		task->tk_pid, __func__, from_kuid(&init_user_ns, cred->cr_uid));
-	gss_msg = gss_setup_upcall(task->tk_client, gss_auth, cred);
+	gss_msg = gss_setup_upcall(gss_auth, cred);
 	if (PTR_ERR(gss_msg) == -EAGAIN) {
 		/* XXX: warning on the first, under the assumption we
 		 * shouldn't normally hit this case on a refresh. */
@@ -566,7 +576,7 @@
 static inline int
 gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred)
 {
-	struct net *net = rpc_net_ns(gss_auth->client);
+	struct net *net = gss_auth->net;
 	struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
 	struct rpc_pipe *pipe;
 	struct rpc_cred *cred = &gss_cred->gc_base;
@@ -583,7 +593,7 @@
 	timeout = 15 * HZ;
 	if (!sn->gssd_running)
 		timeout = HZ >> 2;
-	gss_msg = gss_setup_upcall(gss_auth->client, gss_auth, cred);
+	gss_msg = gss_setup_upcall(gss_auth, cred);
 	if (PTR_ERR(gss_msg) == -EAGAIN) {
 		err = wait_event_interruptible_timeout(pipe_version_waitqueue,
 				sn->pipe_version >= 0, timeout);
@@ -797,83 +807,153 @@
 	}
 }
 
-static void gss_pipes_dentries_destroy(struct rpc_auth *auth)
+static void gss_pipe_dentry_destroy(struct dentry *dir,
+		struct rpc_pipe_dir_object *pdo)
 {
-	struct gss_auth *gss_auth;
+	struct gss_pipe *gss_pipe = pdo->pdo_data;
+	struct rpc_pipe *pipe = gss_pipe->pipe;
 
-	gss_auth = container_of(auth, struct gss_auth, rpc_auth);
-	if (gss_auth->pipe[0]->dentry)
-		rpc_unlink(gss_auth->pipe[0]->dentry);
-	if (gss_auth->pipe[1]->dentry)
-		rpc_unlink(gss_auth->pipe[1]->dentry);
+	if (pipe->dentry != NULL) {
+		rpc_unlink(pipe->dentry);
+		pipe->dentry = NULL;
+	}
 }
 
-static int gss_pipes_dentries_create(struct rpc_auth *auth)
+static int gss_pipe_dentry_create(struct dentry *dir,
+		struct rpc_pipe_dir_object *pdo)
 {
-	int err;
-	struct gss_auth *gss_auth;
-	struct rpc_clnt *clnt;
+	struct gss_pipe *p = pdo->pdo_data;
+	struct dentry *dentry;
 
-	gss_auth = container_of(auth, struct gss_auth, rpc_auth);
-	clnt = gss_auth->client;
-
-	gss_auth->pipe[1]->dentry = rpc_mkpipe_dentry(clnt->cl_dentry,
-						      "gssd",
-						      clnt, gss_auth->pipe[1]);
-	if (IS_ERR(gss_auth->pipe[1]->dentry))
-		return PTR_ERR(gss_auth->pipe[1]->dentry);
-	gss_auth->pipe[0]->dentry = rpc_mkpipe_dentry(clnt->cl_dentry,
-						      gss_auth->mech->gm_name,
-						      clnt, gss_auth->pipe[0]);
-	if (IS_ERR(gss_auth->pipe[0]->dentry)) {
-		err = PTR_ERR(gss_auth->pipe[0]->dentry);
-		goto err_unlink_pipe_1;
-	}
+	dentry = rpc_mkpipe_dentry(dir, p->name, p->clnt, p->pipe);
+	if (IS_ERR(dentry))
+		return PTR_ERR(dentry);
+	p->pipe->dentry = dentry;
 	return 0;
-
-err_unlink_pipe_1:
-	rpc_unlink(gss_auth->pipe[1]->dentry);
-	return err;
 }
 
-static void gss_pipes_dentries_destroy_net(struct rpc_clnt *clnt,
-					   struct rpc_auth *auth)
-{
-	struct net *net = rpc_net_ns(clnt);
-	struct super_block *sb;
+static const struct rpc_pipe_dir_object_ops gss_pipe_dir_object_ops = {
+	.create = gss_pipe_dentry_create,
+	.destroy = gss_pipe_dentry_destroy,
+};
 
-	sb = rpc_get_sb_net(net);
-	if (sb) {
-		if (clnt->cl_dentry)
-			gss_pipes_dentries_destroy(auth);
-		rpc_put_sb_net(net);
+static struct gss_pipe *gss_pipe_alloc(struct rpc_clnt *clnt,
+		const char *name,
+		const struct rpc_pipe_ops *upcall_ops)
+{
+	struct gss_pipe *p;
+	int err = -ENOMEM;
+
+	p = kmalloc(sizeof(*p), GFP_KERNEL);
+	if (p == NULL)
+		goto err;
+	p->pipe = rpc_mkpipe_data(upcall_ops, RPC_PIPE_WAIT_FOR_OPEN);
+	if (IS_ERR(p->pipe)) {
+		err = PTR_ERR(p->pipe);
+		goto err_free_gss_pipe;
 	}
+	p->name = name;
+	p->clnt = clnt;
+	kref_init(&p->kref);
+	rpc_init_pipe_dir_object(&p->pdo,
+			&gss_pipe_dir_object_ops,
+			p);
+	return p;
+err_free_gss_pipe:
+	kfree(p);
+err:
+	return ERR_PTR(err);
 }
 
-static int gss_pipes_dentries_create_net(struct rpc_clnt *clnt,
-					 struct rpc_auth *auth)
+struct gss_alloc_pdo {
+	struct rpc_clnt *clnt;
+	const char *name;
+	const struct rpc_pipe_ops *upcall_ops;
+};
+
+static int gss_pipe_match_pdo(struct rpc_pipe_dir_object *pdo, void *data)
+{
+	struct gss_pipe *gss_pipe;
+	struct gss_alloc_pdo *args = data;
+
+	if (pdo->pdo_ops != &gss_pipe_dir_object_ops)
+		return 0;
+	gss_pipe = container_of(pdo, struct gss_pipe, pdo);
+	if (strcmp(gss_pipe->name, args->name) != 0)
+		return 0;
+	if (!kref_get_unless_zero(&gss_pipe->kref))
+		return 0;
+	return 1;
+}
+
+static struct rpc_pipe_dir_object *gss_pipe_alloc_pdo(void *data)
+{
+	struct gss_pipe *gss_pipe;
+	struct gss_alloc_pdo *args = data;
+
+	gss_pipe = gss_pipe_alloc(args->clnt, args->name, args->upcall_ops);
+	if (!IS_ERR(gss_pipe))
+		return &gss_pipe->pdo;
+	return NULL;
+}
+
+static struct gss_pipe *gss_pipe_get(struct rpc_clnt *clnt,
+		const char *name,
+		const struct rpc_pipe_ops *upcall_ops)
 {
 	struct net *net = rpc_net_ns(clnt);
-	struct super_block *sb;
-	int err = 0;
+	struct rpc_pipe_dir_object *pdo;
+	struct gss_alloc_pdo args = {
+		.clnt = clnt,
+		.name = name,
+		.upcall_ops = upcall_ops,
+	};
 
-	sb = rpc_get_sb_net(net);
-	if (sb) {
-		if (clnt->cl_dentry)
-			err = gss_pipes_dentries_create(auth);
-		rpc_put_sb_net(net);
-	}
-	return err;
+	pdo = rpc_find_or_alloc_pipe_dir_object(net,
+			&clnt->cl_pipedir_objects,
+			gss_pipe_match_pdo,
+			gss_pipe_alloc_pdo,
+			&args);
+	if (pdo != NULL)
+		return container_of(pdo, struct gss_pipe, pdo);
+	return ERR_PTR(-ENOMEM);
+}
+
+static void __gss_pipe_free(struct gss_pipe *p)
+{
+	struct rpc_clnt *clnt = p->clnt;
+	struct net *net = rpc_net_ns(clnt);
+
+	rpc_remove_pipe_dir_object(net,
+			&clnt->cl_pipedir_objects,
+			&p->pdo);
+	rpc_destroy_pipe_data(p->pipe);
+	kfree(p);
+}
+
+static void __gss_pipe_release(struct kref *kref)
+{
+	struct gss_pipe *p = container_of(kref, struct gss_pipe, kref);
+
+	__gss_pipe_free(p);
+}
+
+static void gss_pipe_free(struct gss_pipe *p)
+{
+	if (p != NULL)
+		kref_put(&p->kref, __gss_pipe_release);
 }
 
 /*
  * NOTE: we have the opportunity to use different
  * parameters based on the input flavor (which must be a pseudoflavor)
  */
-static struct rpc_auth *
-gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
+static struct gss_auth *
+gss_create_new(struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
 {
+	rpc_authflavor_t flavor = args->pseudoflavor;
 	struct gss_auth *gss_auth;
+	struct gss_pipe *gss_pipe;
 	struct rpc_auth * auth;
 	int err = -ENOMEM; /* XXX? */
 
@@ -883,12 +963,20 @@
 		return ERR_PTR(err);
 	if (!(gss_auth = kmalloc(sizeof(*gss_auth), GFP_KERNEL)))
 		goto out_dec;
+	INIT_HLIST_NODE(&gss_auth->hash);
+	gss_auth->target_name = NULL;
+	if (args->target_name) {
+		gss_auth->target_name = kstrdup(args->target_name, GFP_KERNEL);
+		if (gss_auth->target_name == NULL)
+			goto err_free;
+	}
 	gss_auth->client = clnt;
+	gss_auth->net = get_net(rpc_net_ns(clnt));
 	err = -EINVAL;
 	gss_auth->mech = gss_mech_get_by_pseudoflavor(flavor);
 	if (!gss_auth->mech) {
 		dprintk("RPC:       Pseudoflavor %d not found!\n", flavor);
-		goto err_free;
+		goto err_put_net;
 	}
 	gss_auth->service = gss_pseudoflavor_to_service(gss_auth->mech, flavor);
 	if (gss_auth->service == 0)
@@ -901,42 +989,41 @@
 	atomic_set(&auth->au_count, 1);
 	kref_init(&gss_auth->kref);
 
+	err = rpcauth_init_credcache(auth);
+	if (err)
+		goto err_put_mech;
 	/*
 	 * Note: if we created the old pipe first, then someone who
 	 * examined the directory at the right moment might conclude
 	 * that we supported only the old pipe.  So we instead create
 	 * the new pipe first.
 	 */
-	gss_auth->pipe[1] = rpc_mkpipe_data(&gss_upcall_ops_v1,
-					    RPC_PIPE_WAIT_FOR_OPEN);
-	if (IS_ERR(gss_auth->pipe[1])) {
-		err = PTR_ERR(gss_auth->pipe[1]);
-		goto err_put_mech;
+	gss_pipe = gss_pipe_get(clnt, "gssd", &gss_upcall_ops_v1);
+	if (IS_ERR(gss_pipe)) {
+		err = PTR_ERR(gss_pipe);
+		goto err_destroy_credcache;
 	}
+	gss_auth->gss_pipe[1] = gss_pipe;
 
-	gss_auth->pipe[0] = rpc_mkpipe_data(&gss_upcall_ops_v0,
-					    RPC_PIPE_WAIT_FOR_OPEN);
-	if (IS_ERR(gss_auth->pipe[0])) {
-		err = PTR_ERR(gss_auth->pipe[0]);
+	gss_pipe = gss_pipe_get(clnt, gss_auth->mech->gm_name,
+			&gss_upcall_ops_v0);
+	if (IS_ERR(gss_pipe)) {
+		err = PTR_ERR(gss_pipe);
 		goto err_destroy_pipe_1;
 	}
-	err = gss_pipes_dentries_create_net(clnt, auth);
-	if (err)
-		goto err_destroy_pipe_0;
-	err = rpcauth_init_credcache(auth);
-	if (err)
-		goto err_unlink_pipes;
+	gss_auth->gss_pipe[0] = gss_pipe;
 
-	return auth;
-err_unlink_pipes:
-	gss_pipes_dentries_destroy_net(clnt, auth);
-err_destroy_pipe_0:
-	rpc_destroy_pipe_data(gss_auth->pipe[0]);
+	return gss_auth;
 err_destroy_pipe_1:
-	rpc_destroy_pipe_data(gss_auth->pipe[1]);
+	gss_pipe_free(gss_auth->gss_pipe[1]);
+err_destroy_credcache:
+	rpcauth_destroy_credcache(auth);
 err_put_mech:
 	gss_mech_put(gss_auth->mech);
+err_put_net:
+	put_net(gss_auth->net);
 err_free:
+	kfree(gss_auth->target_name);
 	kfree(gss_auth);
 out_dec:
 	module_put(THIS_MODULE);
@@ -946,10 +1033,11 @@
 static void
 gss_free(struct gss_auth *gss_auth)
 {
-	gss_pipes_dentries_destroy_net(gss_auth->client, &gss_auth->rpc_auth);
-	rpc_destroy_pipe_data(gss_auth->pipe[0]);
-	rpc_destroy_pipe_data(gss_auth->pipe[1]);
+	gss_pipe_free(gss_auth->gss_pipe[0]);
+	gss_pipe_free(gss_auth->gss_pipe[1]);
 	gss_mech_put(gss_auth->mech);
+	put_net(gss_auth->net);
+	kfree(gss_auth->target_name);
 
 	kfree(gss_auth);
 	module_put(THIS_MODULE);
@@ -966,17 +1054,101 @@
 static void
 gss_destroy(struct rpc_auth *auth)
 {
-	struct gss_auth *gss_auth;
+	struct gss_auth *gss_auth = container_of(auth,
+			struct gss_auth, rpc_auth);
 
 	dprintk("RPC:       destroying GSS authenticator %p flavor %d\n",
 			auth, auth->au_flavor);
 
+	if (hash_hashed(&gss_auth->hash)) {
+		spin_lock(&gss_auth_hash_lock);
+		hash_del(&gss_auth->hash);
+		spin_unlock(&gss_auth_hash_lock);
+	}
+
+	gss_pipe_free(gss_auth->gss_pipe[0]);
+	gss_auth->gss_pipe[0] = NULL;
+	gss_pipe_free(gss_auth->gss_pipe[1]);
+	gss_auth->gss_pipe[1] = NULL;
 	rpcauth_destroy_credcache(auth);
 
-	gss_auth = container_of(auth, struct gss_auth, rpc_auth);
 	kref_put(&gss_auth->kref, gss_free_callback);
 }
 
+static struct gss_auth *
+gss_auth_find_or_add_hashed(struct rpc_auth_create_args *args,
+		struct rpc_clnt *clnt,
+		struct gss_auth *new)
+{
+	struct gss_auth *gss_auth;
+	unsigned long hashval = (unsigned long)clnt;
+
+	spin_lock(&gss_auth_hash_lock);
+	hash_for_each_possible(gss_auth_hash_table,
+			gss_auth,
+			hash,
+			hashval) {
+		if (gss_auth->rpc_auth.au_flavor != args->pseudoflavor)
+			continue;
+		if (gss_auth->target_name != args->target_name) {
+			if (gss_auth->target_name == NULL)
+				continue;
+			if (args->target_name == NULL)
+				continue;
+			if (strcmp(gss_auth->target_name, args->target_name))
+				continue;
+		}
+		if (!atomic_inc_not_zero(&gss_auth->rpc_auth.au_count))
+			continue;
+		goto out;
+	}
+	if (new)
+		hash_add(gss_auth_hash_table, &new->hash, hashval);
+	gss_auth = new;
+out:
+	spin_unlock(&gss_auth_hash_lock);
+	return gss_auth;
+}
+
+static struct gss_auth *
+gss_create_hashed(struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
+{
+	struct gss_auth *gss_auth;
+	struct gss_auth *new;
+
+	gss_auth = gss_auth_find_or_add_hashed(args, clnt, NULL);
+	if (gss_auth != NULL)
+		goto out;
+	new = gss_create_new(args, clnt);
+	if (IS_ERR(new))
+		return new;
+	gss_auth = gss_auth_find_or_add_hashed(args, clnt, new);
+	if (gss_auth != new)
+		gss_destroy(&new->rpc_auth);
+out:
+	return gss_auth;
+}
+
+static struct rpc_auth *
+gss_create(struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
+{
+	struct gss_auth *gss_auth;
+	struct rpc_xprt *xprt = rcu_access_pointer(clnt->cl_xprt);
+
+	while (clnt != clnt->cl_parent) {
+		struct rpc_clnt *parent = clnt->cl_parent;
+		/* Find the original parent for this transport */
+		if (rcu_access_pointer(parent->cl_xprt) != xprt)
+			break;
+		clnt = parent;
+	}
+
+	gss_auth = gss_create_hashed(args, clnt);
+	if (IS_ERR(gss_auth))
+		return ERR_CAST(gss_auth);
+	return &gss_auth->rpc_auth;
+}
+
 /*
  * gss_destroying_context will cause the RPCSEC_GSS to send a NULL RPC call
  * to the server with the GSS control procedure field set to
@@ -1126,10 +1298,32 @@
 	return err;
 }
 
+/*
+ * Returns -EACCES if GSS context is NULL or will expire within the
+ * timeout (miliseconds)
+ */
+static int
+gss_key_timeout(struct rpc_cred *rc)
+{
+	struct gss_cred *gss_cred = container_of(rc, struct gss_cred, gc_base);
+	unsigned long now = jiffies;
+	unsigned long expire;
+
+	if (gss_cred->gc_ctx == NULL)
+		return -EACCES;
+
+	expire = gss_cred->gc_ctx->gc_expiry - (gss_key_expire_timeo * HZ);
+
+	if (time_after(now, expire))
+		return -EACCES;
+	return 0;
+}
+
 static int
 gss_match(struct auth_cred *acred, struct rpc_cred *rc, int flags)
 {
 	struct gss_cred *gss_cred = container_of(rc, struct gss_cred, gc_base);
+	int ret;
 
 	if (test_bit(RPCAUTH_CRED_NEW, &rc->cr_flags))
 		goto out;
@@ -1142,11 +1336,26 @@
 	if (acred->principal != NULL) {
 		if (gss_cred->gc_principal == NULL)
 			return 0;
-		return strcmp(acred->principal, gss_cred->gc_principal) == 0;
+		ret = strcmp(acred->principal, gss_cred->gc_principal) == 0;
+		goto check_expire;
 	}
 	if (gss_cred->gc_principal != NULL)
 		return 0;
-	return uid_eq(rc->cr_uid, acred->uid);
+	ret = uid_eq(rc->cr_uid, acred->uid);
+
+check_expire:
+	if (ret == 0)
+		return ret;
+
+	/* Notify acred users of GSS context expiration timeout */
+	if (test_bit(RPC_CRED_NOTIFY_TIMEOUT, &acred->ac_flags) &&
+	    (gss_key_timeout(rc) != 0)) {
+		/* test will now be done from generic cred */
+		test_and_clear_bit(RPC_CRED_NOTIFY_TIMEOUT, &acred->ac_flags);
+		/* tell NFS layer that key will expire soon */
+		set_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags);
+	}
+	return ret;
 }
 
 /*
@@ -1292,6 +1501,7 @@
 	struct xdr_netobj mic;
 	u32		flav,len;
 	u32		maj_stat;
+	__be32		*ret = ERR_PTR(-EIO);
 
 	dprintk("RPC: %5u %s\n", task->tk_pid, __func__);
 
@@ -1307,6 +1517,7 @@
 	mic.data = (u8 *)p;
 	mic.len = len;
 
+	ret = ERR_PTR(-EACCES);
 	maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic);
 	if (maj_stat == GSS_S_CONTEXT_EXPIRED)
 		clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
@@ -1324,8 +1535,9 @@
 	return p + XDR_QUADLEN(len);
 out_bad:
 	gss_put_ctx(ctx);
-	dprintk("RPC: %5u %s failed.\n", task->tk_pid, __func__);
-	return NULL;
+	dprintk("RPC: %5u %s failed ret %ld.\n", task->tk_pid, __func__,
+		PTR_ERR(ret));
+	return ret;
 }
 
 static void gss_wrap_req_encode(kxdreproc_t encode, struct rpc_rqst *rqstp,
@@ -1657,8 +1869,6 @@
 	.destroy	= gss_destroy,
 	.lookup_cred	= gss_lookup_cred,
 	.crcreate	= gss_create_cred,
-	.pipes_create	= gss_pipes_dentries_create,
-	.pipes_destroy	= gss_pipes_dentries_destroy,
 	.list_pseudoflavors = gss_mech_list_pseudoflavors,
 	.info2flavor	= gss_mech_info2flavor,
 	.flavor2info	= gss_mech_flavor2info,
@@ -1675,6 +1885,7 @@
 	.crvalidate	= gss_validate,
 	.crwrap_req	= gss_wrap_req,
 	.crunwrap_resp	= gss_unwrap_resp,
+	.crkey_timeout	= gss_key_timeout,
 };
 
 static const struct rpc_credops gss_nullops = {
@@ -1762,5 +1973,12 @@
 MODULE_PARM_DESC(expired_cred_retry_delay, "Timeout (in seconds) until "
 		"the RPC engine retries an expired credential");
 
+module_param_named(key_expire_timeo,
+		   gss_key_expire_timeo,
+		   uint, 0644);
+MODULE_PARM_DESC(key_expire_timeo, "Time (in seconds) at the end of a "
+		"credential keys lifetime where the NFS layer cleans up "
+		"prior to key expiration");
+
 module_init(init_rpcsec_gss)
 module_exit(exit_rpcsec_gss)
diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c
index a5c36c0..f0ebe07 100644
--- a/net/sunrpc/auth_null.c
+++ b/net/sunrpc/auth_null.c
@@ -18,7 +18,7 @@
 static struct rpc_cred null_cred;
 
 static struct rpc_auth *
-nul_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
+nul_create(struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
 {
 	atomic_inc(&null_auth.au_count);
 	return &null_auth;
@@ -88,13 +88,13 @@
 	flavor = ntohl(*p++);
 	if (flavor != RPC_AUTH_NULL) {
 		printk("RPC: bad verf flavor: %u\n", flavor);
-		return NULL;
+		return ERR_PTR(-EIO);
 	}
 
 	size = ntohl(*p++);
 	if (size != 0) {
 		printk("RPC: bad verf size: %u\n", size);
-		return NULL;
+		return ERR_PTR(-EIO);
 	}
 
 	return p;
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
index dc37021..d5d6923 100644
--- a/net/sunrpc/auth_unix.c
+++ b/net/sunrpc/auth_unix.c
@@ -33,7 +33,7 @@
 static const struct rpc_credops	unix_credops;
 
 static struct rpc_auth *
-unx_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
+unx_create(struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
 {
 	dprintk("RPC:       creating UNIX authenticator for client %p\n",
 			clnt);
@@ -192,13 +192,13 @@
 	    flavor != RPC_AUTH_UNIX &&
 	    flavor != RPC_AUTH_SHORT) {
 		printk("RPC: bad verf flavor: %u\n", flavor);
-		return NULL;
+		return ERR_PTR(-EIO);
 	}
 
 	size = ntohl(*p++);
 	if (size > RPC_MAX_AUTH_SIZE) {
 		printk("RPC: giant verf size: %u\n", size);
-		return NULL;
+		return ERR_PTR(-EIO);
 	}
 	task->tk_rqstp->rq_cred->cr_auth->au_rslack = (size >> 2) + 2;
 	p += (size >> 2);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index ecbc4e3..7747960 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -102,12 +102,7 @@
 
 static void __rpc_clnt_remove_pipedir(struct rpc_clnt *clnt)
 {
-	if (clnt->cl_dentry) {
-		if (clnt->cl_auth && clnt->cl_auth->au_ops->pipes_destroy)
-			clnt->cl_auth->au_ops->pipes_destroy(clnt->cl_auth);
-		rpc_remove_client_dir(clnt->cl_dentry);
-	}
-	clnt->cl_dentry = NULL;
+	rpc_remove_client_dir(clnt);
 }
 
 static void rpc_clnt_remove_pipedir(struct rpc_clnt *clnt)
@@ -123,10 +118,10 @@
 }
 
 static struct dentry *rpc_setup_pipedir_sb(struct super_block *sb,
-				    struct rpc_clnt *clnt,
-				    const char *dir_name)
+				    struct rpc_clnt *clnt)
 {
 	static uint32_t clntid;
+	const char *dir_name = clnt->cl_program->pipe_dir_name;
 	char name[15];
 	struct dentry *dir, *dentry;
 
@@ -153,28 +148,35 @@
 }
 
 static int
-rpc_setup_pipedir(struct rpc_clnt *clnt, const char *dir_name,
-		  struct super_block *pipefs_sb)
+rpc_setup_pipedir(struct super_block *pipefs_sb, struct rpc_clnt *clnt)
 {
 	struct dentry *dentry;
 
-	clnt->cl_dentry = NULL;
-	if (dir_name == NULL)
-		return 0;
-	dentry = rpc_setup_pipedir_sb(pipefs_sb, clnt, dir_name);
-	if (IS_ERR(dentry))
-		return PTR_ERR(dentry);
-	clnt->cl_dentry = dentry;
+	if (clnt->cl_program->pipe_dir_name != NULL) {
+		dentry = rpc_setup_pipedir_sb(pipefs_sb, clnt);
+		if (IS_ERR(dentry))
+			return PTR_ERR(dentry);
+	}
 	return 0;
 }
 
-static inline int rpc_clnt_skip_event(struct rpc_clnt *clnt, unsigned long event)
+static int rpc_clnt_skip_event(struct rpc_clnt *clnt, unsigned long event)
 {
-	if (((event == RPC_PIPEFS_MOUNT) && clnt->cl_dentry) ||
-	    ((event == RPC_PIPEFS_UMOUNT) && !clnt->cl_dentry))
+	if (clnt->cl_program->pipe_dir_name == NULL)
 		return 1;
-	if ((event == RPC_PIPEFS_MOUNT) && atomic_read(&clnt->cl_count) == 0)
-		return 1;
+
+	switch (event) {
+	case RPC_PIPEFS_MOUNT:
+		if (clnt->cl_pipedir_objects.pdh_dentry != NULL)
+			return 1;
+		if (atomic_read(&clnt->cl_count) == 0)
+			return 1;
+		break;
+	case RPC_PIPEFS_UMOUNT:
+		if (clnt->cl_pipedir_objects.pdh_dentry == NULL)
+			return 1;
+		break;
+	}
 	return 0;
 }
 
@@ -186,18 +188,11 @@
 
 	switch (event) {
 	case RPC_PIPEFS_MOUNT:
-		dentry = rpc_setup_pipedir_sb(sb, clnt,
-					      clnt->cl_program->pipe_dir_name);
+		dentry = rpc_setup_pipedir_sb(sb, clnt);
 		if (!dentry)
 			return -ENOENT;
 		if (IS_ERR(dentry))
 			return PTR_ERR(dentry);
-		clnt->cl_dentry = dentry;
-		if (clnt->cl_auth->au_ops->pipes_create) {
-			err = clnt->cl_auth->au_ops->pipes_create(clnt->cl_auth);
-			if (err)
-				__rpc_clnt_remove_pipedir(clnt);
-		}
 		break;
 	case RPC_PIPEFS_UMOUNT:
 		__rpc_clnt_remove_pipedir(clnt);
@@ -230,8 +225,6 @@
 
 	spin_lock(&sn->rpc_client_lock);
 	list_for_each_entry(clnt, &sn->all_clients, cl_clients) {
-		if (clnt->cl_program->pipe_dir_name == NULL)
-			continue;
 		if (rpc_clnt_skip_event(clnt, event))
 			continue;
 		spin_unlock(&sn->rpc_client_lock);
@@ -282,7 +275,10 @@
 static int rpc_client_register(const struct rpc_create_args *args,
 			       struct rpc_clnt *clnt)
 {
-	const struct rpc_program *program = args->program;
+	struct rpc_auth_create_args auth_args = {
+		.pseudoflavor = args->authflavor,
+		.target_name = args->client_name,
+	};
 	struct rpc_auth *auth;
 	struct net *net = rpc_net_ns(clnt);
 	struct super_block *pipefs_sb;
@@ -290,7 +286,7 @@
 
 	pipefs_sb = rpc_get_sb_net(net);
 	if (pipefs_sb) {
-		err = rpc_setup_pipedir(clnt, program->pipe_dir_name, pipefs_sb);
+		err = rpc_setup_pipedir(pipefs_sb, clnt);
 		if (err)
 			goto out;
 	}
@@ -299,7 +295,7 @@
 	if (pipefs_sb)
 		rpc_put_sb_net(net);
 
-	auth = rpcauth_create(args->authflavor, clnt);
+	auth = rpcauth_create(&auth_args, clnt);
 	if (IS_ERR(auth)) {
 		dprintk("RPC:       Couldn't create auth handle (flavor %u)\n",
 				args->authflavor);
@@ -317,7 +313,27 @@
 	return err;
 }
 
-static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, struct rpc_xprt *xprt)
+static DEFINE_IDA(rpc_clids);
+
+static int rpc_alloc_clid(struct rpc_clnt *clnt)
+{
+	int clid;
+
+	clid = ida_simple_get(&rpc_clids, 0, 0, GFP_KERNEL);
+	if (clid < 0)
+		return clid;
+	clnt->cl_clid = clid;
+	return 0;
+}
+
+static void rpc_free_clid(struct rpc_clnt *clnt)
+{
+	ida_simple_remove(&rpc_clids, clnt->cl_clid);
+}
+
+static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args,
+		struct rpc_xprt *xprt,
+		struct rpc_clnt *parent)
 {
 	const struct rpc_program *program = args->program;
 	const struct rpc_version *version;
@@ -343,16 +359,20 @@
 	clnt = kzalloc(sizeof(*clnt), GFP_KERNEL);
 	if (!clnt)
 		goto out_err;
-	clnt->cl_parent = clnt;
+	clnt->cl_parent = parent ? : clnt;
+
+	err = rpc_alloc_clid(clnt);
+	if (err)
+		goto out_no_clid;
 
 	rcu_assign_pointer(clnt->cl_xprt, xprt);
 	clnt->cl_procinfo = version->procs;
 	clnt->cl_maxproc  = version->nrprocs;
-	clnt->cl_protname = program->name;
 	clnt->cl_prog     = args->prognumber ? : program->number;
 	clnt->cl_vers     = version->number;
 	clnt->cl_stats    = program->stats;
 	clnt->cl_metrics  = rpc_alloc_iostats(clnt);
+	rpc_init_pipe_dir_head(&clnt->cl_pipedir_objects);
 	err = -ENOMEM;
 	if (clnt->cl_metrics == NULL)
 		goto out_no_stats;
@@ -372,12 +392,6 @@
 
 	clnt->cl_rtt = &clnt->cl_rtt_default;
 	rpc_init_rtt(&clnt->cl_rtt_default, clnt->cl_timeout->to_initval);
-	clnt->cl_principal = NULL;
-	if (args->client_name) {
-		clnt->cl_principal = kstrdup(args->client_name, GFP_KERNEL);
-		if (!clnt->cl_principal)
-			goto out_no_principal;
-	}
 
 	atomic_set(&clnt->cl_count, 1);
 
@@ -387,13 +401,15 @@
 	err = rpc_client_register(args, clnt);
 	if (err)
 		goto out_no_path;
+	if (parent)
+		atomic_inc(&parent->cl_count);
 	return clnt;
 
 out_no_path:
-	kfree(clnt->cl_principal);
-out_no_principal:
 	rpc_free_iostats(clnt->cl_metrics);
 out_no_stats:
+	rpc_free_clid(clnt);
+out_no_clid:
 	kfree(clnt);
 out_err:
 	rpciod_down();
@@ -479,7 +495,7 @@
 	if (args->flags & RPC_CLNT_CREATE_NONPRIVPORT)
 		xprt->resvport = 0;
 
-	clnt = rpc_new_client(args, xprt);
+	clnt = rpc_new_client(args, xprt, NULL);
 	if (IS_ERR(clnt))
 		return clnt;
 
@@ -526,15 +542,12 @@
 		goto out_err;
 	args->servername = xprt->servername;
 
-	new = rpc_new_client(args, xprt);
+	new = rpc_new_client(args, xprt, clnt);
 	if (IS_ERR(new)) {
 		err = PTR_ERR(new);
 		goto out_err;
 	}
 
-	atomic_inc(&clnt->cl_count);
-	new->cl_parent = clnt;
-
 	/* Turn off autobind on clones */
 	new->cl_autobind = 0;
 	new->cl_softrtry = clnt->cl_softrtry;
@@ -561,7 +574,6 @@
 		.prognumber	= clnt->cl_prog,
 		.version	= clnt->cl_vers,
 		.authflavor	= clnt->cl_auth->au_flavor,
-		.client_name	= clnt->cl_principal,
 	};
 	return __rpc_clone_client(&args, clnt);
 }
@@ -583,7 +595,6 @@
 		.prognumber	= clnt->cl_prog,
 		.version	= clnt->cl_vers,
 		.authflavor	= flavor,
-		.client_name	= clnt->cl_principal,
 	};
 	return __rpc_clone_client(&args, clnt);
 }
@@ -629,7 +640,7 @@
 	might_sleep();
 
 	dprintk_rcu("RPC:       shutting down %s client for %s\n",
-			clnt->cl_protname,
+			clnt->cl_program->name,
 			rcu_dereference(clnt->cl_xprt)->servername);
 
 	while (!list_empty(&clnt->cl_tasks)) {
@@ -649,17 +660,17 @@
 rpc_free_client(struct rpc_clnt *clnt)
 {
 	dprintk_rcu("RPC:       destroying %s client for %s\n",
-			clnt->cl_protname,
+			clnt->cl_program->name,
 			rcu_dereference(clnt->cl_xprt)->servername);
 	if (clnt->cl_parent != clnt)
 		rpc_release_client(clnt->cl_parent);
 	rpc_clnt_remove_pipedir(clnt);
 	rpc_unregister_client(clnt);
 	rpc_free_iostats(clnt->cl_metrics);
-	kfree(clnt->cl_principal);
 	clnt->cl_metrics = NULL;
 	xprt_put(rcu_dereference_raw(clnt->cl_xprt));
 	rpciod_down();
+	rpc_free_clid(clnt);
 	kfree(clnt);
 }
 
@@ -720,7 +731,6 @@
 		.prognumber	= program->number,
 		.version	= vers,
 		.authflavor	= old->cl_auth->au_flavor,
-		.client_name	= old->cl_principal,
 	};
 	struct rpc_clnt *clnt;
 	int err;
@@ -1299,7 +1309,7 @@
 	struct rpc_clnt	*clnt = task->tk_client;
 
 	dprintk("RPC: %5u call_start %s%d proc %s (%s)\n", task->tk_pid,
-			clnt->cl_protname, clnt->cl_vers,
+			clnt->cl_program->name, clnt->cl_vers,
 			rpc_proc_name(task),
 			(RPC_IS_ASYNC(task) ? "async" : "sync"));
 
@@ -1423,9 +1433,9 @@
 		return;
 	case -ETIMEDOUT:
 		rpc_delay(task, 3*HZ);
-	case -EKEYEXPIRED:
 	case -EAGAIN:
 		status = -EACCES;
+	case -EKEYEXPIRED:
 		if (!task->tk_cred_retry)
 			break;
 		task->tk_cred_retry--;
@@ -1912,7 +1922,7 @@
 	default:
 		if (clnt->cl_chatty)
 			printk("%s: RPC call returned error %d\n",
-			       clnt->cl_protname, -status);
+			       clnt->cl_program->name, -status);
 		rpc_exit(task, status);
 	}
 }
@@ -1943,7 +1953,7 @@
 		if (clnt->cl_chatty) {
 			rcu_read_lock();
 			printk(KERN_NOTICE "%s: server %s not responding, timed out\n",
-				clnt->cl_protname,
+				clnt->cl_program->name,
 				rcu_dereference(clnt->cl_xprt)->servername);
 			rcu_read_unlock();
 		}
@@ -1959,7 +1969,7 @@
 		if (clnt->cl_chatty) {
 			rcu_read_lock();
 			printk(KERN_NOTICE "%s: server %s not responding, still trying\n",
-			clnt->cl_protname,
+			clnt->cl_program->name,
 			rcu_dereference(clnt->cl_xprt)->servername);
 			rcu_read_unlock();
 		}
@@ -1994,7 +2004,7 @@
 		if (clnt->cl_chatty) {
 			rcu_read_lock();
 			printk(KERN_NOTICE "%s: server %s OK\n",
-				clnt->cl_protname,
+				clnt->cl_program->name,
 				rcu_dereference(clnt->cl_xprt)->servername);
 			rcu_read_unlock();
 		}
@@ -2019,7 +2029,7 @@
 			goto out_retry;
 		}
 		dprintk("RPC:       %s: too small RPC reply size (%d bytes)\n",
-				clnt->cl_protname, task->tk_status);
+				clnt->cl_program->name, task->tk_status);
 		task->tk_action = call_timeout;
 		goto out_retry;
 	}
@@ -2091,7 +2101,8 @@
 		dprintk("RPC: %5u %s: XDR representation not a multiple of"
 		       " 4 bytes: 0x%x\n", task->tk_pid, __func__,
 		       task->tk_rqstp->rq_rcv_buf.len);
-		goto out_eio;
+		error = -EIO;
+		goto out_err;
 	}
 	if ((len -= 3) < 0)
 		goto out_overflow;
@@ -2100,6 +2111,7 @@
 	if ((n = ntohl(*p++)) != RPC_REPLY) {
 		dprintk("RPC: %5u %s: not an RPC reply: %x\n",
 			task->tk_pid, __func__, n);
+		error = -EIO;
 		goto out_garbage;
 	}
 
@@ -2118,7 +2130,8 @@
 			dprintk("RPC: %5u %s: RPC call rejected, "
 				"unknown error: %x\n",
 				task->tk_pid, __func__, n);
-			goto out_eio;
+			error = -EIO;
+			goto out_err;
 		}
 		if (--len < 0)
 			goto out_overflow;
@@ -2163,9 +2176,11 @@
 				task->tk_pid, __func__, n);
 		goto out_err;
 	}
-	if (!(p = rpcauth_checkverf(task, p))) {
-		dprintk("RPC: %5u %s: auth check failed\n",
-				task->tk_pid, __func__);
+	p = rpcauth_checkverf(task, p);
+	if (IS_ERR(p)) {
+		error = PTR_ERR(p);
+		dprintk("RPC: %5u %s: auth check failed with %d\n",
+				task->tk_pid, __func__, error);
 		goto out_garbage;		/* bad verifier, retry */
 	}
 	len = p - (__be32 *)iov->iov_base - 1;
@@ -2218,8 +2233,6 @@
 out_retry:
 		return ERR_PTR(-EAGAIN);
 	}
-out_eio:
-	error = -EIO;
 out_err:
 	rpc_exit(task, error);
 	dprintk("RPC: %5u %s: call failed with error %d\n", task->tk_pid,
@@ -2291,7 +2304,7 @@
 	printk(KERN_INFO "%5u %04x %6d %8p %8p %8ld %8p %sv%u %s a:%ps q:%s\n",
 		task->tk_pid, task->tk_flags, task->tk_status,
 		clnt, task->tk_rqstp, task->tk_timeout, task->tk_ops,
-		clnt->cl_protname, clnt->cl_vers, rpc_proc_name(task),
+		clnt->cl_program->name, clnt->cl_vers, rpc_proc_name(task),
 		task->tk_action, rpc_waitq);
 }
 
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 406859c..f94567b 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -409,7 +409,7 @@
 	rcu_read_lock();
 	seq_printf(m, "RPC server: %s\n",
 			rcu_dereference(clnt->cl_xprt)->servername);
-	seq_printf(m, "service: %s (%d) version %d\n", clnt->cl_protname,
+	seq_printf(m, "service: %s (%d) version %d\n", clnt->cl_program->name,
 			clnt->cl_prog, clnt->cl_vers);
 	seq_printf(m, "address: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR));
 	seq_printf(m, "protocol: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_PROTO));
@@ -480,23 +480,6 @@
 	.d_delete = rpc_delete_dentry,
 };
 
-/*
- * Lookup the data. This is trivial - if the dentry didn't already
- * exist, we know it is negative.
- */
-static struct dentry *
-rpc_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
-{
-	if (dentry->d_name.len > NAME_MAX)
-		return ERR_PTR(-ENAMETOOLONG);
-	d_add(dentry, NULL);
-	return NULL;
-}
-
-static const struct inode_operations rpc_dir_inode_operations = {
-	.lookup		= rpc_lookup,
-};
-
 static struct inode *
 rpc_get_inode(struct super_block *sb, umode_t mode)
 {
@@ -509,7 +492,7 @@
 	switch (mode & S_IFMT) {
 	case S_IFDIR:
 		inode->i_fop = &simple_dir_operations;
-		inode->i_op = &rpc_dir_inode_operations;
+		inode->i_op = &simple_dir_inode_operations;
 		inc_nlink(inode);
 	default:
 		break;
@@ -901,6 +884,159 @@
 }
 EXPORT_SYMBOL_GPL(rpc_unlink);
 
+/**
+ * rpc_init_pipe_dir_head - initialise a struct rpc_pipe_dir_head
+ * @pdh: pointer to struct rpc_pipe_dir_head
+ */
+void rpc_init_pipe_dir_head(struct rpc_pipe_dir_head *pdh)
+{
+	INIT_LIST_HEAD(&pdh->pdh_entries);
+	pdh->pdh_dentry = NULL;
+}
+EXPORT_SYMBOL_GPL(rpc_init_pipe_dir_head);
+
+/**
+ * rpc_init_pipe_dir_object - initialise a struct rpc_pipe_dir_object
+ * @pdo: pointer to struct rpc_pipe_dir_object
+ * @pdo_ops: pointer to const struct rpc_pipe_dir_object_ops
+ * @pdo_data: pointer to caller-defined data
+ */
+void rpc_init_pipe_dir_object(struct rpc_pipe_dir_object *pdo,
+		const struct rpc_pipe_dir_object_ops *pdo_ops,
+		void *pdo_data)
+{
+	INIT_LIST_HEAD(&pdo->pdo_head);
+	pdo->pdo_ops = pdo_ops;
+	pdo->pdo_data = pdo_data;
+}
+EXPORT_SYMBOL_GPL(rpc_init_pipe_dir_object);
+
+static int
+rpc_add_pipe_dir_object_locked(struct net *net,
+		struct rpc_pipe_dir_head *pdh,
+		struct rpc_pipe_dir_object *pdo)
+{
+	int ret = 0;
+
+	if (pdh->pdh_dentry)
+		ret = pdo->pdo_ops->create(pdh->pdh_dentry, pdo);
+	if (ret == 0)
+		list_add_tail(&pdo->pdo_head, &pdh->pdh_entries);
+	return ret;
+}
+
+static void
+rpc_remove_pipe_dir_object_locked(struct net *net,
+		struct rpc_pipe_dir_head *pdh,
+		struct rpc_pipe_dir_object *pdo)
+{
+	if (pdh->pdh_dentry)
+		pdo->pdo_ops->destroy(pdh->pdh_dentry, pdo);
+	list_del_init(&pdo->pdo_head);
+}
+
+/**
+ * rpc_add_pipe_dir_object - associate a rpc_pipe_dir_object to a directory
+ * @net: pointer to struct net
+ * @pdh: pointer to struct rpc_pipe_dir_head
+ * @pdo: pointer to struct rpc_pipe_dir_object
+ *
+ */
+int
+rpc_add_pipe_dir_object(struct net *net,
+		struct rpc_pipe_dir_head *pdh,
+		struct rpc_pipe_dir_object *pdo)
+{
+	int ret = 0;
+
+	if (list_empty(&pdo->pdo_head)) {
+		struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+
+		mutex_lock(&sn->pipefs_sb_lock);
+		ret = rpc_add_pipe_dir_object_locked(net, pdh, pdo);
+		mutex_unlock(&sn->pipefs_sb_lock);
+	}
+	return ret;
+}
+EXPORT_SYMBOL_GPL(rpc_add_pipe_dir_object);
+
+/**
+ * rpc_remove_pipe_dir_object - remove a rpc_pipe_dir_object from a directory
+ * @net: pointer to struct net
+ * @pdh: pointer to struct rpc_pipe_dir_head
+ * @pdo: pointer to struct rpc_pipe_dir_object
+ *
+ */
+void
+rpc_remove_pipe_dir_object(struct net *net,
+		struct rpc_pipe_dir_head *pdh,
+		struct rpc_pipe_dir_object *pdo)
+{
+	if (!list_empty(&pdo->pdo_head)) {
+		struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+
+		mutex_lock(&sn->pipefs_sb_lock);
+		rpc_remove_pipe_dir_object_locked(net, pdh, pdo);
+		mutex_unlock(&sn->pipefs_sb_lock);
+	}
+}
+EXPORT_SYMBOL_GPL(rpc_remove_pipe_dir_object);
+
+/**
+ * rpc_find_or_alloc_pipe_dir_object
+ * @net: pointer to struct net
+ * @pdh: pointer to struct rpc_pipe_dir_head
+ * @match: match struct rpc_pipe_dir_object to data
+ * @alloc: allocate a new struct rpc_pipe_dir_object
+ * @data: user defined data for match() and alloc()
+ *
+ */
+struct rpc_pipe_dir_object *
+rpc_find_or_alloc_pipe_dir_object(struct net *net,
+		struct rpc_pipe_dir_head *pdh,
+		int (*match)(struct rpc_pipe_dir_object *, void *),
+		struct rpc_pipe_dir_object *(*alloc)(void *),
+		void *data)
+{
+	struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+	struct rpc_pipe_dir_object *pdo;
+
+	mutex_lock(&sn->pipefs_sb_lock);
+	list_for_each_entry(pdo, &pdh->pdh_entries, pdo_head) {
+		if (!match(pdo, data))
+			continue;
+		goto out;
+	}
+	pdo = alloc(data);
+	if (!pdo)
+		goto out;
+	rpc_add_pipe_dir_object_locked(net, pdh, pdo);
+out:
+	mutex_unlock(&sn->pipefs_sb_lock);
+	return pdo;
+}
+EXPORT_SYMBOL_GPL(rpc_find_or_alloc_pipe_dir_object);
+
+static void
+rpc_create_pipe_dir_objects(struct rpc_pipe_dir_head *pdh)
+{
+	struct rpc_pipe_dir_object *pdo;
+	struct dentry *dir = pdh->pdh_dentry;
+
+	list_for_each_entry(pdo, &pdh->pdh_entries, pdo_head)
+		pdo->pdo_ops->create(dir, pdo);
+}
+
+static void
+rpc_destroy_pipe_dir_objects(struct rpc_pipe_dir_head *pdh)
+{
+	struct rpc_pipe_dir_object *pdo;
+	struct dentry *dir = pdh->pdh_dentry;
+
+	list_for_each_entry(pdo, &pdh->pdh_entries, pdo_head)
+		pdo->pdo_ops->destroy(dir, pdo);
+}
+
 enum {
 	RPCAUTH_info,
 	RPCAUTH_EOF
@@ -941,16 +1077,29 @@
 				   const char *name,
 				   struct rpc_clnt *rpc_client)
 {
-	return rpc_mkdir_populate(dentry, name, S_IRUGO | S_IXUGO, NULL,
+	struct dentry *ret;
+
+	ret = rpc_mkdir_populate(dentry, name, S_IRUGO | S_IXUGO, NULL,
 			rpc_clntdir_populate, rpc_client);
+	if (!IS_ERR(ret)) {
+		rpc_client->cl_pipedir_objects.pdh_dentry = ret;
+		rpc_create_pipe_dir_objects(&rpc_client->cl_pipedir_objects);
+	}
+	return ret;
 }
 
 /**
  * rpc_remove_client_dir - Remove a directory created with rpc_create_client_dir()
- * @dentry: dentry for the pipe
+ * @rpc_client: rpc_client for the pipe
  */
-int rpc_remove_client_dir(struct dentry *dentry)
+int rpc_remove_client_dir(struct rpc_clnt *rpc_client)
 {
+	struct dentry *dentry = rpc_client->cl_pipedir_objects.pdh_dentry;
+
+	if (dentry == NULL)
+		return 0;
+	rpc_destroy_pipe_dir_objects(&rpc_client->cl_pipedir_objects);
+	rpc_client->cl_pipedir_objects.pdh_dentry = NULL;
 	return rpc_rmdir_depopulate(dentry, rpc_clntdir_depopulate);
 }
 
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 93a7a4e..ff3cc4b 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -258,7 +258,7 @@
 	return 0;
 }
 
-#ifdef RPC_DEBUG
+#if defined(RPC_DEBUG) || defined(RPC_TRACEPOINTS)
 static void rpc_task_set_debuginfo(struct rpc_task *task)
 {
 	static atomic_t rpc_pid;
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index 21b75cb..5453049 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -188,7 +188,7 @@
 
 	seq_printf(seq, "\tRPC iostats version: %s  ", RPC_IOSTATS_VERS);
 	seq_printf(seq, "p/v: %u/%u (%s)\n",
-			clnt->cl_prog, clnt->cl_vers, clnt->cl_protname);
+			clnt->cl_prog, clnt->cl_vers, clnt->cl_program->name);
 
 	rcu_read_lock();
 	xprt = rcu_dereference(clnt->cl_xprt);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index d6656d7..ee03d35 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -47,6 +47,8 @@
 #include <net/udp.h>
 #include <net/tcp.h>
 
+#include <trace/events/sunrpc.h>
+
 #include "sunrpc.h"
 
 static void xs_close(struct rpc_xprt *xprt);
@@ -665,8 +667,10 @@
 	struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
 	struct socket *sock = transport->sock;
 
-	if (sock != NULL)
+	if (sock != NULL) {
 		kernel_sock_shutdown(sock, SHUT_WR);
+		trace_rpc_socket_shutdown(xprt, sock);
+	}
 }
 
 /**
@@ -811,6 +815,7 @@
 
 	sk->sk_no_check = 0;
 
+	trace_rpc_socket_close(&transport->xprt, sock);
 	sock_release(sock);
 }
 
@@ -1492,6 +1497,7 @@
 			sock_flag(sk, SOCK_ZAPPED),
 			sk->sk_shutdown);
 
+	trace_rpc_socket_state_change(xprt, sk->sk_socket);
 	switch (sk->sk_state) {
 	case TCP_ESTABLISHED:
 		spin_lock(&xprt->transport_lock);
@@ -1896,6 +1902,7 @@
 			xprt, xprt->address_strings[RPC_DISPLAY_ADDR]);
 
 	status = xs_local_finish_connecting(xprt, sock);
+	trace_rpc_socket_connect(xprt, sock, status);
 	switch (status) {
 	case 0:
 		dprintk("RPC:       xprt %p connected to %s\n",
@@ -2039,6 +2046,7 @@
 			xprt->address_strings[RPC_DISPLAY_PORT]);
 
 	xs_udp_finish_connecting(xprt, sock);
+	trace_rpc_socket_connect(xprt, sock, 0);
 	status = 0;
 out:
 	xprt_clear_connecting(xprt);
@@ -2064,6 +2072,8 @@
 	memset(&any, 0, sizeof(any));
 	any.sa_family = AF_UNSPEC;
 	result = kernel_connect(transport->sock, &any, sizeof(any), 0);
+	trace_rpc_socket_reset_connection(&transport->xprt,
+			transport->sock, result);
 	if (!result)
 		xs_sock_reset_connection_flags(&transport->xprt);
 	dprintk("RPC:       AF_UNSPEC connect return code %d\n", result);
@@ -2194,6 +2204,7 @@
 			xprt->address_strings[RPC_DISPLAY_PORT]);
 
 	status = xs_tcp_finish_connecting(xprt, sock);
+	trace_rpc_socket_connect(xprt, sock, status);
 	dprintk("RPC:       %p connect status %d connected %d sock state %d\n",
 			xprt, -status, xprt_connected(xprt),
 			sock->sk->sk_state);
diff --git a/tools/lguest/lguest.c b/tools/lguest/lguest.c
index 68f67cf..32cf2ce 100644
--- a/tools/lguest/lguest.c
+++ b/tools/lguest/lguest.c
@@ -42,6 +42,10 @@
 #include <pwd.h>
 #include <grp.h>
 
+#ifndef VIRTIO_F_ANY_LAYOUT
+#define VIRTIO_F_ANY_LAYOUT		27
+#endif
+
 /*L:110
  * We can ignore the 43 include files we need for this program, but I do want
  * to draw attention to the use of kernel-style types.
@@ -1544,6 +1548,8 @@
 	add_feature(dev, VIRTIO_NET_F_HOST_ECN);
 	/* We handle indirect ring entries */
 	add_feature(dev, VIRTIO_RING_F_INDIRECT_DESC);
+	/* We're compliant with the damn spec. */
+	add_feature(dev, VIRTIO_F_ANY_LAYOUT);
 	set_config(dev, sizeof(conf), &conf);
 
 	/* We don't need the socket any more; setup is done. */
diff --git a/tools/virtio/.gitignore b/tools/virtio/.gitignore
new file mode 100644
index 0000000..1cfbb01
--- /dev/null
+++ b/tools/virtio/.gitignore
@@ -0,0 +1,3 @@
+*.d
+virtio_test
+vringh_test